linux/fs/dcache.c
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt
  18
  19#include <linux/syscalls.h>
  20#include <linux/string.h>
  21#include <linux/mm.h>
  22#include <linux/fs.h>
  23#include <linux/fsnotify.h>
  24#include <linux/slab.h>
  25#include <linux/init.h>
  26#include <linux/hash.h>
  27#include <linux/cache.h>
  28#include <linux/export.h>
  29#include <linux/mount.h>
  30#include <linux/file.h>
  31#include <asm/uaccess.h>
  32#include <linux/security.h>
  33#include <linux/seqlock.h>
  34#include <linux/swap.h>
  35#include <linux/bootmem.h>
  36#include <linux/fs_struct.h>
  37#include <linux/hardirq.h>
  38#include <linux/bit_spinlock.h>
  39#include <linux/rculist_bl.h>
  40#include <linux/prefetch.h>
  41#include <linux/ratelimit.h>
  42#include "internal.h"
  43#include "mount.h"
  44
  45/*
  46 * Usage:
   47 * dentry->d_inode->i_lock protects:
  48 *   - i_dentry, d_alias, d_inode of aliases
  49 * dcache_hash_bucket lock protects:
  50 *   - the dcache hash table
  51 * s_anon bl list spinlock protects:
  52 *   - the s_anon list (see __d_drop)
  53 * dcache_lru_lock protects:
  54 *   - the dcache lru lists and counters
  55 * d_lock protects:
  56 *   - d_flags
  57 *   - d_name
  58 *   - d_lru
  59 *   - d_count
  60 *   - d_unhashed()
  61 *   - d_parent and d_subdirs
   62 *   - children's d_child and d_parent
  63 *   - d_alias, d_inode
  64 *
  65 * Ordering:
  66 * dentry->d_inode->i_lock
  67 *   dentry->d_lock
  68 *     dcache_lru_lock
  69 *     dcache_hash_bucket lock
  70 *     s_anon lock
  71 *
  72 * If there is an ancestor relationship:
  73 * dentry->d_parent->...->d_parent->d_lock
  74 *   ...
  75 *     dentry->d_parent->d_lock
  76 *       dentry->d_lock
  77 *
  78 * If no ancestor relationship:
  79 * if (dentry1 < dentry2)
  80 *   dentry1->d_lock
  81 *     dentry2->d_lock
  82 */
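
/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this
 * file): taking the d_lock of two dentries with no ancestor
 * relationship in address order, per the locking rules above.
 */
static inline void lock_two_unrelated_dentries(struct dentry *d1,
                                               struct dentry *d2)
{
        if (d1 > d2)
                swap(d1, d2);                   /* lower address first */
        spin_lock(&d1->d_lock);
        spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
}
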
  83int sysctl_vfs_cache_pressure __read_mostly = 100;
  84EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  85
  86static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  87__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  88
  89EXPORT_SYMBOL(rename_lock);
  90
  91static struct kmem_cache *dentry_cache __read_mostly;
  92
  93/*
  94 * This is the single most critical data structure when it comes
  95 * to the dcache: the hashtable for lookups. Somebody should try
  96 * to make this good - I've just made it work.
  97 *
  98 * This hash-function tries to avoid losing too many bits of hash
  99 * information, yet avoid using a prime hash-size or similar.
 100 */
 101#define D_HASHBITS     d_hash_shift
 102#define D_HASHMASK     d_hash_mask
 103
 104static unsigned int d_hash_mask __read_mostly;
 105static unsigned int d_hash_shift __read_mostly;
 106
 107static struct hlist_bl_head *dentry_hashtable __read_mostly;
 108
 109static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 110                                        unsigned int hash)
 111{
 112        hash += (unsigned long) parent / L1_CACHE_BYTES;
 113        hash = hash + (hash >> D_HASHBITS);
 114        return dentry_hashtable + (hash & D_HASHMASK);
 115}
 116
 117/* Statistics gathering. */
 118struct dentry_stat_t dentry_stat = {
 119        .age_limit = 45,
 120};
 121
 122/*
 123 * dcache_negative_dentry_limit_sysctl:
 124 * This is sysctl parameter "negative-dentry-limit" which specifies a
 125 * limit for the number of negative dentries allowed in a system as a
  126 * multiple of one-thousandth of the total system memory. The default
  127 * is 0, which means there is no limit. The valid range is 0-100,
  128 * so up to 10% of the total system memory can be used.
 129 *
 130 * negative_dentry_limit:
 131 * The actual number of negative dentries allowed which is computed after
 132 * the user changes dcache_negative_dentry_limit_sysctl.
 133 */
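
/*
 * Worked example (illustrative; assumes the knob is registered under
 * /proc/sys/fs/): with 8 GiB of RAM, writing
 *      echo 20 > /proc/sys/fs/negative-dentry-limit
 * caps negative dentries at 20/1000 = 2% of total memory, i.e. at
 *      totalram_pages * (PAGE_SIZE / sizeof(struct dentry)) * 20 / 1000
 * entries, as computed in proc_dcache_negative_dentry_limit() below.
 */
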
 134static long negative_dentry_limit;
 135int dcache_negative_dentry_limit_sysctl;
 136EXPORT_SYMBOL_GPL(dcache_negative_dentry_limit_sysctl);
 137
 138/*
 139 * There will be a periodic check to see if the negative dentry limit
 140 * is exceeded. If so, the excess negative dentries will be removed.
 141 */
 142#define NEGATIVE_DENTRY_CHECK_PERIOD    (15 * HZ)       /* Check every 15s */
 143static void prune_negative_dentry(struct work_struct *work);
 144static DECLARE_DELAYED_WORK(prune_negative_dentry_work, prune_negative_dentry);
 145
 146static DEFINE_PER_CPU(long, nr_dentry);
 147static DEFINE_PER_CPU(long, nr_dentry_unused);
 148static DEFINE_PER_CPU(long, nr_dentry_negative);
 149
 150#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 151
 152/*
 153 * Here we resort to our own counters instead of using generic per-cpu counters
 154 * for consistency with what the vfs inode code does. We are expected to harvest
 155 * better code and performance by having our own specialized counters.
 156 *
 157 * Please note that the loop is done over all possible CPUs, not over all online
 158 * CPUs. The reason for this is that we don't want to play games with CPUs going
 159 * on and off. If one of them goes off, we will just keep their counters.
 160 *
 161 * glommer: See cffbc8a for details, and if you ever intend to change this,
 162 * please update all vfs counters to match.
 163 */
 164static long get_nr_dentry(void)
 165{
 166        int i;
 167        long sum = 0;
 168        for_each_possible_cpu(i)
 169                sum += per_cpu(nr_dentry, i);
 170        return sum < 0 ? 0 : sum;
 171}
 172
 173static long get_nr_dentry_unused(void)
 174{
 175        int i;
 176        long sum = 0;
 177        for_each_possible_cpu(i)
 178                sum += per_cpu(nr_dentry_unused, i);
 179        return sum < 0 ? 0 : sum;
 180}
 181
 182static long get_nr_dentry_negative(void)
 183{
 184        int i;
 185        long sum = 0;
 186
 187        for_each_possible_cpu(i)
 188                sum += per_cpu(nr_dentry_negative, i);
 189        return sum < 0 ? 0 : sum;
 190}
 191
 192int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 193                   size_t *lenp, loff_t *ppos)
 194{
 195        dentry_stat.nr_dentry = get_nr_dentry();
 196        dentry_stat.nr_unused = get_nr_dentry_unused();
 197        dentry_stat.nr_negative = get_nr_dentry_negative();
 198        return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 199}
 200#endif
 201
 202/*
  203 * Sysctl proc handler for dcache_negative_dentry_limit_sysctl.
 204 */
 205int proc_dcache_negative_dentry_limit(struct ctl_table *ctl, int write,
 206                                      void __user *buffer, size_t *lenp,
 207                                      loff_t *ppos)
 208{
 209        /* Rough estimate of # of dentries allocated per page */
 210        const unsigned int nr_dentry_page = PAGE_SIZE/sizeof(struct dentry);
 211        int old = dcache_negative_dentry_limit_sysctl;
 212        int ret;
 213
 214        ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
 215
 216        if (!write || ret || (dcache_negative_dentry_limit_sysctl == old))
 217                return ret;
 218
 219        negative_dentry_limit = totalram_pages * nr_dentry_page *
 220                                dcache_negative_dentry_limit_sysctl / 1000;
 221
 222        /*
 223         * The periodic dentry pruner only runs when the limit is non-zero.
 224         * The sysctl handler is the only trigger mechanism that can be
 225         * used to start/stop the prune work reliably, so we do that here
 226         * after calculating the new limit.
 227         */
 228        if (dcache_negative_dentry_limit_sysctl && !old)
 229                schedule_delayed_work(&prune_negative_dentry_work, 0);
 230
 231        if (!dcache_negative_dentry_limit_sysctl && old)
 232                cancel_delayed_work_sync(&prune_negative_dentry_work);
 233
  234        pr_info("Negative dentry limit = %ld\n", negative_dentry_limit);
 235        return 0;
 236}
 237EXPORT_SYMBOL_GPL(proc_dcache_negative_dentry_limit);
 238
 239/*
 240 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 241 * The strings are both count bytes long, and count is non-zero.
 242 */
 243#ifdef CONFIG_DCACHE_WORD_ACCESS
 244
 245#include <asm/word-at-a-time.h>
 246/*
  247 * NOTE! 'cs' and 'scount' come from a dentry, so it has an
  248 * aligned allocation for this particular component. We don't
 249 * strictly need the load_unaligned_zeropad() safety, but it
 250 * doesn't hurt either.
 251 *
 252 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 253 * need the careful unaligned handling.
 254 */
 255static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 256{
 257        unsigned long a,b,mask;
 258
 259        for (;;) {
 260                a = *(unsigned long *)cs;
 261                b = load_unaligned_zeropad(ct);
 262                if (tcount < sizeof(unsigned long))
 263                        break;
 264                if (unlikely(a != b))
 265                        return 1;
 266                cs += sizeof(unsigned long);
 267                ct += sizeof(unsigned long);
 268                tcount -= sizeof(unsigned long);
 269                if (!tcount)
 270                        return 0;
 271        }
 272        mask = bytemask_from_count(tcount);
 273        return unlikely(!!((a ^ b) & mask));
 274}
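
/*
 * Worked example (illustrative, little-endian 64-bit): for a final
 * partial word with tcount == 3, bytemask_from_count(3) yields
 * 0x0000000000ffffff, so only the low three name bytes take part in
 * the (a ^ b) comparison above; the remaining bytes are don't-cares.
 */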
 275
 276#else
 277
 278static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 279{
 280        do {
 281                if (*cs != *ct)
 282                        return 1;
 283                cs++;
 284                ct++;
 285                tcount--;
 286        } while (tcount);
 287        return 0;
 288}
 289
 290#endif
 291
 292static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 293{
 294        const unsigned char *cs;
 295        /*
 296         * Be careful about RCU walk racing with rename:
 297         * use ACCESS_ONCE to fetch the name pointer.
 298         *
 299         * NOTE! Even if a rename will mean that the length
 300         * was not loaded atomically, we don't care. The
 301         * RCU walk will check the sequence count eventually,
 302         * and catch it. And we won't overrun the buffer,
 303         * because we're reading the name pointer atomically,
 304         * and a dentry name is guaranteed to be properly
 305         * terminated with a NUL byte.
 306         *
 307         * End result: even if 'len' is wrong, we'll exit
 308         * early because the data cannot match (there can
 309         * be no NUL in the ct/tcount data)
 310         */
 311        cs = ACCESS_ONCE(dentry->d_name.name);
 312        smp_read_barrier_depends();
 313        return dentry_string_cmp(cs, ct, tcount);
 314}
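
/*
 * Illustrative sketch (hypothetical helper, not part of this file's
 * API): how a lookup-style caller combines the length check with
 * dentry_cmp().  Per the contract above, @name->len must be non-zero.
 */
static inline int example_name_matches(const struct dentry *dentry,
                                       const struct qstr *name)
{
        return dentry->d_name.len == name->len &&
               dentry_cmp(dentry, name->name, name->len) == 0;
}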
 315
 316void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
 317{
 318        size_t size = 0;
 319        char *buf = NULL;
 320
 321        if (unlikely(dname_external(dentry))) {
 322                size = READ_ONCE(dentry->d_name.len);
 323retry:
 324                /* Pre allocate buffer */
 325                name->name = buf = kmalloc(size + 1, GFP_KERNEL);
 326                if (!buf)
 327                        return;
 328        }
 329
 330        spin_lock(&dentry->d_lock);
 331        if (unlikely(dname_external(dentry))) {
 332                if (size < dentry->d_name.len) {
 333                        /* Raced with rename and need to redo the allocation */
 334                        size = dentry->d_name.len;
 335                        spin_unlock(&dentry->d_lock);
 336                        kfree(buf);
 337                        goto retry;
 338                }
 339                strcpy(buf, dentry->d_name.name);
 340                buf = NULL;
 341        } else {
 342                memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
 343                name->name = name->inline_name;
 344        }
 345        spin_unlock(&dentry->d_lock);
 346        kfree(buf);
 347}
 348EXPORT_SYMBOL(take_dentry_name_snapshot);
 349
 350void release_dentry_name_snapshot(struct name_snapshot *name)
 351{
 352        if (unlikely(name->name != name->inline_name))
 353                kfree(name->name);
 354}
 355EXPORT_SYMBOL(release_dentry_name_snapshot);
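
/*
 * Illustrative sketch (hypothetical caller): pairing the snapshot
 * helpers so the name stays valid across a concurrent rename.  Note
 * that name.name may be NULL if the allocation above failed.
 */
static void example_log_name(struct dentry *dentry)
{
        struct name_snapshot name;

        take_dentry_name_snapshot(&name, dentry);
        if (name.name)
                pr_debug("dentry name: %s\n", name.name);
        release_dentry_name_snapshot(&name);
}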
 356
 357static void __d_free(struct rcu_head *head)
 358{
 359        struct dentry *dentry = container_of(
 360                (struct hlist_node *)head, struct dentry, d_alias);
 361
 362        if (dname_external(dentry))
 363                kfree(dentry->d_name.name);
  364        kmem_cache_free(dentry_cache, dentry);
 365}
 366
 367/*
 368 * no locks, please.
 369 */
 370static void d_free(struct dentry *dentry)
 371{
 372        struct rcu_head *p = (struct rcu_head *)&dentry->d_alias;
 373        BUG_ON((int)dentry->d_lockref.count > 0);
 374        this_cpu_dec(nr_dentry);
 375        if (dentry->d_op && dentry->d_op->d_release)
 376                dentry->d_op->d_release(dentry);
 377
 378        /* if dentry was never visible to RCU, immediate free is OK */
 379        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 380                __d_free(p);
 381        else
 382                call_rcu(p, __d_free);
 383}
 384
 385/**
 386 * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups
 387 * @dentry: the target dentry
 388 * After this call, in-progress rcu-walk path lookup will fail. This
 389 * should be called after unhashing, and after changing d_inode (if
 390 * the dentry has not already been unhashed).
 391 */
 392static inline void dentry_rcuwalk_invalidate(struct dentry *dentry)
 393{
 394        lockdep_assert_held(&dentry->d_lock);
  395        /* Go through an invalidation barrier */
 396        write_seqcount_invalidate(&dentry->d_seq);
 397}
 398
 399/*
 400 * Release the dentry's inode, using the filesystem
 401 * d_iput() operation if defined. Dentry has no refcount
 402 * and is unhashed.
 403 */
 404static void dentry_iput(struct dentry * dentry)
 405        __releases(dentry->d_lock)
 406        __releases(dentry->d_inode->i_lock)
 407{
 408        struct inode *inode = dentry->d_inode;
 409        if (inode) {
 410                dentry->d_inode = NULL;
 411                hlist_del_init(&dentry->d_alias);
 412                spin_unlock(&dentry->d_lock);
 413                spin_unlock(&inode->i_lock);
 414                if (!inode->i_nlink)
 415                        fsnotify_inoderemove(inode);
 416                if (dentry->d_op && dentry->d_op->d_iput)
 417                        dentry->d_op->d_iput(dentry, inode);
 418                else
 419                        iput(inode);
 420        } else {
 421                spin_unlock(&dentry->d_lock);
 422        }
 423}
 424
 425/*
 426 * Release the dentry's inode, using the filesystem
 427 * d_iput() operation if defined. dentry remains in-use.
 428 */
 429static void dentry_unlink_inode(struct dentry * dentry)
 430        __releases(dentry->d_lock)
 431        __releases(dentry->d_inode->i_lock)
 432{
 433        struct inode *inode = dentry->d_inode;
 434        __d_clear_type(dentry);
 435        dentry->d_inode = NULL;
 436        if (dentry->d_flags & DCACHE_LRU_LIST)
 437                this_cpu_inc(nr_dentry_negative);
 438        hlist_del_init(&dentry->d_alias);
 439        dentry_rcuwalk_invalidate(dentry);
 440        spin_unlock(&dentry->d_lock);
 441        spin_unlock(&inode->i_lock);
 442        if (!inode->i_nlink)
 443                fsnotify_inoderemove(inode);
 444        if (dentry->d_op && dentry->d_op->d_iput)
 445                dentry->d_op->d_iput(dentry, inode);
 446        else
 447                iput(inode);
 448}
 449
 450/*
  451 * dentry_lru_(add|del|move_list) must be called with d_lock held.
 452 */
 453static void dentry_lru_add(struct dentry *dentry)
 454{
 455        if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
 456                bool negative = d_is_negative(dentry);
 457
 458                spin_lock(&dcache_lru_lock);
 459                dentry->d_flags |= DCACHE_LRU_LIST;
 460
 461                /*
 462                 * Negative dentries are added to the tail so that they
 463                 * will be pruned first. For those negative dentries that
 464                 * are referenced more than once, they will be put back
 465                 * to the head in the pruning process.
 466                 */
 467                if (negative)
 468                        list_add_tail(&dentry->d_lru,
 469                                      &dentry->d_sb->s_dentry_lru);
 470                else
 471                        list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 472                dentry->d_sb->s_nr_dentry_unused++;
 473                spin_unlock(&dcache_lru_lock);
 474                this_cpu_inc(nr_dentry_unused);
 475                if (d_is_negative(dentry))
 476                        this_cpu_inc(nr_dentry_negative);
 477        } else {
 478                dentry->d_flags |= DCACHE_REFERENCED;
 479        }
 480}
 481
 482static void __dentry_lru_del(struct dentry *dentry)
 483{
 484        list_del_init(&dentry->d_lru);
 485        dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
 486        dentry->d_sb->s_nr_dentry_unused--;
 487}
 488
 489/*
 490 * Remove a dentry with references from the LRU.
 491 */
 492static void dentry_lru_del(struct dentry *dentry)
 493{
 494        if (!list_empty(&dentry->d_lru)) {
 495                spin_lock(&dcache_lru_lock);
 496                __dentry_lru_del(dentry);
 497                spin_unlock(&dcache_lru_lock);
 498                this_cpu_dec(nr_dentry_unused);
 499                if (d_is_negative(dentry))
 500                        this_cpu_dec(nr_dentry_negative);
 501        }
 502}
 503
 504static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 505{
 506        spin_lock(&dcache_lru_lock);
 507        if (list_empty(&dentry->d_lru)) {
 508                dentry->d_flags |= DCACHE_LRU_LIST;
 509                list_add_tail(&dentry->d_lru, list);
 510                dentry->d_sb->s_nr_dentry_unused++;
 511                this_cpu_inc(nr_dentry_unused);
 512        } else {
 513                list_move_tail(&dentry->d_lru, list);
 514        }
 515        spin_unlock(&dcache_lru_lock);
 516}
 517
 518/*
 519 * Unhash a dentry without inserting an RCU walk barrier or checking that
 520 * dentry->d_lock is locked.  The caller must take care of that, if
 521 * appropriate.
 522 */
 523static void __d_shrink(struct dentry *dentry)
 524{
 525        if (!d_unhashed(dentry)) {
 526                struct hlist_bl_head *b;
 527                /*
 528                 * Hashed dentries are normally on the dentry hashtable,
 529                 * with the exception of those newly allocated by
 530                 * d_obtain_alias, which are always IS_ROOT:
 531                 */
 532                if (unlikely(IS_ROOT(dentry)))
 533                        b = &dentry->d_sb->s_anon;
 534                else
 535                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 536
 537                hlist_bl_lock(b);
 538                __hlist_bl_del(&dentry->d_hash);
 539                dentry->d_hash.pprev = NULL;
 540                hlist_bl_unlock(b);
 541        }
 542}
 543
 544/**
 545 * d_drop - drop a dentry
 546 * @dentry: dentry to drop
 547 *
 548 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 549 * be found through a VFS lookup any more. Note that this is different from
 550 * deleting the dentry - d_delete will try to mark the dentry negative if
 551 * possible, giving a successful _negative_ lookup, while d_drop will
 552 * just make the cache lookup fail.
 553 *
 554 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 555 * reason (NFS timeouts or autofs deletes).
 556 *
 557 * __d_drop requires dentry->d_lock.
 558 */
 559void __d_drop(struct dentry *dentry)
 560{
 561        if (!d_unhashed(dentry)) {
 562                __d_shrink(dentry);
 563                dentry_rcuwalk_invalidate(dentry);
 564        }
 565}
 566EXPORT_SYMBOL(__d_drop);
 567
 568static void __d_hash_move(struct dentry *dentry, struct hlist_bl_head *new)
 569{
 570        if (!d_unhashed(dentry)) {
 571                struct hlist_bl_head *b;
 572
 573                if (unlikely(IS_ROOT(dentry)))
 574                        b = &dentry->d_sb->s_anon;
 575                else
 576                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 577
 578                hlist_bl_lock(b);
 579                __hlist_bl_del(&dentry->d_hash);
 580                hlist_bl_unlock(b);
 581                dentry_rcuwalk_invalidate(dentry);
 582        }
 583        hlist_bl_lock(new);
 584        hlist_bl_add_head_rcu(&dentry->d_hash, new);
 585        hlist_bl_unlock(new);
 586}
 587
 588
 589void d_drop(struct dentry *dentry)
 590{
 591        spin_lock(&dentry->d_lock);
 592        __d_drop(dentry);
 593        spin_unlock(&dentry->d_lock);
 594}
 595EXPORT_SYMBOL(d_drop);
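
/*
 * Illustrative sketch (hypothetical helper): a network filesystem that
 * stops trusting a cached dentry can unhash it so the next lookup goes
 * back to the server.  Unlike d_delete(), this never leaves a cached
 * negative entry behind.
 */
static void example_invalidate_stale(struct dentry *dentry)
{
        d_drop(dentry);         /* later cache lookups simply fail */
}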
 596
 597static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
 598{
 599        struct dentry *next;
 600        /*
 601         * Inform d_walk() and shrink_dentry_list() that we are no longer
 602         * attached to the dentry tree
 603         */
 604        dentry->d_flags |= DCACHE_DENTRY_KILLED;
 605        if (unlikely(list_empty(&dentry->d_u.d_child)))
 606                return;
 607        __list_del_entry(&dentry->d_u.d_child);
 608        /*
 609         * Cursors can move around the list of children.  While we'd been
 610         * a normal list member, it didn't matter - ->d_child.next would've
 611         * been updated.  However, from now on it won't be and for the
 612         * things like d_walk() it might end up with a nasty surprise.
 613         * Normally d_walk() doesn't care about cursors moving around -
 614         * ->d_lock on parent prevents that and since a cursor has no children
 615         * of its own, we get through it without ever unlocking the parent.
 616         * There is one exception, though - if we ascend from a child that
 617         * gets killed as soon as we unlock it, the next sibling is found
 618         * using the value left in its ->d_child.next.  And if _that_
 619         * pointed to a cursor, and cursor got moved (e.g. by lseek())
 620         * before d_walk() regains parent->d_lock, we'll end up skipping
 621         * everything the cursor had been moved past.
 622         *
 623         * Solution: make sure that the pointer left behind in ->d_child.next
 624         * points to something that won't be moving around.  I.e. skip the
 625         * cursors.
 626         */
 627        while (dentry->d_u.d_child.next != &parent->d_subdirs) {
 628                next = list_entry(dentry->d_u.d_child.next, struct dentry, d_u.d_child);
 629                if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
 630                        break;
 631                dentry->d_u.d_child.next = next->d_u.d_child.next;
 632        }
 633}
 634
 635static void __dentry_kill(struct dentry *dentry)
 636{
 637        struct dentry *parent;
 638        if (IS_ROOT(dentry))
 639                parent = NULL;
 640        else
 641                parent = dentry->d_parent;
 642
 643        /*
 644         * The dentry is now unrecoverably dead to the world.
 645         */
 646        lockref_mark_dead(&dentry->d_lockref);
 647
 648        /*
 649         * inform the fs via d_prune that this dentry is about to be
 650         * unhashed and destroyed.
 651         */
 652        if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry))
 653                dentry->d_op->d_prune(dentry);
 654
 655        dentry_lru_del(dentry);
 656        /* if it was on the hash then remove it */
 657        __d_drop(dentry);
 658        dentry_unlist(dentry, parent);
 659        if (parent)
 660                spin_unlock(&parent->d_lock);
 661        dentry_iput(dentry);
 662        /*
 663         * dentry_iput drops the locks, at which point nobody (except
 664         * transient RCU lookups) can reach this dentry.
 665         */
 666        d_free(dentry);
 667}
 668
 669/*
 670 * Finish off a dentry we've decided to kill.
 671 * dentry->d_lock must be held, returns with it unlocked.
 672 * If ref is non-zero, then decrement the refcount too.
 673 * Returns dentry requiring refcount drop, or NULL if we're done.
 674 */
 675static inline struct dentry *dentry_kill(struct dentry *dentry)
 676        __releases(dentry->d_lock)
 677{
 678        struct inode *inode = dentry->d_inode;
 679        struct dentry *parent = NULL;
 680
 681        if (inode && unlikely(!spin_trylock(&inode->i_lock)))
 682                goto failed;
 683
 684        if (!IS_ROOT(dentry)) {
 685                parent = dentry->d_parent;
 686                if (unlikely(!spin_trylock(&parent->d_lock))) {
 687                        if (inode)
 688                                spin_unlock(&inode->i_lock);
 689                        goto failed;
 690                }
 691        }
 692
 693        __dentry_kill(dentry);
 694        return parent;
 695
 696failed:
 697        spin_unlock(&dentry->d_lock);
 698        return dentry; /* try again with same dentry */
 699}
 700
 701static inline struct dentry *lock_parent(struct dentry *dentry)
 702{
 703        struct dentry *parent = dentry->d_parent;
 704        if (IS_ROOT(dentry))
 705                return NULL;
 706        if (likely(spin_trylock(&parent->d_lock)))
 707                return parent;
 708        spin_unlock(&dentry->d_lock);
 709        rcu_read_lock();
 710again:
 711        parent = ACCESS_ONCE(dentry->d_parent);
 712        spin_lock(&parent->d_lock);
 713        /*
 714         * We can't blindly lock dentry until we are sure
 715         * that we won't violate the locking order.
 716         * Any changes of dentry->d_parent must have
 717         * been done with parent->d_lock held, so
 718         * spin_lock() above is enough of a barrier
 719         * for checking if it's still our child.
 720         */
 721        if (unlikely(parent != dentry->d_parent)) {
 722                spin_unlock(&parent->d_lock);
 723                goto again;
 724        }
 725        rcu_read_unlock();
 726        if (parent != dentry)
 727                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 728        else
 729                parent = NULL;
 730        return parent;
 731}
 732
 733/* 
 734 * This is dput
 735 *
 736 * This is complicated by the fact that we do not want to put
 737 * dentries that are no longer on any hash chain on the unused
 738 * list: we'd much rather just get rid of them immediately.
 739 *
 740 * However, that implies that we have to traverse the dentry
 741 * tree upwards to the parents which might _also_ now be
 742 * scheduled for deletion (it may have been only waiting for
 743 * its last child to go away).
 744 *
 745 * This tail recursion is done by hand as we don't want to depend
 746 * on the compiler to always get this right (gcc generally doesn't).
 747 * Real recursion would eat up our stack space.
 748 */
 749
 750/*
 751 * dput - release a dentry
 752 * @dentry: dentry to release 
 753 *
 754 * Release a dentry. This will drop the usage count and if appropriate
 755 * call the dentry unlink method as well as removing it from the queues and
 756 * releasing its resources. If the parent dentries were scheduled for release
 757 * they too may now get deleted.
 758 */
 759void dput(struct dentry *dentry)
 760{
 761        if (unlikely(!dentry))
 762                return;
 763
 764repeat:
 765        might_sleep();
 766
 767        if (lockref_put_or_lock(&dentry->d_lockref))
 768                return;
 769
 770        /* Unreachable? Get rid of it */
 771        if (unlikely(d_unhashed(dentry)))
 772                goto kill_it;
 773
 774        if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 775                goto kill_it;
 776
 777        if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
 778                if (dentry->d_op->d_delete(dentry))
 779                        goto kill_it;
 780        }
 781
 782        dentry_lru_add(dentry);
 783
 784        dentry->d_lockref.count--;
 785        spin_unlock(&dentry->d_lock);
 786        return;
 787
 788kill_it:
 789        dentry = dentry_kill(dentry);
 790        if (dentry) {
 791                cond_resched();
 792                goto repeat;
 793        }
 794}
 795EXPORT_SYMBOL(dput);
 796
 797
 798/* This must be called with d_lock held */
 799static inline void __dget_dlock(struct dentry *dentry)
 800{
 801        dentry->d_lockref.count++;
 802}
 803
 804static inline void __dget(struct dentry *dentry)
 805{
 806        lockref_get(&dentry->d_lockref);
 807}
 808
 809struct dentry *dget_parent(struct dentry *dentry)
 810{
 811        int gotref;
 812        struct dentry *ret;
 813
 814        /*
 815         * Do optimistic parent lookup without any
 816         * locking.
 817         */
 818        rcu_read_lock();
 819        ret = ACCESS_ONCE(dentry->d_parent);
 820        gotref = lockref_get_not_zero(&ret->d_lockref);
 821        rcu_read_unlock();
 822        if (likely(gotref)) {
 823                if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
 824                        return ret;
 825                dput(ret);
 826        }
 827
 828repeat:
 829        /*
 830         * Don't need rcu_dereference because we re-check it was correct under
 831         * the lock.
 832         */
 833        rcu_read_lock();
 834        ret = dentry->d_parent;
 835        spin_lock(&ret->d_lock);
 836        if (unlikely(ret != dentry->d_parent)) {
 837                spin_unlock(&ret->d_lock);
 838                rcu_read_unlock();
 839                goto repeat;
 840        }
 841        rcu_read_unlock();
 842        BUG_ON(!ret->d_lockref.count);
 843        ret->d_lockref.count++;
 844        spin_unlock(&ret->d_lock);
 845        return ret;
 846}
 847EXPORT_SYMBOL(dget_parent);
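
/*
 * Illustrative sketch (hypothetical caller): pinning a parent across a
 * potentially sleeping operation; dget_parent() guarantees the parent
 * cannot be freed until the matching dput().
 */
static void example_use_parent(struct dentry *dentry)
{
        struct dentry *parent = dget_parent(dentry);

        /* ... safe to sleep and use parent here ... */
        dput(parent);
}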
 848
 849/**
 850 * d_find_alias - grab a hashed alias of inode
 851 * @inode: inode in question
 852 * @want_discon:  flag, used by d_splice_alias, to request
 853 *          that only a DISCONNECTED alias be returned.
 854 *
 855 * If inode has a hashed alias, or is a directory and has any alias,
 856 * acquire the reference to alias and return it. Otherwise return NULL.
 857 * Notice that if inode is a directory there can be only one alias and
 858 * it can be unhashed only if it has no children, or if it is the root
 859 * of a filesystem, or if the directory was renamed and d_revalidate
 860 * was the first vfs operation to notice.
 861 *
 862 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 863 * any other hashed alias over that one unless @want_discon is set,
 864 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 865 */
 866static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 867{
 868        struct dentry *alias, *discon_alias;
 869
 870again:
 871        discon_alias = NULL;
 872        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 873                spin_lock(&alias->d_lock);
 874                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 875                        if (IS_ROOT(alias) &&
 876                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 877                                discon_alias = alias;
 878                        } else if (!want_discon) {
 879                                __dget_dlock(alias);
 880                                spin_unlock(&alias->d_lock);
 881                                return alias;
 882                        }
 883                }
 884                spin_unlock(&alias->d_lock);
 885        }
 886        if (discon_alias) {
 887                alias = discon_alias;
 888                spin_lock(&alias->d_lock);
 889                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 890                        if (IS_ROOT(alias) &&
 891                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 892                                __dget_dlock(alias);
 893                                spin_unlock(&alias->d_lock);
 894                                return alias;
 895                        }
 896                }
 897                spin_unlock(&alias->d_lock);
 898                goto again;
 899        }
 900        return NULL;
 901}
 902
 903struct dentry *d_find_alias(struct inode *inode)
 904{
 905        struct dentry *de = NULL;
 906
 907        if (!hlist_empty(&inode->i_dentry)) {
 908                spin_lock(&inode->i_lock);
 909                de = __d_find_alias(inode, 0);
 910                spin_unlock(&inode->i_lock);
 911        }
 912        return de;
 913}
 914EXPORT_SYMBOL(d_find_alias);
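
/*
 * Illustrative sketch (hypothetical caller, which must already hold a
 * reference on @inode): borrow a hashed alias and drop it again.
 */
static void example_touch_alias(struct inode *inode)
{
        struct dentry *alias = d_find_alias(inode);

        if (alias) {
                pr_debug("alias name: %s\n", alias->d_name.name);
                dput(alias);
        }
}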
 915
 916/*
 917 *      Try to kill dentries associated with this inode.
 918 * WARNING: you must own a reference to inode.
 919 */
 920void d_prune_aliases(struct inode *inode)
 921{
 922        struct dentry *dentry;
 923restart:
 924        spin_lock(&inode->i_lock);
 925        hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 926                spin_lock(&dentry->d_lock);
 927                if (!dentry->d_lockref.count) {
 928                        /*
 929                         * inform the fs via d_prune that this dentry
 930                         * is about to be unhashed and destroyed.
 931                         */
 932                        if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
 933                            !d_unhashed(dentry))
 934                                dentry->d_op->d_prune(dentry);
 935
 936                        __dget_dlock(dentry);
 937                        __d_drop(dentry);
 938                        spin_unlock(&dentry->d_lock);
 939                        spin_unlock(&inode->i_lock);
 940                        dput(dentry);
 941                        goto restart;
 942                }
 943                spin_unlock(&dentry->d_lock);
 944        }
 945        spin_unlock(&inode->i_lock);
 946}
 947EXPORT_SYMBOL(d_prune_aliases);
 948
 949
 950static void shrink_dentry_list(struct list_head *list)
 951{
 952        struct dentry *dentry, *parent;
 953
 954        for (;;) {
 955                struct inode *inode;
 956
 957                cond_resched();
 958                rcu_read_lock();
 959
 960                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 961                if (&dentry->d_lru == list) {
 962                        rcu_read_unlock();
 963                        break; /* empty */
 964                }
 965                spin_lock(&dentry->d_lock);
 966                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 967                        spin_unlock(&dentry->d_lock);
 968                        rcu_read_unlock();
 969                        continue;
 970                }
 971
 972                parent = lock_parent(dentry);
 973
 974                /*
 975                 * We found an inuse dentry which was not removed from
 976                 * the LRU because of laziness during lookup.  Do not free
 977                 * it - just keep it off the LRU list.
 978                 */
 979                if (dentry->d_lockref.count) {
 980                        dentry_lru_del(dentry);
 981                        spin_unlock(&dentry->d_lock);
 982                        if (parent)
 983                                spin_unlock(&parent->d_lock);
 984                        rcu_read_unlock();
 985                        continue;
 986                }
 987
 988                rcu_read_unlock();
 989
 990                inode = dentry->d_inode;
 991                if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
 992                        spin_unlock(&dentry->d_lock);
 993                        if (parent)
 994                                spin_unlock(&parent->d_lock);
 995                        cpu_relax();
 996                        continue;
 997                }
 998
 999                __dentry_kill(dentry);
1000
1001                /*
1002                 * We need to prune ancestors too. This is necessary to prevent
1003                 * quadratic behavior of shrink_dcache_parent(), but is also
1004                 * expected to be beneficial in reducing dentry cache
1005                 * fragmentation.
1006                 */
1007                dentry = parent;
1008                while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
1009                        parent = lock_parent(dentry);
1010                        if (dentry->d_lockref.count != 1) {
1011                                dentry->d_lockref.count--;
1012                                spin_unlock(&dentry->d_lock);
1013                                if (parent)
1014                                        spin_unlock(&parent->d_lock);
1015                                break;
1016                        }
1017                        inode = dentry->d_inode;        /* can't be NULL */
1018                        if (unlikely(!spin_trylock(&inode->i_lock))) {
1019                                spin_unlock(&dentry->d_lock);
1020                                if (parent)
1021                                        spin_unlock(&parent->d_lock);
1022                                cpu_relax();
1023                                continue;
1024                        }
1025                        __dentry_kill(dentry);
1026                        dentry = parent;
1027                }
1028        }
1029}
1030
1031/**
1032 * prune_dcache_sb - shrink the dcache
1033 * @sb: superblock
1034 * @count: number of entries to try to free
1035 * @negative_only: prune only negative dentries if true
1036 * Return: # of dentries reclaimed.
1037 *
1038 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 1039 * done when we need more memory and is called from the superblock
 1040 * shrinker function.
1041 *
1042 * This function may fail to free any resources if all the dentries are in
1043 * use.
1044 *
1045 * For negative dentry pruning, the positive dentries are skipped and
1046 * temporarily put into a positive dentry list. This list will be put
1047 * back to the main LRU list at the end of pruning process.
1048 */
1049static long __prune_dcache_sb(struct super_block *sb, long count,
1050                              bool negative_only)
1051{
1052        struct dentry *dentry;
1053        long orig_cnt = count;
1054        LIST_HEAD(referenced);
1055        LIST_HEAD(positive);
1056        LIST_HEAD(tmp);
1057
1058relock:
1059        spin_lock(&dcache_lru_lock);
1060        while (!list_empty(&sb->s_dentry_lru)) {
1061                dentry = list_entry(sb->s_dentry_lru.prev,
1062                                struct dentry, d_lru);
1063                BUG_ON(dentry->d_sb != sb);
1064
1065                if (!spin_trylock(&dentry->d_lock)) {
1066                        spin_unlock(&dcache_lru_lock);
1067                        cpu_relax();
1068                        goto relock;
1069                }
1070
1071                if (dentry->d_flags & DCACHE_REFERENCED) {
1072                        dentry->d_flags &= ~DCACHE_REFERENCED;
1073                        list_move(&dentry->d_lru, &referenced);
1074                        spin_unlock(&dentry->d_lock);
1075                } else if (negative_only && !d_is_negative(dentry)) {
1076                        list_move(&dentry->d_lru, &positive);
1077                        spin_unlock(&dentry->d_lock);
1078                } else {
1079                        list_move_tail(&dentry->d_lru, &tmp);
1080                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1081                        spin_unlock(&dentry->d_lock);
1082                        if (!--count)
1083                                break;
1084                }
1085                cond_resched_lock(&dcache_lru_lock);
1086        }
1087        if (!list_empty(&referenced))
1088                list_splice(&referenced, &sb->s_dentry_lru);
1089        if (!list_empty(&positive))
1090                list_splice_tail(&positive, &sb->s_dentry_lru);
1091        spin_unlock(&dcache_lru_lock);
1092
1093        shrink_dentry_list(&tmp);
1094        return orig_cnt - count;
1095}
1096
1097void prune_dcache_sb(struct super_block *sb, int count)
1098{
1099        __prune_dcache_sb(sb, count, false);
1100}
1101
1102/**
1103 * shrink_dcache_sb - shrink dcache for a superblock
1104 * @sb: superblock
1105 *
1106 * Shrink the dcache for the specified super block. This is used to free
1107 * the dcache before unmounting a file system.
1108 */
1109void shrink_dcache_sb(struct super_block *sb)
1110{
1111        LIST_HEAD(tmp);
1112
1113        spin_lock(&dcache_lru_lock);
1114        while (!list_empty(&sb->s_dentry_lru)) {
1115                list_splice_init(&sb->s_dentry_lru, &tmp);
1116                spin_unlock(&dcache_lru_lock);
1117                shrink_dentry_list(&tmp);
1118                spin_lock(&dcache_lru_lock);
1119        }
1120        spin_unlock(&dcache_lru_lock);
1121}
1122EXPORT_SYMBOL(shrink_dcache_sb);
1123
1124struct prune_negative_ctrl
1125{
1126        unsigned long   prune_count;
1127        int             prune_percent; /* Each unit = 0.01% */
1128};
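
/*
 * Worked example (illustrative): with 150,000 negative dentries and a
 * limit of 100,000, prune_negative_dentry() below computes
 *      prune_percent = ((150000 - 100000) * 10000 / 150000) + 100
 *                    = 3433 units, i.e. ~34.33%,
 * so roughly a third of each superblock's unused-dentry LRU is scanned.
 */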
1129
1130/*
1131 * Prune dentries from a super block.
1132 */
1133static void prune_negative_one_sb(struct super_block *sb, void *arg)
1134{
1135        unsigned long count = sb->s_nr_dentry_unused;
1136        struct prune_negative_ctrl *ctrl = arg;
1137        long scan = (count * ctrl->prune_percent) / 10000;
1138
1139        if (scan)
1140                ctrl->prune_count += __prune_dcache_sb(sb, scan, true);
1141}
1142
1143/*
 1144 * A workqueue function to prune negative dentries.
1145 */
1146static void prune_negative_dentry(struct work_struct *work)
1147{
1148        long count = get_nr_dentry_negative();
1149        long limit = negative_dentry_limit;
1150        struct prune_negative_ctrl ctrl;
1151        unsigned long start;
1152
1153        if (!limit || count <= limit)
1154                goto requeue_work;
1155
1156        /*
 1157         * Add an extra 1% as a minimum to increase the chance that
 1158         * the post-pruning dentry count stays below the limit.
1159         */
1160        ctrl.prune_count = 0;
1161        ctrl.prune_percent = ((count - limit) * 10000 / count) + 100;
1162        start = jiffies;
1163
1164        /*
 1165         * iterate_supers() takes a read lock on each superblock, blocking
 1166         * concurrent umount.
1167         */
1168        iterate_supers(prune_negative_one_sb, &ctrl);
1169
1170        /*
1171         * Report negative dentry pruning stat.
1172         */
 1173        pr_debug("%lu negative dentries freed in %u ms\n",
1174                 ctrl.prune_count, jiffies_to_msecs(jiffies - start));
1175
1176requeue_work:
1177        /*
1178         * The requeuing will get cancelled if there is a concurrent
1179         * cancel_delayed_work_sync() call from user sysctl operation.
1180         * That call will wait until this work finishes and cancel it.
1181         */
1182        schedule_delayed_work(&prune_negative_dentry_work,
1183                              NEGATIVE_DENTRY_CHECK_PERIOD);
1184}
1185
1186/*
1187 * destroy a single subtree of dentries for unmount
1188 * - see the comments on shrink_dcache_for_umount() for a description of the
1189 *   locking
1190 */
1191#define RESCHED_CHECK_BATCH 1024
1192static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
1193{
1194        struct dentry *parent;
1195        int batch = RESCHED_CHECK_BATCH;
1196
1197        BUG_ON(!IS_ROOT(dentry));
1198
1199        for (;;) {
1200                /* descend to the first leaf in the current subtree */
1201                while (!list_empty(&dentry->d_subdirs))
1202                        dentry = list_entry(dentry->d_subdirs.next,
1203                                            struct dentry, d_u.d_child);
1204
1205                /* consume the dentries from this leaf up through its parents
1206                 * until we find one with children or run out altogether */
1207                do {
1208                        struct inode *inode;
1209
1210                        /*
1211                         * inform the fs that this dentry is about to be
1212                         * unhashed and destroyed.
1213                         */
1214                        if ((dentry->d_flags & DCACHE_OP_PRUNE) &&
1215                            !d_unhashed(dentry))
1216                                dentry->d_op->d_prune(dentry);
1217
1218                        dentry_lru_del(dentry);
1219                        __d_shrink(dentry);
1220
1221                        if (dentry->d_lockref.count != 0) {
1222                                printk(KERN_ERR
1223                                       "BUG: Dentry %p{i=%lx,n=%s}"
1224                                       " still in use (%d)"
1225                                       " [unmount of %s %s]\n",
1226                                       dentry,
1227                                       dentry->d_inode ?
1228                                       dentry->d_inode->i_ino : 0UL,
1229                                       dentry->d_name.name,
1230                                       dentry->d_lockref.count,
1231                                       dentry->d_sb->s_type->name,
1232                                       dentry->d_sb->s_id);
1233                                BUG();
1234                        }
1235
1236                        if (IS_ROOT(dentry)) {
1237                                parent = NULL;
1238                                list_del(&dentry->d_u.d_child);
1239                        } else {
1240                                parent = dentry->d_parent;
1241                                parent->d_lockref.count--;
1242                                list_del(&dentry->d_u.d_child);
1243                        }
1244
1245                        inode = dentry->d_inode;
1246                        if (inode) {
1247                                dentry->d_inode = NULL;
1248                                hlist_del_init(&dentry->d_alias);
1249                                if (dentry->d_op && dentry->d_op->d_iput)
1250                                        dentry->d_op->d_iput(dentry, inode);
1251                                else
1252                                        iput(inode);
1253                        }
1254
1255                        d_free(dentry);
1256
1257                        /* finished when we fall off the top of the tree,
1258                         * otherwise we ascend to the parent and move to the
1259                         * next sibling if there is one */
1260                        if (!parent)
1261                                return;
1262                        dentry = parent;
1263                        if (!--batch) {
1264                                cond_resched();
1265                                batch = RESCHED_CHECK_BATCH;
1266                        }
1267                } while (list_empty(&dentry->d_subdirs));
1268
1269                dentry = list_entry(dentry->d_subdirs.next,
1270                                    struct dentry, d_u.d_child);
1271        }
1272}
1273
1274/*
1275 * destroy the dentries attached to a superblock on unmounting
1276 * - we don't need to use dentry->d_lock because:
1277 *   - the superblock is detached from all mountings and open files, so the
1278 *     dentry trees will not be rearranged by the VFS
1279 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
1280 *     any dentries belonging to this superblock that it comes across
1281 *   - the filesystem itself is no longer permitted to rearrange the dentries
1282 *     in this superblock
1283 */
1284void shrink_dcache_for_umount(struct super_block *sb)
1285{
1286        struct dentry *dentry;
1287
1288        if (down_read_trylock(&sb->s_umount))
1289                BUG();
1290
1291        dentry = sb->s_root;
1292        sb->s_root = NULL;
1293        dentry->d_lockref.count--;
1294        shrink_dcache_for_umount_subtree(dentry);
1295
1296        while (!hlist_bl_empty(&sb->s_anon)) {
1297                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
1298                shrink_dcache_for_umount_subtree(dentry);
1299        }
1300}
1301
1302/**
 1303 * enum d_walk_ret - action to take during tree walk
 1304 * @D_WALK_CONTINUE:    continue walk
1305 * @D_WALK_QUIT:        quit walk
1306 * @D_WALK_NORETRY:     quit when retry is needed
1307 * @D_WALK_SKIP:        skip this dentry and its children
1308 */
1309enum d_walk_ret {
1310        D_WALK_CONTINUE,
1311        D_WALK_QUIT,
1312        D_WALK_NORETRY,
1313        D_WALK_SKIP,
1314};
1315
1316/**
1317 * d_walk - walk the dentry tree
1318 * @parent:     start of walk
1319 * @data:       data passed to @enter() and @finish()
1320 * @enter:      callback when first entering the dentry
1321 * @finish:     callback when successfully finished the walk
1322 *
1323 * The @enter() and @finish() callbacks are called with d_lock held.
1324 */
1325static void d_walk(struct dentry *parent, void *data,
1326                   enum d_walk_ret (*enter)(void *, struct dentry *),
1327                   void (*finish)(void *))
1328{
1329        struct dentry *this_parent;
1330        struct list_head *next;
1331        unsigned seq = 0;
1332        enum d_walk_ret ret;
1333        bool retry = true;
1334
1335again:
1336        read_seqbegin_or_lock(&rename_lock, &seq);
1337        this_parent = parent;
1338        spin_lock(&this_parent->d_lock);
1339
1340        ret = enter(data, this_parent);
1341        switch (ret) {
1342        case D_WALK_CONTINUE:
1343                break;
1344        case D_WALK_QUIT:
1345        case D_WALK_SKIP:
1346                goto out_unlock;
1347        case D_WALK_NORETRY:
1348                retry = false;
1349                break;
1350        }
1351repeat:
1352        next = this_parent->d_subdirs.next;
1353resume:
1354        while (next != &this_parent->d_subdirs) {
1355                struct list_head *tmp = next;
1356                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1357                next = tmp->next;
1358
1359                if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1360                        continue;
1361
1362                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1363
1364                ret = enter(data, dentry);
1365                switch (ret) {
1366                case D_WALK_CONTINUE:
1367                        break;
1368                case D_WALK_QUIT:
1369                        spin_unlock(&dentry->d_lock);
1370                        goto out_unlock;
1371                case D_WALK_NORETRY:
1372                        retry = false;
1373                        break;
1374                case D_WALK_SKIP:
1375                        spin_unlock(&dentry->d_lock);
1376                        continue;
1377                }
1378
1379                if (!list_empty(&dentry->d_subdirs)) {
1380                        spin_unlock(&this_parent->d_lock);
1381                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1382                        this_parent = dentry;
1383                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1384                        goto repeat;
1385                }
1386                spin_unlock(&dentry->d_lock);
1387        }
1388        /*
1389         * All done at this level ... ascend and resume the search.
1390         */
1391        rcu_read_lock();
1392ascend:
1393        if (this_parent != parent) {
1394                struct dentry *child = this_parent;
1395                this_parent = child->d_parent;
1396
1397                spin_unlock(&child->d_lock);
1398                spin_lock(&this_parent->d_lock);
1399
1400                /* might go back up the wrong parent if we have had a rename. */
1401                if (need_seqretry(&rename_lock, seq))
1402                        goto rename_retry;
1403                /* go into the first sibling still alive */
1404                do {
1405                        next = child->d_u.d_child.next;
1406                        if (next == &this_parent->d_subdirs)
1407                                goto ascend;
1408                        child = list_entry(next, struct dentry, d_u.d_child);
1409                } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
1410                rcu_read_unlock();
1411                goto resume;
1412        }
1413        if (need_seqretry(&rename_lock, seq))
1414                goto rename_retry;
1415        rcu_read_unlock();
1416        if (finish)
1417                finish(data);
1418
1419out_unlock:
1420        spin_unlock(&this_parent->d_lock);
1421        done_seqretry(&rename_lock, seq);
1422        return;
1423
1424rename_retry:
1425        spin_unlock(&this_parent->d_lock);
1426        rcu_read_unlock();
1427        BUG_ON(seq & 1);
1428        if (!retry)
1429                return;
1430        seq = 1;
1431        goto again;
1432}
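
/*
 * Illustrative sketch (hypothetical callback): the minimal shape of a
 * d_walk() user, counting every dentry in the subtree rooted at
 * @parent.  check_mount() below is a real user of the same pattern.
 */
static enum d_walk_ret example_count_one(void *data, struct dentry *dentry)
{
        (*(long *)data)++;
        return D_WALK_CONTINUE;
}

static long example_count_subtree(struct dentry *parent)
{
        long count = 0;

        d_walk(parent, &count, example_count_one, NULL);
        return count;
}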
1433
1434/*
1435 * Search for at least 1 mount point in the dentry's subdirs.
1436 * We descend to the next level whenever the d_subdirs
1437 * list is non-empty and continue searching.
1438 */
1439
1440/**
1441 * have_submounts - check for mounts over a dentry
1442 * @parent: dentry to check.
1443 *
1444 * Return true if the parent or its subdirectories contain
1445 * a mount point
1446 */
1447
1448static enum d_walk_ret check_mount(void *data, struct dentry *dentry)
1449{
1450        int *ret = data;
1451        if (d_mountpoint(dentry)) {
1452                *ret = 1;
1453                return D_WALK_QUIT;
1454        }
1455        return D_WALK_CONTINUE;
1456}
1457
1458int have_submounts(struct dentry *parent)
1459{
1460        int ret = 0;
1461
1462        d_walk(parent, &ret, check_mount, NULL);
1463
1464        return ret;
1465}
1466EXPORT_SYMBOL(have_submounts);
1467
1468struct check_mount {
1469        struct vfsmount *mnt;
1470        unsigned int mounted;
1471};
1472
1473static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
1474{
1475        struct check_mount *info = data;
1476        struct path path = { .mnt = info->mnt, .dentry = dentry };
1477
1478        if (likely(!d_mountpoint(dentry)))
1479                return D_WALK_CONTINUE;
1480        if (__path_is_mountpoint(&path)) {
1481                info->mounted = 1;
1482                return D_WALK_QUIT;
1483        }
1484        return D_WALK_CONTINUE;
1485}
1486
1487/**
1488 * path_has_submounts - check for mounts over a dentry in the
1489 *                      current namespace.
1490 * @parent: path to check.
1491 *
1492 * Return true if the parent or its subdirectories contain
1493 * a mount point in the current namespace.
1494 */
1495int path_has_submounts(const struct path *parent)
1496{
1497        struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
1498
1499        read_seqlock_excl(&mount_lock);
1500        d_walk(parent->dentry, &data, path_check_mount, NULL);
1501        read_sequnlock_excl(&mount_lock);
1502
1503        return data.mounted;
1504}
1505EXPORT_SYMBOL(path_has_submounts);
1506
1507/*
1508 * Called by mount code to set a mountpoint and check if the mountpoint is
1509 * reachable (e.g. NFS can unhash a directory dentry and then the complete
1510 * subtree can become unreachable).
1511 *
1512 * Only one of d_invalidate() and d_set_mounted() must succeed.  For
1513 * this reason take rename_lock and d_lock on dentry and ancestors.
1514 */
1515int d_set_mounted(struct dentry *dentry)
1516{
1517        struct dentry *p;
1518        int ret = -ENOENT;
1519        write_seqlock(&rename_lock);
1520        for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1521                /* Need exclusion wrt. d_invalidate() */
1522                spin_lock(&p->d_lock);
1523                if (unlikely(d_unhashed(p))) {
1524                        spin_unlock(&p->d_lock);
1525                        goto out;
1526                }
1527                spin_unlock(&p->d_lock);
1528        }
1529        spin_lock(&dentry->d_lock);
1530        if (!d_unlinked(dentry)) {
1531                ret = -EBUSY;
1532                if (!d_mountpoint(dentry)) {
1533                        dentry->d_flags |= DCACHE_MOUNTED;
1534                        ret = 0;
1535                }
1536        }
1537        spin_unlock(&dentry->d_lock);
1538out:
1539        write_sequnlock(&rename_lock);
1540        return ret;
1541}
1542
1543/*
1544 * Search the dentry child list of the specified parent,
1545 * and move any unused dentries to the dispose list for
1546 * shrink_dentry_list(). We descend to the next level
1547 * whenever the d_subdirs list is non-empty and continue
1548 * searching.
1549 *
1550 * select_collect() leaves data->found at zero iff there
1551 * are no unused children, otherwise it is the number of
1552 * children moved to the dispose list. This may not be
1553 * the total number of unused children, because the walk
1554 * can quit early (D_WALK_QUIT) to honor latency
1555 * constraints.
1556 */
1557
1558struct select_data {
1559        struct dentry *start;
1560        struct list_head dispose;
1561        int found;
1562};
1563
1564static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1565{
1566        struct select_data *data = _data;
1567        enum d_walk_ret ret = D_WALK_CONTINUE;
1568
1569        if (data->start == dentry)
1570                goto out;
1571
1572        /*
1573         * move only zero ref count dentries to the dispose list.
1574         *
1575         * Those which are presently on the shrink list, being processed
1576         * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1577         * loop in shrink_dcache_parent() might not make any progress
1578         * and loop forever.
1579         */
1580        if (dentry->d_lockref.count) {
1581                dentry_lru_del(dentry);
1582        } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1583                dentry_lru_move_list(dentry, &data->dispose);
1584                dentry->d_flags |= DCACHE_SHRINK_LIST;
1585                data->found++;
1586                ret = D_WALK_NORETRY;
1587        }
1588        /*
1589         * We can return to the caller if we have found some (this
1590         * ensures forward progress). We'll be coming back to find
1591         * the rest.
1592         */
1593        if (data->found && need_resched())
1594                ret = D_WALK_QUIT;
1595out:
1596        return ret;
1597}
1598
1599/**
1600 * shrink_dcache_parent - prune dcache
1601 * @parent: parent of entries to prune
1602 *
1603 * Prune the dcache to remove unused children of the parent dentry.
1604 */
1605void shrink_dcache_parent(struct dentry *parent)
1606{
1607        for (;;) {
1608                struct select_data data;
1609
1610                INIT_LIST_HEAD(&data.dispose);
1611                data.start = parent;
1612                data.found = 0;
1613
1614                d_walk(parent, &data, select_collect, NULL);
1615                if (!data.found)
1616                        break;
1617
1618                shrink_dentry_list(&data.dispose);
1619        }
1620}
1621EXPORT_SYMBOL(shrink_dcache_parent);
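/*
 * Illustrative sketch (not part of this file): a typical caller prunes
 * the unused children of a subtree before trying to drop it.  The
 * myfs_prune_tree() name is hypothetical:
 *
 *	static int myfs_prune_tree(struct dentry *dentry)
 *	{
 *		// get rid of unused children first...
 *		shrink_dcache_parent(dentry);
 *		// ...then refuse if something is still mounted below
 *		if (have_submounts(dentry))
 *			return -EBUSY;
 *		d_drop(dentry);
 *		return 0;
 *	}
 */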
1622
1623struct detach_data {
1624        struct select_data select;
1625        struct dentry *mountpoint;
1626};
1627static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
1628{
1629        struct detach_data *data = _data;
1630
1631        if (d_mountpoint(dentry)) {
1632                __dget_dlock(dentry);
1633                data->mountpoint = dentry;
1634                return D_WALK_QUIT;
1635        }
1636
1637        return select_collect(&data->select, dentry);
1638}
1639
1640static void check_and_drop(void *_data)
1641{
1642        struct detach_data *data = _data;
1643
1644        if (!data->mountpoint && !data->select.found)
1645                __d_drop(data->select.start);
1646}
1647
1648/**
1649 * d_invalidate - detach submounts, prune dcache, and drop
1650 * @dentry: dentry to invalidate (aka detach, prune and drop)
1651 *
1652 * no dcache lock.
1653 *
1654 * The final d_drop is done as an atomic operation relative to
1655 * rename_lock ensuring there are no races with d_set_mounted.  This
1656 * ensures there are no unhashed dentries on the path to a mountpoint.
1657 */
1658int d_invalidate(struct dentry *dentry)
1659{
1660        /*
1661         * If it's already been dropped, return OK.
1662         */
1663        spin_lock(&dentry->d_lock);
1664        if (d_unhashed(dentry)) {
1665                spin_unlock(&dentry->d_lock);
1666                return 0;
1667        }
1668        spin_unlock(&dentry->d_lock);
1669
1670        /* Negative dentries can be dropped without further checks */
1671        if (!dentry->d_inode) {
1672                d_drop(dentry);
1673                return 0;
1674        }
1675
1676        for (;;) {
1677                struct detach_data data;
1678
1679                data.mountpoint = NULL;
1680                INIT_LIST_HEAD(&data.select.dispose);
1681                data.select.start = dentry;
1682                data.select.found = 0;
1683
1684                d_walk(dentry, &data, detach_and_collect, check_and_drop);
1685
1686                if (data.select.found)
1687                        shrink_dentry_list(&data.select.dispose);
1688
1689                if (data.mountpoint) {
1690                        if (may_detach_mounts) {
1691                                detach_mounts(data.mountpoint);
1692                                dput(data.mountpoint);
1693                        } else {
1694                                dput(data.mountpoint);
1695                                return -EBUSY;
1696                        }
1697                }
1698
1699                if (!data.mountpoint && !data.select.found)
1700                        return 0;
1701        }
1702}
1703EXPORT_SYMBOL(d_invalidate);
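/*
 * Illustrative sketch (not part of this file): a caller that discovers a
 * stale dentry via ->d_revalidate() kicks it out of the tree with
 * d_invalidate(), much as the VFS lookup path does.  The myfs_check()
 * name is hypothetical:
 *
 *	static int myfs_check(struct dentry *dentry, unsigned int flags)
 *	{
 *		int valid = dentry->d_op->d_revalidate(dentry, flags);
 *
 *		if (valid < 0)
 *			return valid;
 *		if (!valid)
 *			// stale: detach submounts if allowed, prune, drop
 *			return d_invalidate(dentry);
 *		return 0;
 *	}
 */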
1704
1705/**
1706 * __d_alloc    -       allocate a dcache entry
1707 * @sb: filesystem it will belong to
1708 * @name: qstr of the name
1709 *
1710 * Allocates a dentry. It returns %NULL if there is insufficient memory
1711 * available. On success the dentry is returned. The name passed in is
1712 * copied, so the caller's copy may be reused after this call.
1713 */
1714 
1715struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1716{
1717        struct dentry *dentry;
1718        char *dname;
1719
1720        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1721        if (!dentry)
1722                return NULL;
1723
1724        /*
1725         * We guarantee that the inline name is always NUL-terminated.
1726         * This way the memcpy() done by the name switching in rename
1727         * will still always have a NUL at the end, even if we might
1728         * be overwriting an internal NUL character
1729         */
1730        dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1731        if (name->len > DNAME_INLINE_LEN-1) {
1732                dname = kmalloc(name->len + 1, GFP_KERNEL);
1733                if (!dname) {
1734                        kmem_cache_free(dentry_cache, dentry); 
1735                        return NULL;
1736                }
1737        } else  {
1738                dname = dentry->d_iname;
1739        }       
1740
1741        dentry->d_name.len = name->len;
1742        dentry->d_name.hash = name->hash;
1743        memcpy(dname, name->name, name->len);
1744        dname[name->len] = 0;
1745
1746        /* Make sure we always see the terminating NUL character */
1747        smp_wmb();
1748        dentry->d_name.name = dname;
1749
1750        dentry->d_lockref.count = 1;
1751        dentry->d_flags = 0;
1752        spin_lock_init(&dentry->d_lock);
1753        seqcount_init(&dentry->d_seq);
1754        dentry->d_inode = NULL;
1755        dentry->d_parent = dentry;
1756        dentry->d_sb = sb;
1757        dentry->d_op = NULL;
1758        dentry->d_fsdata = NULL;
1759        INIT_HLIST_BL_NODE(&dentry->d_hash);
1760        INIT_LIST_HEAD(&dentry->d_lru);
1761        INIT_LIST_HEAD(&dentry->d_subdirs);
1762        INIT_HLIST_NODE(&dentry->d_alias);
1763        INIT_LIST_HEAD(&dentry->d_u.d_child);
1764        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1765
1766        this_cpu_inc(nr_dentry);
1767
1768        return dentry;
1769}
1770
1771/**
1772 * d_alloc      -       allocate a dcache entry
1773 * @parent: parent of entry to allocate
1774 * @name: qstr of the name
1775 *
1776 * Allocates a dentry. It returns %NULL if there is insufficient memory
1777 * available. On success the dentry is returned. The name passed in is
1778 * copied, so the caller's copy may be reused after this call.
1779 */
1780struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1781{
1782        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1783        if (!dentry)
1784                return NULL;
1785
1786        dentry->d_flags |= DCACHE_RCUACCESS;
1787        spin_lock(&parent->d_lock);
1788        /*
1789         * don't need child lock because it is not subject
1790         * to concurrency here
1791         */
1792        __dget_dlock(parent);
1793        dentry->d_parent = parent;
1794        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1795        spin_unlock(&parent->d_lock);
1796
1797        return dentry;
1798}
1799EXPORT_SYMBOL(d_alloc);
1800
1801struct dentry *d_alloc_anon(struct super_block *sb)
1802{
1803        static const struct qstr anonstring = QSTR_INIT("/", 1);
1804
1805        return __d_alloc(sb, &anonstring);
1806}
1807EXPORT_SYMBOL(d_alloc_anon);
1808
1809struct dentry *d_alloc_cursor(struct dentry * parent)
1810{
1811        struct dentry *dentry = d_alloc_anon(parent->d_sb);
1812        if (dentry) {
1813                dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
1814                dentry->d_parent = dget(parent);
1815        }
1816        return dentry;
1817}
1818
1819/**
1820 * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1821 * @sb: the superblock
1822 * @name: qstr of the name
1823 *
1824 * For a filesystem that just pins its dentries in memory and never
1825 * performs lookups at all, return an unhashed IS_ROOT dentry.
1826 */
1827struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1828{
1829        return __d_alloc(sb, name);
1830}
1831EXPORT_SYMBOL(d_alloc_pseudo);
1832
1833struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1834{
1835        struct qstr q;
1836
1837        q.name = name;
1838        q.len = strlen(name);
1839        q.hash = full_name_hash(q.name, q.len);
1840        return d_alloc(parent, &q);
1841}
1842EXPORT_SYMBOL(d_alloc_name);
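/*
 * Illustrative sketch (not part of this file): pseudo filesystems build
 * their trees by pairing d_alloc_name() with d_add().  The
 * myfs_create_node() name is hypothetical:
 *
 *	static struct dentry *myfs_create_node(struct dentry *parent,
 *					       struct inode *inode,
 *					       const char *name)
 *	{
 *		struct dentry *dentry = d_alloc_name(parent, name);
 *
 *		if (!dentry)
 *			return NULL;
 *		// instantiate and hash in one go
 *		d_add(dentry, inode);
 *		return dentry;
 *	}
 */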
1843
1844void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1845{
1846        WARN_ON_ONCE(dentry->d_op);
1847        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1848                                DCACHE_OP_COMPARE       |
1849                                DCACHE_OP_REVALIDATE    |
1850                                DCACHE_OP_WEAK_REVALIDATE       |
1851                                DCACHE_OP_DELETE        |
1852                                DCACHE_OP_REAL));
1853        dentry->d_op = op;
1854        if (!op)
1855                return;
1856        if (op->d_hash)
1857                dentry->d_flags |= DCACHE_OP_HASH;
1858        if (op->d_compare)
1859                dentry->d_flags |= DCACHE_OP_COMPARE;
1860        if (op->d_revalidate)
1861                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1862        if (op->d_weak_revalidate)
1863                dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1864        if (op->d_delete)
1865                dentry->d_flags |= DCACHE_OP_DELETE;
1866        if (op->d_prune)
1867                dentry->d_flags |= DCACHE_OP_PRUNE;
1868
1869        if (get_real_dop(dentry))
1870                dentry->d_flags |= DCACHE_OP_REAL;
1871}
1872EXPORT_SYMBOL(d_set_d_op);
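/*
 * Illustrative sketch (not part of this file): rather than calling
 * d_set_d_op() on each dentry, most filesystems set sb->s_d_op once at
 * mount time and let __d_alloc() apply it to every new dentry.  The
 * myfs_* names are hypothetical:
 *
 *	static const struct dentry_operations myfs_dentry_ops = {
 *		.d_revalidate	= myfs_d_revalidate,
 *		.d_delete	= myfs_d_delete,
 *	};
 *
 *	// in ->fill_super(), before any dentries are created:
 *	sb->s_d_op = &myfs_dentry_ops;
 */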
1873
1874static unsigned d_flags_for_inode(struct inode *inode)
1875{
1876        unsigned add_flags = DCACHE_FILE_TYPE;
1877
1878        if (!inode)
1879                return DCACHE_MISS_TYPE;
1880
1881        if (S_ISDIR(inode->i_mode)) {
1882                add_flags = DCACHE_DIRECTORY_TYPE;
1883                if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1884                        if (unlikely(!inode->i_op->lookup))
1885                                add_flags = DCACHE_AUTODIR_TYPE;
1886                        else
1887                                inode->i_opflags |= IOP_LOOKUP;
1888                }
1889        } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1890                if (unlikely(inode->i_op->follow_link))
1891                        add_flags = DCACHE_SYMLINK_TYPE;
1892                else
1893                        inode->i_opflags |= IOP_NOFOLLOW;
1894        }
1895
1896        if (unlikely(IS_AUTOMOUNT(inode)))
1897                add_flags |= DCACHE_NEED_AUTOMOUNT;
1898        return add_flags;
1899}
1900
1901static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1902{
1903        unsigned add_flags = d_flags_for_inode(inode);
1904
1905        spin_lock(&dentry->d_lock);
1906        /*
1907         * Decrement negative dentry count if it was in the LRU list.
1908         */
1909        if (dentry->d_flags & DCACHE_LRU_LIST)
1910                this_cpu_dec(nr_dentry_negative);
1911        __d_set_type(dentry, add_flags);
1912        if (inode)
1913                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1914        dentry->d_inode = inode;
1915        dentry_rcuwalk_invalidate(dentry);
1916        if (inode)
1917                fsnotify_update_flags(dentry);
1918        spin_unlock(&dentry->d_lock);
1919}
1920
1921/**
1922 * d_instantiate - fill in inode information for a dentry
1923 * @entry: dentry to complete
1924 * @inode: inode to attach to this dentry
1925 *
1926 * Fill in inode information in the entry.
1927 *
1928 * This turns negative dentries into productive full members
1929 * of society.
1930 *
1931 * NOTE! This assumes that the inode count has been incremented
1932 * (or otherwise set) by the caller to indicate that it is now
1933 * in use by the dcache.
1934 */
1935 
1936void d_instantiate(struct dentry *entry, struct inode * inode)
1937{
1938        BUG_ON(!hlist_unhashed(&entry->d_alias));
1939        if (inode)
1940                spin_lock(&inode->i_lock);
1941        __d_instantiate(entry, inode);
1942        if (inode)
1943                spin_unlock(&inode->i_lock);
1944        security_d_instantiate(entry, inode);
1945}
1946EXPORT_SYMBOL(d_instantiate);
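/*
 * Illustrative sketch (not part of this file): the classic caller is a
 * ->create() method, which allocates the new inode and then binds it to
 * the negative dentry the VFS passed in.  The myfs_* names are
 * hypothetical:
 *
 *	static int myfs_create(struct inode *dir, struct dentry *dentry,
 *			       umode_t mode, bool excl)
 *	{
 *		struct inode *inode = myfs_new_inode(dir, mode);
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *		// turns the negative dentry positive
 *		d_instantiate(dentry, inode);
 *		return 0;
 *	}
 */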
1947
1948/*
1949 * This should be equivalent to d_instantiate() + unlock_new_inode(),
1950 * with lockdep-related part of unlock_new_inode() done before
1951 * anything else.  Use that instead of open-coding d_instantiate()/
1952 * unlock_new_inode() combinations.
1953 */
1954void d_instantiate_new(struct dentry *entry, struct inode *inode)
1955{
1956        BUG_ON(!hlist_unhashed(&entry->d_alias));
1957        BUG_ON(!inode);
1958        lockdep_annotate_inode_mutex_key(inode);
1959        spin_lock(&inode->i_lock);
1960        __d_instantiate(entry, inode);
1961        WARN_ON(!(inode->i_state & I_NEW));
1962        inode->i_state &= ~I_NEW & ~I_CREATING;
1963        smp_mb();
1964        wake_up_bit(&inode->i_state, __I_NEW);
1965        spin_unlock(&inode->i_lock);
1966        security_d_instantiate(entry, inode);
1967}
1968EXPORT_SYMBOL(d_instantiate_new);
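/*
 * Illustrative sketch (not part of this file): a ->create() that made
 * its inode visible with I_NEW set (e.g. via insert_inode_locked())
 * should finish with d_instantiate_new(), which also clears I_NEW and
 * wakes waiters:
 *
 *	err = insert_inode_locked(inode);
 *	if (err)
 *		goto out_iput;
 *	...
 *	d_instantiate_new(dentry, inode);
 *	return 0;
 */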
1969
1970/**
1971 * d_instantiate_unique - instantiate a non-aliased dentry
1972 * @entry: dentry to instantiate
1973 * @inode: inode to attach to this dentry
1974 *
1975 * Fill in inode information in the entry. On success, it returns NULL.
1976 * If an unhashed alias of "entry" already exists, then we return the
1977 * aliased dentry instead and drop one reference to inode.
1978 *
1979 * Note that in order to avoid conflicts with rename() etc, the caller
1980 * had better be holding the parent directory semaphore.
1981 *
1982 * This also assumes that the inode count has been incremented
1983 * (or otherwise set) by the caller to indicate that it is now
1984 * in use by the dcache.
1985 */
1986static struct dentry *__d_instantiate_unique(struct dentry *entry,
1987                                             struct inode *inode)
1988{
1989        struct dentry *alias;
1990        int len = entry->d_name.len;
1991        const char *name = entry->d_name.name;
1992        unsigned int hash = entry->d_name.hash;
1993
1994        if (!inode) {
1995                __d_instantiate(entry, NULL);
1996                return NULL;
1997        }
1998
1999        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
2000                /*
2001                 * Don't need alias->d_lock here, because aliases with
2002                 * d_parent == entry->d_parent are not subject to name or
2003                 * parent changes, because the parent inode i_mutex is held.
2004                 */
2005                if (alias->d_name.hash != hash)
2006                        continue;
2007                if (alias->d_parent != entry->d_parent)
2008                        continue;
2009                if (alias->d_name.len != len)
2010                        continue;
2011                if (dentry_cmp(alias, name, len))
2012                        continue;
2013                __dget(alias);
2014                return alias;
2015        }
2016
2017        __d_instantiate(entry, inode);
2018        return NULL;
2019}
2020
2021struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
2022{
2023        struct dentry *result;
2024
2025        BUG_ON(!hlist_unhashed(&entry->d_alias));
2026
2027        if (inode)
2028                spin_lock(&inode->i_lock);
2029        result = __d_instantiate_unique(entry, inode);
2030        if (inode)
2031                spin_unlock(&inode->i_lock);
2032
2033        if (!result) {
2034                security_d_instantiate(entry, inode);
2035                return NULL;
2036        }
2037
2038        BUG_ON(!d_unhashed(result));
2039        iput(inode);
2040        return result;
2041}
2042
2043EXPORT_SYMBOL(d_instantiate_unique);
2044
2045/**
2046 * d_instantiate_no_diralias - instantiate a non-aliased dentry
2047 * @entry: dentry to complete
2048 * @inode: inode to attach to this dentry
2049 *
2050 * Fill in inode information in the entry.  If a directory alias is found, then
2051 * return an error (and drop inode).  Together with d_materialise_unique() this
2052 * guarantees that a directory inode may never have more than one alias.
2053 */
2054int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
2055{
2056        BUG_ON(!hlist_unhashed(&entry->d_alias));
2057
2058        spin_lock(&inode->i_lock);
2059        if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
2060                spin_unlock(&inode->i_lock);
2061                iput(inode);
2062                return -EBUSY;
2063        }
2064        __d_instantiate(entry, inode);
2065        spin_unlock(&inode->i_lock);
2066        security_d_instantiate(entry, inode);
2067
2068        return 0;
2069}
2070EXPORT_SYMBOL(d_instantiate_no_diralias);
2071
2072struct dentry *d_make_root(struct inode *root_inode)
2073{
2074        struct dentry *res = NULL;
2075
2076        if (root_inode) {
2077                res = d_alloc_anon(root_inode->i_sb);
2078                if (res)
2079                        d_instantiate(res, root_inode);
2080                else
2081                        iput(root_inode);
2082        }
2083        return res;
2084}
2085EXPORT_SYMBOL(d_make_root);
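/*
 * Illustrative sketch (not part of this file): ->fill_super() methods
 * use d_make_root() to set s_root; note that it consumes the inode
 * reference even on failure.  The myfs_* names are hypothetical:
 *
 *	static int myfs_fill_super(struct super_block *sb, void *data,
 *				   int silent)
 *	{
 *		struct inode *root = myfs_make_root_inode(sb);
 *
 *		if (IS_ERR(root))
 *			return PTR_ERR(root);
 *		sb->s_root = d_make_root(root);
 *		if (!sb->s_root)
 *			return -ENOMEM;	// root inode already released
 *		return 0;
 *	}
 */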
2086
2087static struct dentry * __d_find_any_alias(struct inode *inode)
2088{
2089        struct dentry *alias;
2090
2091        if (hlist_empty(&inode->i_dentry))
2092                return NULL;
2093        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
2094        __dget(alias);
2095        return alias;
2096}
2097
2098/**
2099 * d_find_any_alias - find any alias for a given inode
2100 * @inode: inode to find an alias for
2101 *
2102 * If any aliases exist for the given inode, take and return a
2103 * reference for one of them.  If no aliases exist, return %NULL.
2104 */
2105struct dentry *d_find_any_alias(struct inode *inode)
2106{
2107        struct dentry *de;
2108
2109        spin_lock(&inode->i_lock);
2110        de = __d_find_any_alias(inode);
2111        spin_unlock(&inode->i_lock);
2112        return de;
2113}
2114EXPORT_SYMBOL(d_find_any_alias);
2115
2116struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
2117{
2118        struct dentry *res;
2119        unsigned add_flags;
2120
2121        spin_lock(&inode->i_lock);
2122        res = __d_find_any_alias(inode);
2123        if (res) {
2124                spin_unlock(&inode->i_lock);
2125                security_d_instantiate(res, inode);
2126                dput(dentry);
2127                goto out_iput;
2128        }
2129
2130        /* attach a disconnected dentry */
2131        add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED;
2132
2133        spin_lock(&dentry->d_lock);
2134        dentry->d_inode = inode;
2135        dentry->d_flags |= add_flags;
2136        hlist_add_head(&dentry->d_alias, &inode->i_dentry);
2137        hlist_bl_lock(&dentry->d_sb->s_anon);
2138        hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_anon);
2139        hlist_bl_unlock(&dentry->d_sb->s_anon);
2140        spin_unlock(&dentry->d_lock);
2141        spin_unlock(&inode->i_lock);
2142        security_d_instantiate(dentry, inode);
2143
2144        return dentry;
2145
2146 out_iput:
2147        iput(inode);
2148        return res;
2149}
2150EXPORT_SYMBOL(d_instantiate_anon);
2151
2152/**
2153 * d_obtain_alias - find or allocate a dentry for a given inode
2154 * @inode: inode to allocate the dentry for
2155 *
2156 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
2157 * similar open by handle operations.  The returned dentry may be anonymous,
2158 * or may have a full name (if the inode was already in the cache).
2159 *
2160 * When called on a directory inode, we must ensure that the inode only ever
2161 * has one dentry.  If a dentry is found, that is returned instead of
2162 * allocating a new one.
2163 *
2164 * On successful return, the reference to the inode has been transferred
2165 * to the dentry.  In case of an error the reference on the inode is released.
2166 * To make it easier to use in export operations a %NULL or IS_ERR inode may
2167 * be passed in and the error will be propagated to the return value,
2168 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
2169 */
2170struct dentry *d_obtain_alias(struct inode *inode)
2171{
2172        struct dentry *tmp;
2173        struct dentry *res;
2174
2175        if (!inode)
2176                return ERR_PTR(-ESTALE);
2177        if (IS_ERR(inode))
2178                return ERR_CAST(inode);
2179
2180        res = d_find_any_alias(inode);
2181        if (res)
2182                goto out_iput;
2183
2184        tmp = d_alloc_anon(inode->i_sb);
2185        if (!tmp) {
2186                res = ERR_PTR(-ENOMEM);
2187                goto out_iput;
2188        }
2189
2190        return d_instantiate_anon(tmp, inode);
2191
2192 out_iput:
2193        if (res && !IS_ERR(res))
2194                security_d_instantiate(res, inode);
2195        iput(inode);
2196        return res;
2197}
2198EXPORT_SYMBOL(d_obtain_alias);
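/*
 * Illustrative sketch (not part of this file): the typical caller is an
 * exportfs ->fh_to_dentry() method, which decodes the filehandle into an
 * inode and lets d_obtain_alias() deal with NULL/IS_ERR inodes and alias
 * reuse.  The myfs_iget() name is hypothetical:
 *
 *	static struct dentry *myfs_fh_to_dentry(struct super_block *sb,
 *						struct fid *fid,
 *						int fh_len, int fh_type)
 *	{
 *		struct inode *inode = myfs_iget(sb, fid->i32.ino);
 *
 *		return d_obtain_alias(inode);
 *	}
 */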
2199
2200/**
2201 * d_add_ci - lookup or allocate new dentry with case-exact name
2202 * @inode:  the inode case-insensitive lookup has found
2203 * @dentry: the negative dentry that was passed to the parent's lookup func
2204 * @name:   the case-exact name to be associated with the returned dentry
2205 *
2206 * This is to avoid filling the dcache with case-insensitive names to the
2207 * same inode, only the actual correct case is stored in the dcache for
2208 * case-insensitive filesystems.
2209 *
2210 * For a case-insensitive lookup match, if the case-exact dentry
2211 * already exists in the dcache, use it and return it.
2212 *
2213 * If no entry exists with the exact case name, allocate new dentry with
2214 * the exact case, and return the spliced entry.
2215 */
2216struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
2217                        struct qstr *name)
2218{
2219        struct dentry *found;
2220        struct dentry *new;
2221
2222        /*
2223         * First check if a dentry matching the name already exists,
2224         * if not go ahead and create it now.
2225         */
2226        found = d_hash_and_lookup(dentry->d_parent, name);
2227        if (unlikely(IS_ERR(found)))
2228                goto err_out;
2229        if (!found) {
2230                new = d_alloc(dentry->d_parent, name);
2231                if (!new) {
2232                        found = ERR_PTR(-ENOMEM);
2233                        goto err_out;
2234                }
2235
2236                found = d_splice_alias(inode, new);
2237                if (found) {
2238                        dput(new);
2239                        return found;
2240                }
2241                return new;
2242        }
2243
2244        /*
2245 * If a matching dentry exists and it's not negative, use it.
2246         *
2247         * Decrement the reference count to balance the iget() done
2248         * earlier on.
2249         */
2250        if (found->d_inode) {
2251                if (unlikely(found->d_inode != inode)) {
2252                        /* This can't happen because bad inodes are unhashed. */
2253                        BUG_ON(!is_bad_inode(inode));
2254                        BUG_ON(!is_bad_inode(found->d_inode));
2255                }
2256                iput(inode);
2257                return found;
2258        }
2259
2260        /*
2261         * Negative dentry: instantiate it unless the inode is a directory and
2262         * already has a dentry.
2263         */
2264        new = d_splice_alias(inode, found);
2265        if (new) {
2266                dput(found);
2267                found = new;
2268        }
2269        return found;
2270
2271err_out:
2272        iput(inode);
2273        return found;
2274}
2275EXPORT_SYMBOL(d_add_ci);
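/*
 * Illustrative sketch (not part of this file): a case-insensitive
 * ->lookup() resolves the on-disk, case-exact name first and then lets
 * d_add_ci() find or create the corresponding dentry.  The myfs_* names
 * are hypothetical:
 *
 *	static struct dentry *myfs_lookup(struct inode *dir,
 *					  struct dentry *dentry,
 *					  unsigned int flags)
 *	{
 *		struct qstr exact;
 *		struct inode *inode = myfs_find_ci(dir, &dentry->d_name,
 *						   &exact);
 *
 *		if (!inode) {
 *			// cache the negative result under the given name
 *			d_add(dentry, NULL);
 *			return NULL;
 *		}
 *		return d_add_ci(dentry, inode, &exact);
 *	}
 */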
2276
2277/*
2278 * Do the slow-case of the dentry name compare.
2279 *
2280 * Unlike the dentry_cmp() function, we need to atomically
2281 * load the name and length information, so that the
2282 * filesystem can rely on them, and can use the 'name' and
2283 * 'len' information without worrying about walking off the
2284 * end of memory etc.
2285 *
2286 * Thus the read_seqcount_retry() and the "duplicate" info
2287 * in arguments (the low-level filesystem should not look
2288 * at the dentry inode or name contents directly, since
2289 * rename can change them while we're in RCU mode).
2290 */
2291enum slow_d_compare {
2292        D_COMP_OK,
2293        D_COMP_NOMATCH,
2294        D_COMP_SEQRETRY,
2295};
2296
2297static noinline enum slow_d_compare slow_dentry_cmp(
2298                const struct dentry *parent,
2299                struct dentry *dentry,
2300                unsigned int seq,
2301                const struct qstr *name)
2302{
2303        int tlen = dentry->d_name.len;
2304        const char *tname = dentry->d_name.name;
2305
2306        if (read_seqcount_retry(&dentry->d_seq, seq)) {
2307                cpu_relax();
2308                return D_COMP_SEQRETRY;
2309        }
2310        if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
2311                return D_COMP_NOMATCH;
2312        return D_COMP_OK;
2313}
2314
2315/**
2316 * __d_lookup_rcu - search for a dentry (racy, store-free)
2317 * @parent: parent dentry
2318 * @name: qstr of name we wish to find
2319 * @seqp: returns d_seq value at the point where the dentry was found
2320 * Returns: dentry, or NULL
2321 *
2322 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
2323 * resolution (store-free path walking) design described in
2324 * Documentation/filesystems/path-lookup.txt.
2325 *
2326 * This is not to be used outside core vfs.
2327 *
2328 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
2329 * held, and rcu_read_lock held. The returned dentry must not be stored
2330 * anywhere without taking d_lock and checking the d_seq sequence count
2331 * against @seq returned here.
2332 *
2333 * A refcount may be taken on the found dentry with the d_rcu_to_refcount
2334 * function.
2335 *
2336 * Alternatively, __d_lookup_rcu may be called again to look up the child of
2337 * the returned dentry, so long as its parent's seqlock is checked after the
2338 * child is looked up. Thus, an interlocking stepping of sequence lock checks
2339 * is formed, giving integrity down the path walk.
2340 *
2341 * NOTE! The caller *has* to check the resulting dentry against the sequence
2342 * number we've returned before using any of the resulting dentry state!
2343 */
2344struct dentry *__d_lookup_rcu(const struct dentry *parent,
2345                                const struct qstr *name,
2346                                unsigned *seqp)
2347{
2348        u64 hashlen = name->hash_len;
2349        const unsigned char *str = name->name;
2350        struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
2351        struct hlist_bl_node *node;
2352        struct dentry *dentry;
2353
2354        /*
2355 * Note: There is significant duplication with __d_lookup which is
2356         * required to prevent single threaded performance regressions
2357         * especially on architectures where smp_rmb (in seqcounts) are costly.
2358         * Keep the two functions in sync.
2359         */
2360
2361        /*
2362         * The hash list is protected using RCU.
2363         *
2364         * Carefully use d_seq when comparing a candidate dentry, to avoid
2365         * races with d_move().
2366         *
2367         * It is possible that concurrent renames can mess up our list
2368         * walk here and result in missing our dentry, resulting in the
2369         * false-negative result. d_lookup() protects against concurrent
2370         * renames using rename_lock seqlock.
2371         *
2372         * See Documentation/filesystems/path-lookup.txt for more details.
2373         */
2374        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2375                unsigned seq;
2376
2377seqretry:
2378                /*
2379                 * The dentry sequence count protects us from concurrent
2380                 * renames, and thus protects parent and name fields.
2381                 *
2382                 * The caller must perform a seqcount check in order
2383                 * to do anything useful with the returned dentry.
2384                 *
2385                 * NOTE! We do a "raw" seqcount_begin here. That means that
2386                 * we don't wait for the sequence count to stabilize if it
2387                 * is in the middle of a sequence change. If we do the slow
2388                 * dentry compare, we will do seqretries until it is stable,
2389                 * and if we end up with a successful lookup, we actually
2390                 * want to exit RCU lookup anyway.
2391                 */
2392                seq = raw_seqcount_begin(&dentry->d_seq);
2393                if (dentry->d_parent != parent)
2394                        continue;
2395                if (d_unhashed(dentry))
2396                        continue;
2397
2398                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
2399                        if (dentry->d_name.hash != hashlen_hash(hashlen))
2400                                continue;
2401                        *seqp = seq;
2402                        switch (slow_dentry_cmp(parent, dentry, seq, name)) {
2403                        case D_COMP_OK:
2404                                return dentry;
2405                        case D_COMP_NOMATCH:
2406                                continue;
2407                        default:
2408                                goto seqretry;
2409                        }
2410                }
2411
2412                if (dentry->d_name.hash_len != hashlen)
2413                        continue;
2414                *seqp = seq;
2415                if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
2416                        return dentry;
2417        }
2418        return NULL;
2419}
2420
2421/**
2422 * d_lookup - search for a dentry
2423 * @parent: parent dentry
2424 * @name: qstr of name we wish to find
2425 * Returns: dentry, or NULL
2426 *
2427 * d_lookup searches the children of the parent dentry for the name in
2428 * question. If the dentry is found its reference count is incremented and the
2429 * dentry is returned. The caller must use dput to free the entry when it has
2430 * finished using it. %NULL is returned if the dentry does not exist.
2431 */
2432struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
2433{
2434        struct dentry *dentry;
2435        unsigned seq;
2436
2437        do {
2438                seq = read_seqbegin(&rename_lock);
2439                dentry = __d_lookup(parent, name);
2440                if (dentry)
2441                        break;
2442        } while (read_seqretry(&rename_lock, seq));
2443        return dentry;
2444}
2445EXPORT_SYMBOL(d_lookup);
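/*
 * Illustrative sketch (not part of this file): a successful d_lookup()
 * takes a reference which the caller must drop with dput():
 *
 *	struct qstr name = QSTR_INIT("victim", 6);
 *	struct dentry *child;
 *
 *	name.hash = full_name_hash(name.name, name.len);
 *	child = d_lookup(parent, &name);
 *	if (child) {
 *		// ... use child ...
 *		dput(child);
 *	}
 */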
2446
2447/**
2448 * __d_lookup - search for a dentry (racy)
2449 * @parent: parent dentry
2450 * @name: qstr of name we wish to find
2451 * Returns: dentry, or NULL
2452 *
2453 * __d_lookup is like d_lookup, however it may (rarely) return a
2454 * false-negative result due to unrelated rename activity.
2455 *
2456 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
2457 * however it must be used carefully, eg. with a following d_lookup in
2458 * the case of failure.
2459 *
2460 * __d_lookup callers must be commented.
2461 */
2462struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
2463{
2464        unsigned int len = name->len;
2465        unsigned int hash = name->hash;
2466        const unsigned char *str = name->name;
2467        struct hlist_bl_head *b = d_hash(parent, hash);
2468        struct hlist_bl_node *node;
2469        struct dentry *found = NULL;
2470        struct dentry *dentry;
2471
2472        /*
2473         * Note: There is significant duplication with __d_lookup_rcu which is
2474         * required to prevent single threaded performance regressions
2475         * especially on architectures where smp_rmb (in seqcounts) are costly.
2476         * Keep the two functions in sync.
2477         */
2478
2479        /*
2480         * The hash list is protected using RCU.
2481         *
2482         * Take d_lock when comparing a candidate dentry, to avoid races
2483         * with d_move().
2484         *
2485         * It is possible that concurrent renames can mess up our list
2486         * walk here and result in missing our dentry, resulting in the
2487         * false-negative result. d_lookup() protects against concurrent
2488         * renames using rename_lock seqlock.
2489         *
2490         * See Documentation/filesystems/path-lookup.txt for more details.
2491         */
2492        rcu_read_lock();
2493        
2494        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2495
2496                if (dentry->d_name.hash != hash)
2497                        continue;
2498
2499                spin_lock(&dentry->d_lock);
2500                if (dentry->d_parent != parent)
2501                        goto next;
2502                if (d_unhashed(dentry))
2503                        goto next;
2504
2505                /*
2506                 * It is safe to compare names since d_move() cannot
2507                 * change the qstr (protected by d_lock).
2508                 */
2509                if (parent->d_flags & DCACHE_OP_COMPARE) {
2510                        int tlen = dentry->d_name.len;
2511                        const char *tname = dentry->d_name.name;
2512                        if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
2513                                goto next;
2514                } else {
2515                        if (dentry->d_name.len != len)
2516                                goto next;
2517                        if (dentry_cmp(dentry, str, len))
2518                                goto next;
2519                }
2520
2521                dentry->d_lockref.count++;
2522                found = dentry;
2523                spin_unlock(&dentry->d_lock);
2524                break;
2525next:
2526                spin_unlock(&dentry->d_lock);
2527        }
2528        rcu_read_unlock();
2529
2530        return found;
2531}
2532
2533/**
2534 * d_hash_and_lookup - hash the qstr then search for a dentry
2535 * @dir: Directory to search in
2536 * @name: qstr of name we wish to find
2537 *
2538 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2539 */
2540struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2541{
2542        /*
2543         * Check for a fs-specific hash function. Note that we must
2544         * calculate the standard hash first, as the d_op->d_hash()
2545         * routine may choose to leave the hash value unchanged.
2546         */
2547        name->hash = full_name_hash(name->name, name->len);
2548        if (dir->d_flags & DCACHE_OP_HASH) {
2549                int err = dir->d_op->d_hash(dir, name);
2550                if (unlikely(err < 0))
2551                        return ERR_PTR(err);
2552        }
2553        return d_lookup(dir, name);
2554}
2555EXPORT_SYMBOL(d_hash_and_lookup);
2556
2557/**
2558 * d_validate - verify dentry provided from insecure source (deprecated)
2559 * @dentry: The dentry alleged to be valid child of @dparent
2560 * @dparent: The parent dentry (known to be valid)
2561 *
2562 * An insecure source has sent us a dentry; here we verify it and dget() it.
2563 * This is used by ncpfs in its readdir implementation.
2564 * Zero is returned if the dentry is invalid.
2565 *
2566 * This function is slow for big directories, and deprecated, do not use it.
2567 */
2568int d_validate(struct dentry *dentry, struct dentry *dparent)
2569{
2570        struct dentry *child;
2571
2572        spin_lock(&dparent->d_lock);
2573        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2574                if (dentry == child) {
2575                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2576                        __dget_dlock(dentry);
2577                        spin_unlock(&dentry->d_lock);
2578                        spin_unlock(&dparent->d_lock);
2579                        return 1;
2580                }
2581        }
2582        spin_unlock(&dparent->d_lock);
2583
2584        return 0;
2585}
2586EXPORT_SYMBOL(d_validate);
2587
2588/*
2589 * When a file is deleted, we have two options:
2590 * - turn this dentry into a negative dentry
2591 * - unhash this dentry and free it.
2592 *
2593 * Usually, we want to just turn this into
2594 * a negative dentry, but if anybody else is
2595 * currently using the dentry or the inode
2596 * we can't do that and we fall back on removing
2597 * it from the hash queues and waiting for
2598 * it to be deleted later when it has no users.
2599 */
2600 
2601/**
2602 * d_delete - delete a dentry
2603 * @dentry: The dentry to delete
2604 *
2605 * Turn the dentry into a negative dentry if possible, otherwise
2606 * remove it from the hash queues so it can be deleted later
2607 */
2608 
2609void d_delete(struct dentry * dentry)
2610{
2611        struct inode *inode;
2612        int isdir = 0;
2613        /*
2614         * Are we the only user?
2615         */
2616again:
2617        spin_lock(&dentry->d_lock);
2618        inode = dentry->d_inode;
2619        isdir = S_ISDIR(inode->i_mode);
2620        if (dentry->d_lockref.count == 1) {
2621                if (!spin_trylock(&inode->i_lock)) {
2622                        spin_unlock(&dentry->d_lock);
2623                        cpu_relax();
2624                        goto again;
2625                }
2626                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2627                dentry_unlink_inode(dentry);
2628                fsnotify_nameremove(dentry, isdir);
2629                return;
2630        }
2631
2632        if (!d_unhashed(dentry))
2633                __d_drop(dentry);
2634
2635        spin_unlock(&dentry->d_lock);
2636
2637        fsnotify_nameremove(dentry, isdir);
2638}
2639EXPORT_SYMBOL(d_delete);
2640
2641static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2642{
2643        BUG_ON(!d_unhashed(entry));
2644        hlist_bl_lock(b);
2645        hlist_bl_add_head_rcu(&entry->d_hash, b);
2646        hlist_bl_unlock(b);
2647}
2648
2649static void _d_rehash(struct dentry * entry)
2650{
2651        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2652}
2653
2654/**
2655 * d_rehash     - add an entry back to the hash
2656 * @entry: dentry to add to the hash
2657 *
2658 * Adds a dentry to the hash according to its name.
2659 */
2660 
2661void d_rehash(struct dentry * entry)
2662{
2663        spin_lock(&entry->d_lock);
2664        _d_rehash(entry);
2665        spin_unlock(&entry->d_lock);
2666}
2667EXPORT_SYMBOL(d_rehash);
2668
2669/**
2670 * dentry_update_name_case - update case insensitive dentry with a new name
2671 * @dentry: dentry to be updated
2672 * @name: new name
2673 *
2674 * Update a case insensitive dentry with new case of name.
2675 *
2676 * dentry must have been returned by d_lookup with name @name. Old and new
2677 * name lengths must match (ie. no d_compare which allows mismatched name
2678 * lengths).
2679 *
2680 * Parent inode i_mutex must be held over d_lookup and into this call (to
2681 * keep renames and concurrent inserts, and readdir(2) away).
2682 */
2683void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2684{
2685        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2686        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2687
2688        spin_lock(&dentry->d_lock);
2689        write_seqcount_begin(&dentry->d_seq);
2690        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2691        write_seqcount_end(&dentry->d_seq);
2692        spin_unlock(&dentry->d_lock);
2693}
2694EXPORT_SYMBOL(dentry_update_name_case);
2695
2696static void switch_names(struct dentry *dentry, struct dentry *target)
2697{
2698        if (dname_external(target)) {
2699                if (dname_external(dentry)) {
2700                        /*
2701                         * Both external: swap the pointers
2702                         */
2703                        swap(target->d_name.name, dentry->d_name.name);
2704                } else {
2705                        /*
2706                         * dentry:internal, target:external.  Steal target's
2707                         * storage and make target internal.
2708                         */
2709                        memcpy(target->d_iname, dentry->d_name.name,
2710                                        dentry->d_name.len + 1);
2711                        dentry->d_name.name = target->d_name.name;
2712                        target->d_name.name = target->d_iname;
2713                }
2714        } else {
2715                if (dname_external(dentry)) {
2716                        /*
2717                         * dentry:external, target:internal.  Give dentry's
2718                         * storage to target and make dentry internal
2719                         */
2720                        memcpy(dentry->d_iname, target->d_name.name,
2721                                        target->d_name.len + 1);
2722                        target->d_name.name = dentry->d_name.name;
2723                        dentry->d_name.name = dentry->d_iname;
2724                } else {
2725                        /*
2726                         * Both are internal.
2727                         */
2728                        unsigned int i;
2729                        BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
2730                        for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
2731                                swap(((long *) &dentry->d_iname)[i],
2732                                     ((long *) &target->d_iname)[i]);
2733                        }
2734                }
2735        }
2736        swap(dentry->d_name.len, target->d_name.len);
2737}
2738
2739static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2740{
2741        /*
2742         * XXXX: do we really need to take target->d_lock?
2743         */
2744        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2745                spin_lock(&target->d_parent->d_lock);
2746        else {
2747                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2748                        spin_lock(&dentry->d_parent->d_lock);
2749                        spin_lock_nested(&target->d_parent->d_lock,
2750                                                DENTRY_D_LOCK_NESTED);
2751                } else {
2752                        spin_lock(&target->d_parent->d_lock);
2753                        spin_lock_nested(&dentry->d_parent->d_lock,
2754                                                DENTRY_D_LOCK_NESTED);
2755                }
2756        }
2757        if (target < dentry) {
2758                spin_lock_nested(&target->d_lock, 2);
2759                spin_lock_nested(&dentry->d_lock, 3);
2760        } else {
2761                spin_lock_nested(&dentry->d_lock, 2);
2762                spin_lock_nested(&target->d_lock, 3);
2763        }
2764}
2765
2766static void dentry_unlock_parents_for_move(struct dentry *dentry,
2767                                        struct dentry *target)
2768{
2769        if (target->d_parent != dentry->d_parent)
2770                spin_unlock(&dentry->d_parent->d_lock);
2771        if (target->d_parent != target)
2772                spin_unlock(&target->d_parent->d_lock);
2773}
2774
2775/*
2776 * When switching names, the actual string doesn't strictly have to
2777 * be preserved in the target - because we're dropping the target
2778 * anyway. As such, we can just do a simple memcpy() to copy over
2779 * the new name before we switch.
2780 *
2781 * Note that we have to be a lot more careful about getting the hash
2782 * switched - we have to switch the hash value properly even if it
2783 * then no longer matches the actual (corrupted) string of the target.
2784 * The hash value has to match the hash queue that the dentry is on..
2785 */
2786/*
2787 * __d_move - move a dentry
2788 * @dentry: entry to move
2789 * @target: new dentry
2790 * @exchange: exchange the two dentries
2791 *
2792 * Update the dcache to reflect the move of a file name. Negative
2793 * dcache entries should not be moved in this way. Caller must hold
2794 * rename_lock, the i_mutex of the source and target directories,
2795 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2796 */
2797static void __d_move(struct dentry *dentry, struct dentry *target,
2798                     bool exchange)
2799{
2800        if (!dentry->d_inode)
2801                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2802
2803        BUG_ON(d_ancestor(dentry, target));
2804        BUG_ON(d_ancestor(target, dentry));
2805
2806        dentry_lock_for_move(dentry, target);
2807
2808        write_seqcount_begin(&dentry->d_seq);
2809        write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2810
2811        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2812
2813        /*
2814         * Move the dentry to the target hash queue. Don't bother checking
2815         * for the same hash queue because of how unlikely it is.
2816         */
2817        __d_hash_move(dentry, d_hash(target->d_parent, target->d_name.hash));
2818
2819        /*
2820         * Unhash the target (d_delete() is not usable here).  If exchanging
2821         * the two dentries, then rehash onto the other's hash queue.
2822         */
2823        if (exchange) {
2824                __d_hash_move(target,
2825                           d_hash(dentry->d_parent, dentry->d_name.hash));
2826        } else {
2827                __d_drop(target);
2828        }
2829
2830        list_del(&dentry->d_u.d_child);
2831        list_del(&target->d_u.d_child);
2832
2833        /* Switch the names.. */
2834        switch_names(dentry, target);
2835        swap(dentry->d_name.hash, target->d_name.hash);
2836
2837        /* ... and switch the parents */
2838        if (IS_ROOT(dentry)) {
2839                dentry->d_flags |= DCACHE_RCUACCESS;
2840                dentry->d_parent = target->d_parent;
2841                target->d_parent = target;
2842                INIT_LIST_HEAD(&target->d_u.d_child);
2843        } else {
2844                swap(dentry->d_parent, target->d_parent);
2845
2846                /* And add them back to the (new) parent lists */
2847                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2848        }
2849
2850        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2851
2852        write_seqcount_end(&target->d_seq);
2853        write_seqcount_end(&dentry->d_seq);
2854
2855        dentry_unlock_parents_for_move(dentry, target);
2856        if (exchange)
2857                fsnotify_update_flags(target);
2858        spin_unlock(&target->d_lock);
2859        fsnotify_update_flags(dentry);
2860        spin_unlock(&dentry->d_lock);
2861}
2862
2863/*
2864 * d_move - move a dentry
2865 * @dentry: entry to move
2866 * @target: new dentry
2867 *
2868 * Update the dcache to reflect the move of a file name. Negative
2869 * dcache entries should not be moved in this way. See the locking
2870 * requirements for __d_move.
2871 */
2872void d_move(struct dentry *dentry, struct dentry *target)
2873{
2874        write_seqlock(&rename_lock);
2875        __d_move(dentry, target, false);
2876        write_sequnlock(&rename_lock);
2877}
2878EXPORT_SYMBOL(d_move);
2879
2880/*
2881 * d_exchange - exchange two dentries
2882 * @dentry1: first dentry
2883 * @dentry2: second dentry
2884 */
2885void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
2886{
2887        write_seqlock(&rename_lock);
2888
2889        WARN_ON(!dentry1->d_inode);
2890        WARN_ON(!dentry2->d_inode);
2891        WARN_ON(IS_ROOT(dentry1));
2892        WARN_ON(IS_ROOT(dentry2));
2893
2894        __d_move(dentry1, dentry2, true);
2895
2896        write_sequnlock(&rename_lock);
2897}
2898
2899/**
2900 * d_ancestor - search for an ancestor
2901 * @p1: ancestor dentry
2902 * @p2: child dentry
2903 *
2904 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2905 * an ancestor of p2, else NULL.
2906 */
2907struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2908{
2909        struct dentry *p;
2910
2911        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2912                if (p->d_parent == p1)
2913                        return p;
2914        }
2915        return NULL;
2916}
2917
2918/*
2919 * This helper attempts to cope with remotely renamed directories
2920 *
2921 * It assumes that the caller is already holding
2922 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2923 *
2924 * Note: If ever the locking in lock_rename() changes, then please
2925 * remember to update this too...
2926 */
2927static struct dentry *__d_unalias(struct inode *inode,
2928                struct dentry *dentry, struct dentry *alias)
2929{
2930        struct mutex *m1 = NULL, *m2 = NULL;
2931        struct dentry *ret = ERR_PTR(-EBUSY);
2932
2933        /* If alias and dentry share a parent, then no extra locks required */
2934        if (alias->d_parent == dentry->d_parent)
2935                goto out_unalias;
2936
2937        /* See lock_rename() */
2938        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2939                goto out_err;
2940        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2941        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2942                goto out_err;
2943        m2 = &alias->d_parent->d_inode->i_mutex;
2944out_unalias:
2945        __d_move(alias, dentry, false);
2946        ret = alias;
2947out_err:
2948        spin_unlock(&inode->i_lock);
2949        if (m2)
2950                mutex_unlock(m2);
2951        if (m1)
2952                mutex_unlock(m1);
2953        return ret;
2954}
2955
2956/*
2957 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2958 * named dentry in place of the dentry to be replaced.
2959 * returns with anon->d_lock held!
2960 */
2961static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2962{
2963        struct dentry *dparent;
2964
2965        dentry_lock_for_move(anon, dentry);
2966
2967        write_seqcount_begin(&dentry->d_seq);
2968        write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED);
2969
2970        dparent = dentry->d_parent;
2971
2972        switch_names(dentry, anon);
2973        swap(dentry->d_name.hash, anon->d_name.hash);
2974
2975        dentry->d_parent = dentry;
2976        list_del_init(&dentry->d_u.d_child);
2977        anon->d_flags |= DCACHE_RCUACCESS;
2978        anon->d_parent = dparent;
2979        if (likely(!d_unhashed(anon))) {
2980                hlist_bl_lock(&anon->d_sb->s_anon);
2981                __hlist_bl_del(&anon->d_hash);
2982                anon->d_hash.pprev = NULL;
2983                hlist_bl_unlock(&anon->d_sb->s_anon);
2984        }
2985        list_move(&anon->d_u.d_child, &dparent->d_subdirs);
2986
2987        write_seqcount_end(&dentry->d_seq);
2988        write_seqcount_end(&anon->d_seq);
2989
2990        dentry_unlock_parents_for_move(anon, dentry);
2991        spin_unlock(&dentry->d_lock);
2992
2993        /* anon->d_lock still locked, returns locked */
2994}
2995
2996/**
2997 * d_splice_alias - splice a disconnected dentry into the tree if one exists
2998 * @inode:  the inode which may have a disconnected dentry
2999 * @dentry: a negative dentry which we want to point to the inode.
3000 *
3001 * If inode is a directory and has an IS_ROOT alias, then d_move that in
3002 * place of the given dentry and return it, else simply d_add the inode
3003 * to the dentry and return NULL.
3004 *
3005 * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
3006 * we should error out: directories can't have multiple aliases.
3007 *
3008 * This is needed in the lookup routine of any filesystem that is exportable
3009 * (via knfsd) so that we can build dcache paths to directories effectively.
3010 *
3011 * If a dentry was found and moved, then it is returned.  Otherwise NULL
3012 * is returned.  This matches the expected return value of ->lookup.
3013 *
3014 * Cluster filesystems may call this function with a negative, hashed dentry.
3015 * In that case, we know that the inode will be a regular file, and also this
3016 * will only occur during atomic_open. So we need to check for the dentry
3017 * being already hashed only in the final case.
3018 */
3019struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
3020{
3021        struct dentry *new = NULL;
3022
3023        if (IS_ERR(inode))
3024                return ERR_CAST(inode);
3025
3026        if (inode && S_ISDIR(inode->i_mode)) {
3027                spin_lock(&inode->i_lock);
3028                new = __d_find_any_alias(inode);
3029                if (new) {
3030                        if (!IS_ROOT(new)) {
3031                                spin_unlock(&inode->i_lock);
3032                                dput(new);
3033                                iput(inode);
3034                                return ERR_PTR(-EIO);
3035                        }
3036                        if (d_ancestor(new, dentry)) {
3037                                spin_unlock(&inode->i_lock);
3038                                dput(new);
3039                                iput(inode);
3040                                return ERR_PTR(-EIO);
3041                        }
3042                        write_seqlock(&rename_lock);
3043                        __d_materialise_dentry(dentry, new);
3044                        write_sequnlock(&rename_lock);
3045                        _d_rehash(new);
3046                        spin_unlock(&new->d_lock);
3047                        spin_unlock(&inode->i_lock);
3048                        security_d_instantiate(new, inode);
3049                        iput(inode);
3050                } else {
3051                        /* already holding inode->i_lock, so d_add() by hand */
3052                        __d_instantiate(dentry, inode);
3053                        spin_unlock(&inode->i_lock);
3054                        security_d_instantiate(dentry, inode);
3055                        d_rehash(dentry);
3056                }
3057        } else {
3058                d_instantiate(dentry, inode);
3059                if (d_unhashed(dentry))
3060                        d_rehash(dentry);
3061        }
3062        return new;
3063}
3064EXPORT_SYMBOL(d_splice_alias);
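/*
 * Example (editorial sketch, not part of this file): the usual way an
 * exportable filesystem's ->lookup() ends is a tail call to
 * d_splice_alias().  The myfs_* helpers below are hypothetical; the shape
 * follows the classic ext2-style lookup.  d_splice_alias() copes with
 * @inode being NULL (negative lookup) or an ERR_PTR.
 *
 *	static struct dentry *myfs_lookup(struct inode *dir,
 *					  struct dentry *dentry,
 *					  unsigned int flags)
 *	{
 *		struct inode *inode = NULL;
 *		ino_t ino = myfs_inode_by_name(dir, &dentry->d_name);
 *
 *		if (ino)
 *			inode = myfs_iget(dir->i_sb, ino);
 *		return d_splice_alias(inode, dentry);	// NULL or the alias
 *	}
 */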
3065
3066/**
3067 * d_materialise_unique - introduce an inode into the tree
3068 * @dentry: candidate dentry
3069 * @inode: inode to bind to the dentry, to which aliases may be attached
3070 *
3071 * Introduces a dentry into the tree, substituting an extant disconnected
3072 * root directory alias in its place if there is one. Caller must hold the
3073 * i_mutex of the parent directory.
3074 */
3075struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
3076{
3077        struct dentry *actual;
3078
3079        BUG_ON(!d_unhashed(dentry));
3080
3081        if (!inode) {
3082                actual = dentry;
3083                __d_instantiate(dentry, NULL);
3084                d_rehash(actual);
3085                goto out_nolock;
3086        }
3087
3088        spin_lock(&inode->i_lock);
3089
3090        if (S_ISDIR(inode->i_mode)) {
3091                struct dentry *alias;
3092
3093                /* Does an aliased dentry already exist? */
3094                alias = __d_find_alias(inode, 0);
3095                if (alias) {
3096                        actual = alias;
3097                        write_seqlock(&rename_lock);
3098
3099                        if (d_ancestor(alias, dentry)) {
3100                                /* Check for loops */
3101                                actual = ERR_PTR(-ELOOP);
3102                                spin_unlock(&inode->i_lock);
3103                        } else if (IS_ROOT(alias)) {
3104                                /* Is this an anonymous mountpoint that we
3105                                 * could splice into our tree? */
3106                                __d_materialise_dentry(dentry, alias);
3107                                write_sequnlock(&rename_lock);
3108                                goto found;
3109                        } else {
3110                                /* Nope, but we must(!) avoid directory
3111                                 * aliasing. This drops inode->i_lock */
3112                                actual = __d_unalias(inode, dentry, alias);
3113                        }
3114                        write_sequnlock(&rename_lock);
3115                        if (IS_ERR(actual)) {
3116                                if (PTR_ERR(actual) == -ELOOP)
3117                                        pr_warn_ratelimited(
3118                                                "VFS: Lookup of '%s' in %s %s"
3119                                                " would have caused loop\n",
3120                                                dentry->d_name.name,
3121                                                inode->i_sb->s_type->name,
3122                                                inode->i_sb->s_id);
3123                                dput(alias);
3124                        }
3125                        goto out_nolock;
3126                }
3127        }
3128
3129        /* Add a unique reference */
3130        actual = __d_instantiate_unique(dentry, inode);
3131        if (!actual)
3132                actual = dentry;
3133        else
3134                BUG_ON(!d_unhashed(actual));
3135
3136        spin_lock(&actual->d_lock);
3137found:
3138        _d_rehash(actual);
3139        spin_unlock(&actual->d_lock);
3140        spin_unlock(&inode->i_lock);
3141out_nolock:
3142        if (actual == dentry) {
3143                security_d_instantiate(dentry, inode);
3144                return NULL;
3145        }
3146
3147        iput(inode);
3148        return actual;
3149}
3150EXPORT_SYMBOL_GPL(d_materialise_unique);
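/*
 * Example (editorial sketch): how a network/cluster filesystem lookup
 * typically consumes the return value, roughly the pattern NFS uses.  A
 * non-NULL, non-error result means an existing alias was spliced in and
 * must be used instead of the candidate dentry that was passed:
 *
 *	res = d_materialise_unique(dentry, inode);
 *	if (res != NULL) {
 *		if (IS_ERR(res))
 *			return res;	// e.g. ERR_PTR(-ELOOP)
 *		dentry = res;		// continue with the spliced alias
 *	}
 */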
3151
3152static int prepend(char **buffer, int *buflen, const char *str, int namelen)
3153{
3154        *buflen -= namelen;
3155        if (*buflen < 0)
3156                return -ENAMETOOLONG;
3157        *buffer -= namelen;
3158        memcpy(*buffer, str, namelen);
3159        return 0;
3160}
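/*
 * Illustration (editorial): prepend() builds strings right-to-left.
 * Callers position the cursor at the end of the buffer and work
 * backwards, which is why every user below seeds the buffer with a
 * terminating NUL first:
 *
 *	char buf[16];
 *	char *p = buf + sizeof(buf);
 *	int len = sizeof(buf);
 *
 *	prepend(&p, &len, "\0", 1);
 *	prepend(&p, &len, "bar", 3);
 *	prepend(&p, &len, "/", 1);
 *	prepend(&p, &len, "foo", 3);
 *	prepend(&p, &len, "/", 1);
 *	// p now points at "/foo/bar", NUL-terminated
 */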
3161
3162/**
3163 * prepend_name - prepend a pathname in front of current buffer pointer
3164 * @buffer: buffer pointer
3165 * @buflen: allocated length of the buffer
3166 * @name:   name string and length qstr structure
3167 *
3168 * With RCU path tracing, this may race with d_move(). Use ACCESS_ONCE() to
3169 * make sure that either the old or the new name pointer and length are
3170 * fetched. However, the pointer and the length may be mismatched, so the
3171 * length cannot be trusted: copy the name byte-by-byte until either the
3172 * length is reached or a null byte is found. This also prepends "/" at
3173 * the beginning of the name. The sequence number check at the caller will
3174 * retry when a d_move() does happen, so any garbage in the buffer
3175 * due to a mismatched pointer and length will be discarded.
3176 */
3177static int prepend_name(char **buffer, int *buflen, struct qstr *name)
3178{
3179        const char *dname = ACCESS_ONCE(name->name);
3180        u32 dlen = ACCESS_ONCE(name->len);
3181        char *p;
3182
3183        *buflen -= dlen + 1;
3184        if (*buflen < 0)
3185                return -ENAMETOOLONG;
3186        p = *buffer -= dlen + 1;
3187        *p++ = '/';
3188        while (dlen--) {
3189                char c = *dname++;
3190                if (!c)
3191                        break;
3192                *p++ = c;
3193        }
3194        return 0;
3195}
3196
3197/**
3198 * prepend_path - Prepend path string to a buffer
3199 * @path: the dentry/vfsmount to report
3200 * @root: root vfsmnt/dentry
3201 * @buffer: pointer to the end of the buffer
3202 * @buflen: pointer to buffer length
3203 *
3204 * The function will first try to write out the pathname without taking any
3205 * lock other than the RCU read lock to make sure that dentries won't go away.
3206 * It only checks the sequence number of the global rename_lock as any change
3207 * in the dentry's d_seq will be preceded by changes in the rename_lock
3208 * sequence number. If the sequence number has changed, it restarts the
3209 * whole pathname back-tracing, this time taking the rename_lock.
3210 * In that case, there is no need to hold the RCU read lock, as the
3211 * recursive parent pointer references will keep the dentry chain alive as
3212 * long as no rename operation is performed.
3213 */
3214static int prepend_path(const struct path *path,
3215                        const struct path *root,
3216                        char **buffer, int *buflen)
3217{
3218        struct dentry *dentry;
3219        struct vfsmount *vfsmnt;
3220        struct mount *mnt;
3221        int error = 0;
3222        unsigned seq, m_seq = 0;
3223        char *bptr;
3224        int blen;
3225
3226        rcu_read_lock();
3227restart_mnt:
3228        read_seqbegin_or_lock(&mount_lock, &m_seq);
3229        seq = 0;
3230        rcu_read_lock();
3231restart:
3232        bptr = *buffer;
3233        blen = *buflen;
3234        error = 0;
3235        dentry = path->dentry;
3236        vfsmnt = path->mnt;
3237        mnt = real_mount(vfsmnt);
3238        read_seqbegin_or_lock(&rename_lock, &seq);
3239        while (dentry != root->dentry || vfsmnt != root->mnt) {
3240                struct dentry * parent;
3241
3242                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
3243                        struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
3244                        /* Escaped? */
3245                        if (dentry != vfsmnt->mnt_root) {
3246                                bptr = *buffer;
3247                                blen = *buflen;
3248                                error = 3;
3249                                break;
3250                        }
3251                        /* Global root? */
3252                        if (mnt != parent) {
3253                                dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
3254                                mnt = parent;
3255                                vfsmnt = &mnt->mnt;
3256                                continue;
3257                        }
3258                        if (!error)
3259                                error = is_mounted(vfsmnt) ? 1 : 2;
3260                        break;
3261                }
3262                parent = dentry->d_parent;
3263                prefetch(parent);
3264                error = prepend_name(&bptr, &blen, &dentry->d_name);
3265                if (error)
3266                        break;
3267
3268                dentry = parent;
3269        }
3270        if (!(seq & 1))
3271                rcu_read_unlock();
3272        if (need_seqretry(&rename_lock, seq)) {
3273                seq = 1;
3274                goto restart;
3275        }
3276        done_seqretry(&rename_lock, seq);
3277
3278        if (!(m_seq & 1))
3279                rcu_read_unlock();
3280        if (need_seqretry(&mount_lock, m_seq)) {
3281                m_seq = 1;
3282                goto restart_mnt;
3283        }
3284        done_seqretry(&mount_lock, m_seq);
3285
3286        if (error >= 0 && bptr == *buffer) {
3287                if (--blen < 0)
3288                        error = -ENAMETOOLONG;
3289                else
3290                        *--bptr = '/';
3291        }
3292        *buffer = bptr;
3293        *buflen = blen;
3294        return error;
3295}
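/*
 * Editorial note on the return value: 0 means the path was fully rendered
 * relative to @root; negative is -ENAMETOOLONG; positive means the path is
 * not reachable from @root (1: hit the global root of a mounted tree,
 * 2: the mount is detached, 3: the dentry escaped its mount).  The
 * lockless-then-locked retry dance used above, distilled (sketch):
 *
 *	int seq = 0;
 *again:
 *	read_seqbegin_or_lock(&rename_lock, &seq);	// even seq: lockless
 *	walk_the_dentries();		// placeholder for the walk above
 *	if (need_seqretry(&rename_lock, seq)) {
 *		seq = 1;	// odd seq: retake as an exclusive reader
 *		goto again;
 *	}
 *	done_seqretry(&rename_lock, seq);
 */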
3296
3297/**
3298 * __d_path - return the path of a dentry
3299 * @path: the dentry/vfsmount to report
3300 * @root: root vfsmnt/dentry
3301 * @buf: buffer to return value in
3302 * @buflen: buffer length
3303 *
3304 * Convert a dentry into an ASCII path name.
3305 *
3306 * Returns a pointer into the buffer or an error code if the
3307 * path was too long.
3308 *
3309 * "buflen" should be positive.
3310 *
3311 * If the path is not reachable from the supplied root, return %NULL.
3312 */
3313char *__d_path(const struct path *path,
3314               const struct path *root,
3315               char *buf, int buflen)
3316{
3317        char *res = buf + buflen;
3318        int error;
3319
3320        prepend(&res, &buflen, "\0", 1);
3321        error = prepend_path(path, root, &res, &buflen);
3322
3323        if (error < 0)
3324                return ERR_PTR(error);
3325        if (error > 0)
3326                return NULL;
3327        return res;
3328}
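/*
 * Example (editorial sketch): __d_path() is for callers that want a path
 * relative to an explicit root, e.g. a security module naming a file
 * relative to a confined task's root:
 *
 *	struct path root;
 *	char *name;
 *
 *	get_fs_root(current->fs, &root);
 *	name = __d_path(&file->f_path, &root, buf, buflen);
 *	path_put(&root);
 *	if (IS_ERR(name))
 *		...			// buffer too small
 *	else if (!name)
 *		...			// not reachable from that root
 */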
3329
3330char *d_absolute_path(const struct path *path,
3331               char *buf, int buflen)
3332{
3333        struct path root = {};
3334        char *res = buf + buflen;
3335        int error;
3336
3337        prepend(&res, &buflen, "\0", 1);
3338        error = prepend_path(path, &root, &res, &buflen);
3339
3340        if (error > 1)
3341                error = -EINVAL;
3342        if (error < 0)
3343                return ERR_PTR(error);
3344        return res;
3345}
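/*
 * Example (editorial sketch): unlike d_path(), d_absolute_path() ignores
 * the caller's root, so it can name objects outside a chroot:
 *
 *	char *p = d_absolute_path(&path, buf, PATH_MAX);
 *	if (IS_ERR(p))
 *		...	// -EINVAL if detached, -ENAMETOOLONG if buf too small
 */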
3346
3347/*
3348 * same as __d_path but appends "(deleted)" for unlinked files.
3349 */
3350static int path_with_deleted(const struct path *path,
3351                             const struct path *root,
3352                             char **buf, int *buflen)
3353{
3354        prepend(buf, buflen, "\0", 1);
3355        if (d_unlinked(path->dentry)) {
3356                int error = prepend(buf, buflen, " (deleted)", 10);
3357                if (error)
3358                        return error;
3359        }
3360
3361        return prepend_path(path, root, buf, buflen);
3362}
3363
3364static int prepend_unreachable(char **buffer, int *buflen)
3365{
3366        return prepend(buffer, buflen, "(unreachable)", 13);
3367}
3368
3369static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
3370{
3371        unsigned seq;
3372
3373        do {
3374                seq = read_seqcount_begin(&fs->seq);
3375                *root = fs->root;
3376        } while (read_seqcount_retry(&fs->seq, seq));
3377}
3378
3379/**
3380 * d_path - return the path of a dentry
3381 * @path: path to report
3382 * @buf: buffer to return value in
3383 * @buflen: buffer length
3384 *
3385 * Convert a dentry into an ASCII path name. If the entry has been deleted
3386 * the string " (deleted)" is appended. Note that this is ambiguous: a file
3387 * may itself legitimately be named " (deleted)".
3388 *
3389 * Returns a pointer into the buffer or an error code if the path was too
3390 * long. Note: callers should use the returned pointer, not the passed-in
3391 * buffer; the name starts at an offset, leaving unused bytes at the start.
3392 *
3393 * "buflen" should be positive.
3394 */
3395char *d_path(const struct path *path, char *buf, int buflen)
3396{
3397        char *res = buf + buflen;
3398        struct path root;
3399        int error;
3400
3401        /*
3402         * We have various synthetic filesystems that never get mounted.  On
3403         * these filesystems dentries are never used for lookup purposes, and
3404         * thus don't need to be hashed.  They also don't need a name until a
3405         * user wants to identify the object in /proc/pid/fd/.  The little hack
3406         * below allows us to generate a name for these objects on demand:
3407         *
3408         * Some pseudo inodes are mountable.  When they are mounted
3409         * path->dentry == path->mnt->mnt_root.  In that case don't call d_dname
3410         * and instead have d_path return the mounted path.
3411         */
3412        if (path->dentry->d_op && path->dentry->d_op->d_dname &&
3413            (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
3414                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
3415
3416        rcu_read_lock();
3417        get_fs_root_rcu(current->fs, &root);
3418        error = path_with_deleted(path, &root, &res, &buflen);
3419        rcu_read_unlock();
3420
3421        if (error < 0)
3422                res = ERR_PTR(error);
3423        return res;
3424}
3425EXPORT_SYMBOL(d_path);
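/*
 * Example (editorial sketch): the canonical way to print the name of an
 * open file.  Note that the result is used via the returned pointer,
 * never via the page itself:
 *
 *	char *page = (char *)__get_free_page(GFP_KERNEL);
 *	char *name;
 *
 *	if (!page)
 *		return -ENOMEM;
 *	name = d_path(&file->f_path, page, PAGE_SIZE);
 *	if (!IS_ERR(name))
 *		pr_info("backing file: %s\n", name);
 *	free_page((unsigned long)page);
 */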
3426
3427/*
3428 * Helper function for dentry_operations.d_dname() members
3429 */
3430char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
3431                        const char *fmt, ...)
3432{
3433        va_list args;
3434        char temp[64];
3435        int sz;
3436
3437        va_start(args, fmt);
3438        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
3439        va_end(args);
3440
3441        if (sz > sizeof(temp) || sz > buflen)
3442                return ERR_PTR(-ENAMETOOLONG);
3443
3444        buffer += buflen - sz;
3445        return memcpy(buffer, temp, sz);
3446}
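/*
 * Example: this is the pattern pipefs uses for its ->d_dname() method,
 * which is how "pipe:[1234]" names show up in /proc/<pid>/fd:
 *
 *	static char *pipefs_dname(struct dentry *dentry, char *buffer,
 *				  int buflen)
 *	{
 *		return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 *				dentry->d_inode->i_ino);
 *	}
 */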
3447
3448char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
3449{
3450        char *end = buffer + buflen;
3451        /* these dentries are never renamed, so d_lock is not needed */
3452        if (prepend(&end, &buflen, " (deleted)", 11) || /* incl. trailing NUL */
3453            prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
3454            prepend(&end, &buflen, "/", 1))
3455                end = ERR_PTR(-ENAMETOOLONG);
3456        return end;
3457}
3458EXPORT_SYMBOL(simple_dname);
3459
3460/*
3461 * Write full pathname from the root of the filesystem into the buffer.
3462 */
3463static char *__dentry_path(struct dentry *d, char *buf, int buflen)
3464{
3465        struct dentry *dentry;
3466        char *end, *retval;
3467        int len, seq = 0;
3468        int error = 0;
3469
3470        if (buflen < 2)
3471                goto Elong;
3472
3473        rcu_read_lock();
3474restart:
3475        dentry = d;
3476        end = buf + buflen;
3477        len = buflen;
3478        prepend(&end, &len, "\0", 1);
3479        /* Get '/' right */
3480        retval = end-1;
3481        *retval = '/';
3482        read_seqbegin_or_lock(&rename_lock, &seq);
3483        while (!IS_ROOT(dentry)) {
3484                struct dentry *parent = dentry->d_parent;
3485
3486                /* no local 'error': failures must reach the check below */
3487                prefetch(parent);
3488                error = prepend_name(&end, &len, &dentry->d_name);
3489                if (error)
3490                        break;
3491
3492                retval = end;
3493                dentry = parent;
3494        }
3495        if (!(seq & 1))
3496                rcu_read_unlock();
3497        if (need_seqretry(&rename_lock, seq)) {
3498                seq = 1;
3499                goto restart;
3500        }
3501        done_seqretry(&rename_lock, seq);
3502        if (error)
3503                goto Elong;
3504        return retval;
3505Elong:
3506        return ERR_PTR(-ENAMETOOLONG);
3507}
3508
3509char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
3510{
3511        return __dentry_path(dentry, buf, buflen);
3512}
3513EXPORT_SYMBOL(dentry_path_raw);
3514
3515char *dentry_path(struct dentry *dentry, char *buf, int buflen)
3516{
3517        char *p = NULL;
3518        char *retval;
3519
3520        if (d_unlinked(dentry)) {
3521                p = buf + buflen;
3522                if (prepend(&p, &buflen, "//deleted", 10) != 0)
3523                        goto Elong;
3524                buflen++;       /* __dentry_path() will overwrite *p; restored below */
3525        }
3526        retval = __dentry_path(dentry, buf, buflen);
3527        if (!IS_ERR(retval) && p)
3528                *p = '/';       /* restore '/' overridden with '\0' */
3529        return retval;
3530Elong:
3531        return ERR_PTR(-ENAMETOOLONG);
3532}
3533
3534static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
3535                                    struct path *pwd)
3536{
3537        unsigned seq;
3538
3539        do {
3540                seq = read_seqcount_begin(&fs->seq);
3541                *root = fs->root;
3542                *pwd = fs->pwd;
3543        } while (read_seqcount_retry(&fs->seq, seq));
3544}
3545
3546/*
3547 * NOTE! The user-level library version returns a
3548 * character pointer. The kernel system call just
3549 * returns the length of the buffer filled (which
3550 * includes the ending '\0' character), or a negative
3551 * error value. So libc would do something like
3552 *
3553 *      char *getcwd(char * buf, size_t size)
3554 *      {
3555 *              int retval;
3556 *
3557 *              retval = sys_getcwd(buf, size);
3558 *              if (retval >= 0)
3559 *                      return buf;
3560 *              errno = -retval;
3561 *              return NULL;
3562 *      }
3563 */
3564SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3565{
3566        int error;
3567        struct path pwd, root;
3568        char *page = (char *) __get_free_page(GFP_USER);
3569
3570        if (!page)
3571                return -ENOMEM;
3572
3573        rcu_read_lock();
3574        get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
3575
3576        error = -ENOENT;
3577        if (!d_unlinked(pwd.dentry)) {
3578                unsigned long len;
3579                char *cwd = page + PAGE_SIZE;
3580                int buflen = PAGE_SIZE;
3581
3582                prepend(&cwd, &buflen, "\0", 1);
3583                error = prepend_path(&pwd, &root, &cwd, &buflen);
3584                rcu_read_unlock();
3585
3586                if (error < 0)
3587                        goto out;
3588
3589                /* Unreachable from current root */
3590                if (error > 0) {
3591                        error = prepend_unreachable(&cwd, &buflen);
3592                        if (error)
3593                                goto out;
3594                }
3595
3596                error = -ERANGE;
3597                len = PAGE_SIZE + page - cwd;
3598                if (len <= size) {
3599                        error = len;
3600                        if (copy_to_user(buf, cwd, len))
3601                                error = -EFAULT;
3602                }
3603        } else {
3604                rcu_read_unlock();
3605        }
3606
3607out:
3608        free_page((unsigned long) page);
3609        return error;
3610}
3611
3612/*
3613 * Test whether new_dentry is a subdirectory of old_dentry.
3614 *
3615 * Trivially implemented using the dcache structure
3616 */
3617
3618/**
3619 * is_subdir - is new dentry a subdirectory of old_dentry
3620 * @new_dentry: new dentry
3621 * @old_dentry: old dentry
3622 *
3623 * Returns 1 if new_dentry is a subdirectory of old_dentry (at any depth),
3624 * or if the two are the same dentry. Returns 0 otherwise.
3625 * Caller must ensure that "new_dentry" is pinned before calling is_subdir().
3626 */
3627
3628int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
3629{
3630        int result;
3631        unsigned seq;
3632
3633        if (new_dentry == old_dentry)
3634                return 1;
3635
3636        do {
3637                /* for restarting inner loop in case of seq retry */
3638                seq = read_seqbegin(&rename_lock);
3639                /*
3640                 * Need rcu_read_lock() to protect against d_parent being
3641                 * freed or changed under us by a concurrent d_move()
3642                 */
3643                rcu_read_lock();
3644                if (d_ancestor(old_dentry, new_dentry))
3645                        result = 1;
3646                else
3647                        result = 0;
3648                rcu_read_unlock();
3649        } while (read_seqretry(&rename_lock, seq));
3650
3651        return result;
3652}
3653EXPORT_SYMBOL(is_subdir);
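/*
 * Example (editorial sketch): a typical containment check, e.g. refusing
 * to operate on anything outside a subtree the caller was granted.  Both
 * dentries must be pinned by the caller:
 *
 *	if (!is_subdir(victim, allowed_root))
 *		return -EXDEV;	// victim lies outside allowed_root
 */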
3654
3655static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3656{
3657        struct dentry *root = data;
3658        if (dentry != root) {
3659                if (d_unhashed(dentry) || !dentry->d_inode)
3660                        return D_WALK_SKIP;
3661
3662                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
3663                        dentry->d_flags |= DCACHE_GENOCIDE;
3664                        dentry->d_lockref.count--;
3665                }
3666        }
3667        return D_WALK_CONTINUE;
3668}
3669
3670void d_genocide(struct dentry *parent)
3671{
3672        d_walk(parent, parent, d_genocide_kill, NULL);
3673}
3674
3675void d_tmpfile(struct dentry *dentry, struct inode *inode)
3676{
3677        inode_dec_link_count(inode);
3678        BUG_ON(dentry->d_name.name != dentry->d_iname ||
3679                !hlist_unhashed(&dentry->d_alias) ||
3680                !d_unlinked(dentry));
3681        spin_lock(&dentry->d_parent->d_lock);
3682        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
3683        dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
3684                                (unsigned long long)inode->i_ino);
3685        spin_unlock(&dentry->d_lock);
3686        spin_unlock(&dentry->d_parent->d_lock);
3687        d_instantiate(dentry, inode);
3688}
3689EXPORT_SYMBOL(d_tmpfile);
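/*
 * Example (editorial sketch): a filesystem's ->tmpfile() method.  The
 * myfs_new_inode() helper is hypothetical and is assumed to return an
 * inode with i_nlink == 1; d_tmpfile() then drops that link so the inode
 * is unlinked from birth:
 *
 *	static int myfs_tmpfile(struct inode *dir, struct dentry *dentry,
 *				umode_t mode)
 *	{
 *		struct inode *inode = myfs_new_inode(dir, mode);
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *		d_tmpfile(dentry, inode);	// names it "#<ino>"
 *		return 0;
 *	}
 */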
3690
3691/**
3692 * find_inode_number - check for dentry with name
3693 * @dir: directory to check
3694 * @name: Name to find.
3695 *
3696 * Check whether a dentry already exists for the given name,
3697 * and return the inode number if it has an inode. Otherwise
3698 * 0 is returned.
3699 *
3700 * This routine is used to post-process directory listings for
3701 * filesystems using synthetic inode numbers, and is necessary
3702 * to keep getcwd() working.
3703 */
3704 
3705ino_t find_inode_number(struct dentry *dir, struct qstr *name)
3706{
3707        struct dentry * dentry;
3708        ino_t ino = 0;
3709
3710        dentry = d_hash_and_lookup(dir, name);
3711        if (!IS_ERR_OR_NULL(dentry)) {
3712                if (dentry->d_inode)
3713                        ino = dentry->d_inode->i_ino;
3714                dput(dentry);
3715        }
3716        return ino;
3717}
3718EXPORT_SYMBOL(find_inode_number);
3719
3720static __initdata unsigned long dhash_entries;
3721static int __init set_dhash_entries(char *str)
3722{
3723        if (!str)
3724                return 0;
3725        dhash_entries = simple_strtoul(str, &str, 0);
3726        return 1;
3727}
3728__setup("dhash_entries=", set_dhash_entries);
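/*
 * Example: the table size can be pinned from the boot command line, e.g.
 * "dhash_entries=1048576" (an illustrative value); otherwise it is sized
 * from available memory by alloc_large_system_hash() below.
 */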
3729
3730static void __init dcache_init_early(void)
3731{
3732        unsigned int loop;
3733
3734        /* If hashes are distributed across NUMA nodes, defer
3735         * hash allocation until vmalloc space is available.
3736         */
3737        if (hashdist)
3738                return;
3739
3740        dentry_hashtable =
3741                alloc_large_system_hash("Dentry cache",
3742                                        sizeof(struct hlist_bl_head),
3743                                        dhash_entries,
3744                                        13,
3745                                        HASH_EARLY,
3746                                        &d_hash_shift,
3747                                        &d_hash_mask,
3748                                        0,
3749                                        0);
3750
3751        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3752                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3753}
3754
3755static void __init dcache_init(void)
3756{
3757        unsigned int loop;
3758
3759        /* 
3760         * A constructor could be added for stable state like the lists,
3761         * but it is probably not worth it because of the cache nature
3762         * of the dcache. 
3763         */
3764        dentry_cache = KMEM_CACHE(dentry,
3765                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3766
3767        /* Hash may have been set up in dcache_init_early */
3768        if (!hashdist)
3769                return;
3770
3771        dentry_hashtable =
3772                alloc_large_system_hash("Dentry cache",
3773                                        sizeof(struct hlist_bl_head),
3774                                        dhash_entries,
3775                                        13,
3776                                        0,
3777                                        &d_hash_shift,
3778                                        &d_hash_mask,
3779                                        0,
3780                                        0);
3781
3782        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3783                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3784}
3785
3786/* SLAB cache for __getname() consumers */
3787struct kmem_cache *names_cachep __read_mostly;
3788EXPORT_SYMBOL(names_cachep);
3789
3790EXPORT_SYMBOL(d_genocide);
3791
3792void __init vfs_caches_init_early(void)
3793{
3794        dcache_init_early();
3795        inode_init_early();
3796}
3797
3798void __init vfs_caches_init(void)
3799{
3800        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3801                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3802
3803        dcache_init();
3804        inode_init();
3805        files_init();
3806        files_maxfiles_init();
3807        mnt_init();
3808        bdev_cache_init();
3809        chrdev_init();
3810}
3811