linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/export.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41#include "mount.h"
  42
  43/*
  44 * Usage:
  45 * dcache->d_inode->i_lock protects:
  46 *   - i_dentry, d_alias, d_inode of aliases
  47 * dcache_hash_bucket lock protects:
  48 *   - the dcache hash table
  49 * s_anon bl list spinlock protects:
  50 *   - the s_anon list (see __d_drop)
  51 * dcache_lru_lock protects:
  52 *   - the dcache lru lists and counters
  53 * d_lock protects:
  54 *   - d_flags
  55 *   - d_name
  56 *   - d_lru
  57 *   - d_count
  58 *   - d_unhashed()
  59 *   - d_parent and d_subdirs
  60 *   - childrens' d_child and d_parent
  61 *   - d_alias, d_inode
  62 *
  63 * Ordering:
  64 * dentry->d_inode->i_lock
  65 *   dentry->d_lock
  66 *     dcache_lru_lock
  67 *     dcache_hash_bucket lock
  68 *     s_anon lock
  69 *
  70 * If there is an ancestor relationship:
  71 * dentry->d_parent->...->d_parent->d_lock
  72 *   ...
  73 *     dentry->d_parent->d_lock
  74 *       dentry->d_lock
  75 *
  76 * If no ancestor relationship:
  77 * if (dentry1 < dentry2)
  78 *   dentry1->d_lock
  79 *     dentry2->d_lock
  80 */
  81int sysctl_vfs_cache_pressure __read_mostly = 100;
  82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  83
  84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  86
  87EXPORT_SYMBOL(rename_lock);
  88
  89static struct kmem_cache *dentry_cache __read_mostly;
  90
  91/*
  92 * This is the single most critical data structure when it comes
  93 * to the dcache: the hashtable for lookups. Somebody should try
  94 * to make this good - I've just made it work.
  95 *
  96 * This hash-function tries to avoid losing too many bits of hash
  97 * information, yet avoid using a prime hash-size or similar.
  98 */
  99#define D_HASHBITS     d_hash_shift
 100#define D_HASHMASK     d_hash_mask
 101
 102static unsigned int d_hash_mask __read_mostly;
 103static unsigned int d_hash_shift __read_mostly;
 104
 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
 106
 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 108                                        unsigned int hash)
 109{
 110        hash += (unsigned long) parent / L1_CACHE_BYTES;
 111        hash = hash + (hash >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
 115/* Statistics gathering. */
 116struct dentry_stat_t dentry_stat = {
 117        .age_limit = 45,
 118};
 119
 120static DEFINE_PER_CPU(unsigned int, nr_dentry);
 121
 122#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 123static int get_nr_dentry(void)
 124{
 125        int i;
 126        int sum = 0;
 127        for_each_possible_cpu(i)
 128                sum += per_cpu(nr_dentry, i);
 129        return sum < 0 ? 0 : sum;
 130}
 131
 132int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 133                   size_t *lenp, loff_t *ppos)
 134{
 135        dentry_stat.nr_dentry = get_nr_dentry();
 136        return proc_dointvec(table, write, buffer, lenp, ppos);
 137}
 138#endif
 139
 140/*
 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 142 * The strings are both count bytes long, and count is non-zero.
 143 */
 144#ifdef CONFIG_DCACHE_WORD_ACCESS
 145
 146#include <asm/word-at-a-time.h>
 147/*
 148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 149 * aligned allocation for this particular component. We don't
 150 * strictly need the load_unaligned_zeropad() safety, but it
 151 * doesn't hurt either.
 152 *
 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 154 * need the careful unaligned handling.
 155 */
 156static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 157{
 158        unsigned long a,b,mask;
 159
 160        for (;;) {
 161                a = *(unsigned long *)cs;
 162                b = load_unaligned_zeropad(ct);
 163                if (tcount < sizeof(unsigned long))
 164                        break;
 165                if (unlikely(a != b))
 166                        return 1;
 167                cs += sizeof(unsigned long);
 168                ct += sizeof(unsigned long);
 169                tcount -= sizeof(unsigned long);
 170                if (!tcount)
 171                        return 0;
 172        }
 173        mask = ~(~0ul << tcount*8);
 174        return unlikely(!!((a ^ b) & mask));
 175}
 176
 177#else
 178
 179static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 180{
 181        do {
 182                if (*cs != *ct)
 183                        return 1;
 184                cs++;
 185                ct++;
 186                tcount--;
 187        } while (tcount);
 188        return 0;
 189}
 190
 191#endif
 192
 193static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 194{
 195        const unsigned char *cs;
 196        /*
 197         * Be careful about RCU walk racing with rename:
 198         * use ACCESS_ONCE to fetch the name pointer.
 199         *
 200         * NOTE! Even if a rename will mean that the length
 201         * was not loaded atomically, we don't care. The
 202         * RCU walk will check the sequence count eventually,
 203         * and catch it. And we won't overrun the buffer,
 204         * because we're reading the name pointer atomically,
 205         * and a dentry name is guaranteed to be properly
 206         * terminated with a NUL byte.
 207         *
 208         * End result: even if 'len' is wrong, we'll exit
 209         * early because the data cannot match (there can
 210         * be no NUL in the ct/tcount data)
 211         */
 212        cs = ACCESS_ONCE(dentry->d_name.name);
 213        smp_read_barrier_depends();
 214        return dentry_string_cmp(cs, ct, tcount);
 215}
 216
 217static void __d_free(struct rcu_head *head)
 218{
 219        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 220
 221        WARN_ON(!hlist_unhashed(&dentry->d_alias));
 222        if (dname_external(dentry))
 223                kfree(dentry->d_name.name);
 224        kmem_cache_free(dentry_cache, dentry); 
 225}
 226
 227/*
 228 * no locks, please.
 229 */
 230static void d_free(struct dentry *dentry)
 231{
 232        BUG_ON(dentry->d_count);
 233        this_cpu_dec(nr_dentry);
 234        if (dentry->d_op && dentry->d_op->d_release)
 235                dentry->d_op->d_release(dentry);
 236
 237        /* if dentry was never visible to RCU, immediate free is OK */
 238        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 239                __d_free(&dentry->d_u.d_rcu);
 240        else
 241                call_rcu(&dentry->d_u.d_rcu, __d_free);
 242}
 243
 244/**
 245 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 246 * @dentry: the target dentry
 247 * After this call, in-progress rcu-walk path lookup will fail. This
 248 * should be called after unhashing, and after changing d_inode (if
 249 * the dentry has not already been unhashed).
 250 */
 251static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 252{
 253        assert_spin_locked(&dentry->d_lock);
 254        /* Go through a barrier */
 255        write_seqcount_barrier(&dentry->d_seq);
 256}
 257
 258/*
 259 * Release the dentry's inode, using the filesystem
 260 * d_iput() operation if defined. Dentry has no refcount
 261 * and is unhashed.
 262 */
 263static void dentry_iput(struct dentry * dentry)
 264        __releases(dentry->d_lock)
 265        __releases(dentry->d_inode->i_lock)
 266{
 267        struct inode *inode = dentry->d_inode;
 268        if (inode) {
 269                dentry->d_inode = NULL;
 270                hlist_del_init(&dentry->d_alias);
 271                spin_unlock(&dentry->d_lock);
 272                spin_unlock(&inode->i_lock);
 273                if (!inode->i_nlink)
 274                        fsnotify_inoderemove(inode);
 275                if (dentry->d_op && dentry->d_op->d_iput)
 276                        dentry->d_op->d_iput(dentry, inode);
 277                else
 278                        iput(inode);
 279        } else {
 280                spin_unlock(&dentry->d_lock);
 281        }
 282}
 283
 284/*
 285 * Release the dentry's inode, using the filesystem
 286 * d_iput() operation if defined. dentry remains in-use.
 287 */
 288static void dentry_unlink_inode(struct dentry * dentry)
 289        __releases(dentry->d_lock)
 290        __releases(dentry->d_inode->i_lock)
 291{
 292        struct inode *inode = dentry->d_inode;
 293        dentry->d_inode = NULL;
 294        hlist_del_init(&dentry->d_alias);
 295        dentry_rcuwalk_barrier(dentry);
 296        spin_unlock(&dentry->d_lock);
 297        spin_unlock(&inode->i_lock);
 298        if (!inode->i_nlink)
 299                fsnotify_inoderemove(inode);
 300        if (dentry->d_op && dentry->d_op->d_iput)
 301                dentry->d_op->d_iput(dentry, inode);
 302        else
 303                iput(inode);
 304}
 305
 306/*
 307 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 308 */
 309static void dentry_lru_add(struct dentry *dentry)
 310{
 311        if (list_empty(&dentry->d_lru)) {
 312                spin_lock(&dcache_lru_lock);
 313                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 314                dentry->d_sb->s_nr_dentry_unused++;
 315                dentry_stat.nr_unused++;
 316                spin_unlock(&dcache_lru_lock);
 317        }
 318}
 319
 320static void __dentry_lru_del(struct dentry *dentry)
 321{
 322        list_del_init(&dentry->d_lru);
 323        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 324        dentry->d_sb->s_nr_dentry_unused--;
 325        dentry_stat.nr_unused--;
 326}
 327
 328/*
 329 * Remove a dentry with references from the LRU.
 330 */
 331static void dentry_lru_del(struct dentry *dentry)
 332{
 333        if (!list_empty(&dentry->d_lru)) {
 334                spin_lock(&dcache_lru_lock);
 335                __dentry_lru_del(dentry);
 336                spin_unlock(&dcache_lru_lock);
 337        }
 338}
 339
 340static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 341{
 342        spin_lock(&dcache_lru_lock);
 343        if (list_empty(&dentry->d_lru)) {
 344                list_add_tail(&dentry->d_lru, list);
 345                dentry->d_sb->s_nr_dentry_unused++;
 346                dentry_stat.nr_unused++;
 347        } else {
 348                list_move_tail(&dentry->d_lru, list);
 349        }
 350        spin_unlock(&dcache_lru_lock);
 351}
 352
 353/**
 354 * d_kill - kill dentry and return parent
 355 * @dentry: dentry to kill
 356 * @parent: parent dentry
 357 *
 358 * The dentry must already be unhashed and removed from the LRU.
 359 *
 360 * If this is the root of the dentry tree, return NULL.
 361 *
 362 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 363 * d_kill.
 364 */
 365static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 366        __releases(dentry->d_lock)
 367        __releases(parent->d_lock)
 368        __releases(dentry->d_inode->i_lock)
 369{
 370        list_del(&dentry->d_u.d_child);
 371        /*
 372         * Inform try_to_ascend() that we are no longer attached to the
 373         * dentry tree
 374         */
 375        dentry->d_flags |= DCACHE_DENTRY_KILLED;
 376        if (parent)
 377                spin_unlock(&parent->d_lock);
 378        dentry_iput(dentry);
 379        /*
 380         * dentry_iput drops the locks, at which point nobody (except
 381         * transient RCU lookups) can reach this dentry.
 382         */
 383        d_free(dentry);
 384        return parent;
 385}
 386
 387/*
 388 * Unhash a dentry without inserting an RCU walk barrier or checking that
 389 * dentry->d_lock is locked.  The caller must take care of that, if
 390 * appropriate.
 391 */
 392static void __d_shrink(struct dentry *dentry)
 393{
 394        if (!d_unhashed(dentry)) {
 395                struct hlist_bl_head *b;
 396                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 397                        b = &dentry->d_sb->s_anon;
 398                else
 399                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 400
 401                hlist_bl_lock(b);
 402                __hlist_bl_del(&dentry->d_hash);
 403                dentry->d_hash.pprev = NULL;
 404                hlist_bl_unlock(b);
 405        }
 406}
 407
 408/**
 409 * d_drop - drop a dentry
 410 * @dentry: dentry to drop
 411 *
 412 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 413 * be found through a VFS lookup any more. Note that this is different from
 414 * deleting the dentry - d_delete will try to mark the dentry negative if
 415 * possible, giving a successful _negative_ lookup, while d_drop will
 416 * just make the cache lookup fail.
 417 *
 418 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 419 * reason (NFS timeouts or autofs deletes).
 420 *
 421 * __d_drop requires dentry->d_lock.
 422 */
 423void __d_drop(struct dentry *dentry)
 424{
 425        if (!d_unhashed(dentry)) {
 426                __d_shrink(dentry);
 427                dentry_rcuwalk_barrier(dentry);
 428        }
 429}
 430EXPORT_SYMBOL(__d_drop);
 431
 432void d_drop(struct dentry *dentry)
 433{
 434        spin_lock(&dentry->d_lock);
 435        __d_drop(dentry);
 436        spin_unlock(&dentry->d_lock);
 437}
 438EXPORT_SYMBOL(d_drop);
 439
 440/*
 441 * Finish off a dentry we've decided to kill.
 442 * dentry->d_lock must be held, returns with it unlocked.
 443 * If ref is non-zero, then decrement the refcount too.
 444 * Returns dentry requiring refcount drop, or NULL if we're done.
 445 */
 446static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 447        __releases(dentry->d_lock)
 448{
 449        struct inode *inode;
 450        struct dentry *parent;
 451
 452        inode = dentry->d_inode;
 453        if (inode && !spin_trylock(&inode->i_lock)) {
 454relock:
 455                spin_unlock(&dentry->d_lock);
 456                cpu_relax();
 457                return dentry; /* try again with same dentry */
 458        }
 459        if (IS_ROOT(dentry))
 460                parent = NULL;
 461        else
 462                parent = dentry->d_parent;
 463        if (parent && !spin_trylock(&parent->d_lock)) {
 464                if (inode)
 465                        spin_unlock(&inode->i_lock);
 466                goto relock;
 467        }
 468
 469        if (ref)
 470                dentry->d_count--;
 471        /*
 472         * inform the fs via d_prune that this dentry is about to be
 473         * unhashed and destroyed.
 474         */
 475        if (dentry->d_flags & DCACHE_OP_PRUNE)
 476                dentry->d_op->d_prune(dentry);
 477
 478        dentry_lru_del(dentry);
 479        /* if it was on the hash then remove it */
 480        __d_drop(dentry);
 481        return d_kill(dentry, parent);
 482}
 483
 484/* 
 485 * This is dput
 486 *
 487 * This is complicated by the fact that we do not want to put
 488 * dentries that are no longer on any hash chain on the unused
 489 * list: we'd much rather just get rid of them immediately.
 490 *
 491 * However, that implies that we have to traverse the dentry
 492 * tree upwards to the parents which might _also_ now be
 493 * scheduled for deletion (it may have been only waiting for
 494 * its last child to go away).
 495 *
 496 * This tail recursion is done by hand as we don't want to depend
 497 * on the compiler to always get this right (gcc generally doesn't).
 498 * Real recursion would eat up our stack space.
 499 */
 500
 501/*
 502 * dput - release a dentry
 503 * @dentry: dentry to release 
 504 *
 505 * Release a dentry. This will drop the usage count and if appropriate
 506 * call the dentry unlink method as well as removing it from the queues and
 507 * releasing its resources. If the parent dentries were scheduled for release
 508 * they too may now get deleted.
 509 */
 510void dput(struct dentry *dentry)
 511{
 512        if (!dentry)
 513                return;
 514
 515repeat:
 516        if (dentry->d_count == 1)
 517                might_sleep();
 518        spin_lock(&dentry->d_lock);
 519        BUG_ON(!dentry->d_count);
 520        if (dentry->d_count > 1) {
 521                dentry->d_count--;
 522                spin_unlock(&dentry->d_lock);
 523                return;
 524        }
 525
 526        if (dentry->d_flags & DCACHE_OP_DELETE) {
 527                if (dentry->d_op->d_delete(dentry))
 528                        goto kill_it;
 529        }
 530
 531        /* Unreachable? Get rid of it */
 532        if (d_unhashed(dentry))
 533                goto kill_it;
 534
 535        dentry->d_flags |= DCACHE_REFERENCED;
 536        dentry_lru_add(dentry);
 537
 538        dentry->d_count--;
 539        spin_unlock(&dentry->d_lock);
 540        return;
 541
 542kill_it:
 543        dentry = dentry_kill(dentry, 1);
 544        if (dentry)
 545                goto repeat;
 546}
 547EXPORT_SYMBOL(dput);
 548
 549/**
 550 * d_invalidate - invalidate a dentry
 551 * @dentry: dentry to invalidate
 552 *
 553 * Try to invalidate the dentry if it turns out to be
 554 * possible. If there are other dentries that can be
 555 * reached through this one we can't delete it and we
 556 * return -EBUSY. On success we return 0.
 557 *
 558 * no dcache lock.
 559 */
 560 
 561int d_invalidate(struct dentry * dentry)
 562{
 563        /*
 564         * If it's already been dropped, return OK.
 565         */
 566        spin_lock(&dentry->d_lock);
 567        if (d_unhashed(dentry)) {
 568                spin_unlock(&dentry->d_lock);
 569                return 0;
 570        }
 571        /*
 572         * Check whether to do a partial shrink_dcache
 573         * to get rid of unused child entries.
 574         */
 575        if (!list_empty(&dentry->d_subdirs)) {
 576                spin_unlock(&dentry->d_lock);
 577                shrink_dcache_parent(dentry);
 578                spin_lock(&dentry->d_lock);
 579        }
 580
 581        /*
 582         * Somebody else still using it?
 583         *
 584         * If it's a directory, we can't drop it
 585         * for fear of somebody re-populating it
 586         * with children (even though dropping it
 587         * would make it unreachable from the root,
 588         * we might still populate it if it was a
 589         * working directory or similar).
 590         * We also need to leave mountpoints alone,
 591         * directory or not.
 592         */
 593        if (dentry->d_count > 1 && dentry->d_inode) {
 594                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 595                        spin_unlock(&dentry->d_lock);
 596                        return -EBUSY;
 597                }
 598        }
 599
 600        __d_drop(dentry);
 601        spin_unlock(&dentry->d_lock);
 602        return 0;
 603}
 604EXPORT_SYMBOL(d_invalidate);
 605
 606/* This must be called with d_lock held */
 607static inline void __dget_dlock(struct dentry *dentry)
 608{
 609        dentry->d_count++;
 610}
 611
 612static inline void __dget(struct dentry *dentry)
 613{
 614        spin_lock(&dentry->d_lock);
 615        __dget_dlock(dentry);
 616        spin_unlock(&dentry->d_lock);
 617}
 618
 619struct dentry *dget_parent(struct dentry *dentry)
 620{
 621        struct dentry *ret;
 622
 623repeat:
 624        /*
 625         * Don't need rcu_dereference because we re-check it was correct under
 626         * the lock.
 627         */
 628        rcu_read_lock();
 629        ret = dentry->d_parent;
 630        spin_lock(&ret->d_lock);
 631        if (unlikely(ret != dentry->d_parent)) {
 632                spin_unlock(&ret->d_lock);
 633                rcu_read_unlock();
 634                goto repeat;
 635        }
 636        rcu_read_unlock();
 637        BUG_ON(!ret->d_count);
 638        ret->d_count++;
 639        spin_unlock(&ret->d_lock);
 640        return ret;
 641}
 642EXPORT_SYMBOL(dget_parent);
 643
 644/**
 645 * d_find_alias - grab a hashed alias of inode
 646 * @inode: inode in question
 647 * @want_discon:  flag, used by d_splice_alias, to request
 648 *          that only a DISCONNECTED alias be returned.
 649 *
 650 * If inode has a hashed alias, or is a directory and has any alias,
 651 * acquire the reference to alias and return it. Otherwise return NULL.
 652 * Notice that if inode is a directory there can be only one alias and
 653 * it can be unhashed only if it has no children, or if it is the root
 654 * of a filesystem.
 655 *
 656 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 657 * any other hashed alias over that one unless @want_discon is set,
 658 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 659 */
 660static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 661{
 662        struct dentry *alias, *discon_alias;
 663
 664again:
 665        discon_alias = NULL;
 666        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 667                spin_lock(&alias->d_lock);
 668                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 669                        if (IS_ROOT(alias) &&
 670                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 671                                discon_alias = alias;
 672                        } else if (!want_discon) {
 673                                __dget_dlock(alias);
 674                                spin_unlock(&alias->d_lock);
 675                                return alias;
 676                        }
 677                }
 678                spin_unlock(&alias->d_lock);
 679        }
 680        if (discon_alias) {
 681                alias = discon_alias;
 682                spin_lock(&alias->d_lock);
 683                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 684                        if (IS_ROOT(alias) &&
 685                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 686                                __dget_dlock(alias);
 687                                spin_unlock(&alias->d_lock);
 688                                return alias;
 689                        }
 690                }
 691                spin_unlock(&alias->d_lock);
 692                goto again;
 693        }
 694        return NULL;
 695}
 696
 697struct dentry *d_find_alias(struct inode *inode)
 698{
 699        struct dentry *de = NULL;
 700
 701        if (!hlist_empty(&inode->i_dentry)) {
 702                spin_lock(&inode->i_lock);
 703                de = __d_find_alias(inode, 0);
 704                spin_unlock(&inode->i_lock);
 705        }
 706        return de;
 707}
 708EXPORT_SYMBOL(d_find_alias);
 709
 710/*
 711 *      Try to kill dentries associated with this inode.
 712 * WARNING: you must own a reference to inode.
 713 */
 714void d_prune_aliases(struct inode *inode)
 715{
 716        struct dentry *dentry;
 717restart:
 718        spin_lock(&inode->i_lock);
 719        hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 720                spin_lock(&dentry->d_lock);
 721                if (!dentry->d_count) {
 722                        __dget_dlock(dentry);
 723                        __d_drop(dentry);
 724                        spin_unlock(&dentry->d_lock);
 725                        spin_unlock(&inode->i_lock);
 726                        dput(dentry);
 727                        goto restart;
 728                }
 729                spin_unlock(&dentry->d_lock);
 730        }
 731        spin_unlock(&inode->i_lock);
 732}
 733EXPORT_SYMBOL(d_prune_aliases);
 734
 735/*
 736 * Try to throw away a dentry - free the inode, dput the parent.
 737 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 738 * Releases dentry->d_lock.
 739 *
 740 * This may fail if locks cannot be acquired no problem, just try again.
 741 */
 742static void try_prune_one_dentry(struct dentry *dentry)
 743        __releases(dentry->d_lock)
 744{
 745        struct dentry *parent;
 746
 747        parent = dentry_kill(dentry, 0);
 748        /*
 749         * If dentry_kill returns NULL, we have nothing more to do.
 750         * if it returns the same dentry, trylocks failed. In either
 751         * case, just loop again.
 752         *
 753         * Otherwise, we need to prune ancestors too. This is necessary
 754         * to prevent quadratic behavior of shrink_dcache_parent(), but
 755         * is also expected to be beneficial in reducing dentry cache
 756         * fragmentation.
 757         */
 758        if (!parent)
 759                return;
 760        if (parent == dentry)
 761                return;
 762
 763        /* Prune ancestors. */
 764        dentry = parent;
 765        while (dentry) {
 766                spin_lock(&dentry->d_lock);
 767                if (dentry->d_count > 1) {
 768                        dentry->d_count--;
 769                        spin_unlock(&dentry->d_lock);
 770                        return;
 771                }
 772                dentry = dentry_kill(dentry, 1);
 773        }
 774}
 775
 776static void shrink_dentry_list(struct list_head *list)
 777{
 778        struct dentry *dentry;
 779
 780        rcu_read_lock();
 781        for (;;) {
 782                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 783                if (&dentry->d_lru == list)
 784                        break; /* empty */
 785                spin_lock(&dentry->d_lock);
 786                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 787                        spin_unlock(&dentry->d_lock);
 788                        continue;
 789                }
 790
 791                /*
 792                 * We found an inuse dentry which was not removed from
 793                 * the LRU because of laziness during lookup.  Do not free
 794                 * it - just keep it off the LRU list.
 795                 */
 796                if (dentry->d_count) {
 797                        dentry_lru_del(dentry);
 798                        spin_unlock(&dentry->d_lock);
 799                        continue;
 800                }
 801
 802                rcu_read_unlock();
 803
 804                try_prune_one_dentry(dentry);
 805
 806                rcu_read_lock();
 807        }
 808        rcu_read_unlock();
 809}
 810
 811/**
 812 * prune_dcache_sb - shrink the dcache
 813 * @sb: superblock
 814 * @count: number of entries to try to free
 815 *
 816 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 817 * done when we need more memory an called from the superblock shrinker
 818 * function.
 819 *
 820 * This function may fail to free any resources if all the dentries are in
 821 * use.
 822 */
 823void prune_dcache_sb(struct super_block *sb, int count)
 824{
 825        struct dentry *dentry;
 826        LIST_HEAD(referenced);
 827        LIST_HEAD(tmp);
 828
 829relock:
 830        spin_lock(&dcache_lru_lock);
 831        while (!list_empty(&sb->s_dentry_lru)) {
 832                dentry = list_entry(sb->s_dentry_lru.prev,
 833                                struct dentry, d_lru);
 834                BUG_ON(dentry->d_sb != sb);
 835
 836                if (!spin_trylock(&dentry->d_lock)) {
 837                        spin_unlock(&dcache_lru_lock);
 838                        cpu_relax();
 839                        goto relock;
 840                }
 841
 842                if (dentry->d_flags & DCACHE_REFERENCED) {
 843                        dentry->d_flags &= ~DCACHE_REFERENCED;
 844                        list_move(&dentry->d_lru, &referenced);
 845                        spin_unlock(&dentry->d_lock);
 846                } else {
 847                        list_move_tail(&dentry->d_lru, &tmp);
 848                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 849                        spin_unlock(&dentry->d_lock);
 850                        if (!--count)
 851                                break;
 852                }
 853                cond_resched_lock(&dcache_lru_lock);
 854        }
 855        if (!list_empty(&referenced))
 856                list_splice(&referenced, &sb->s_dentry_lru);
 857        spin_unlock(&dcache_lru_lock);
 858
 859        shrink_dentry_list(&tmp);
 860}
 861
 862/**
 863 * shrink_dcache_sb - shrink dcache for a superblock
 864 * @sb: superblock
 865 *
 866 * Shrink the dcache for the specified super block. This is used to free
 867 * the dcache before unmounting a file system.
 868 */
 869void shrink_dcache_sb(struct super_block *sb)
 870{
 871        LIST_HEAD(tmp);
 872
 873        spin_lock(&dcache_lru_lock);
 874        while (!list_empty(&sb->s_dentry_lru)) {
 875                list_splice_init(&sb->s_dentry_lru, &tmp);
 876                spin_unlock(&dcache_lru_lock);
 877                shrink_dentry_list(&tmp);
 878                spin_lock(&dcache_lru_lock);
 879        }
 880        spin_unlock(&dcache_lru_lock);
 881}
 882EXPORT_SYMBOL(shrink_dcache_sb);
 883
 884/*
 885 * destroy a single subtree of dentries for unmount
 886 * - see the comments on shrink_dcache_for_umount() for a description of the
 887 *   locking
 888 */
 889static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 890{
 891        struct dentry *parent;
 892
 893        BUG_ON(!IS_ROOT(dentry));
 894
 895        for (;;) {
 896                /* descend to the first leaf in the current subtree */
 897                while (!list_empty(&dentry->d_subdirs))
 898                        dentry = list_entry(dentry->d_subdirs.next,
 899                                            struct dentry, d_u.d_child);
 900
 901                /* consume the dentries from this leaf up through its parents
 902                 * until we find one with children or run out altogether */
 903                do {
 904                        struct inode *inode;
 905
 906                        /*
 907                         * inform the fs that this dentry is about to be
 908                         * unhashed and destroyed.
 909                         */
 910                        if (dentry->d_flags & DCACHE_OP_PRUNE)
 911                                dentry->d_op->d_prune(dentry);
 912
 913                        dentry_lru_del(dentry);
 914                        __d_shrink(dentry);
 915
 916                        if (dentry->d_count != 0) {
 917                                printk(KERN_ERR
 918                                       "BUG: Dentry %p{i=%lx,n=%s}"
 919                                       " still in use (%d)"
 920                                       " [unmount of %s %s]\n",
 921                                       dentry,
 922                                       dentry->d_inode ?
 923                                       dentry->d_inode->i_ino : 0UL,
 924                                       dentry->d_name.name,
 925                                       dentry->d_count,
 926                                       dentry->d_sb->s_type->name,
 927                                       dentry->d_sb->s_id);
 928                                BUG();
 929                        }
 930
 931                        if (IS_ROOT(dentry)) {
 932                                parent = NULL;
 933                                list_del(&dentry->d_u.d_child);
 934                        } else {
 935                                parent = dentry->d_parent;
 936                                parent->d_count--;
 937                                list_del(&dentry->d_u.d_child);
 938                        }
 939
 940                        inode = dentry->d_inode;
 941                        if (inode) {
 942                                dentry->d_inode = NULL;
 943                                hlist_del_init(&dentry->d_alias);
 944                                if (dentry->d_op && dentry->d_op->d_iput)
 945                                        dentry->d_op->d_iput(dentry, inode);
 946                                else
 947                                        iput(inode);
 948                        }
 949
 950                        d_free(dentry);
 951
 952                        /* finished when we fall off the top of the tree,
 953                         * otherwise we ascend to the parent and move to the
 954                         * next sibling if there is one */
 955                        if (!parent)
 956                                return;
 957                        dentry = parent;
 958                } while (list_empty(&dentry->d_subdirs));
 959
 960                dentry = list_entry(dentry->d_subdirs.next,
 961                                    struct dentry, d_u.d_child);
 962        }
 963}
 964
 965/*
 966 * destroy the dentries attached to a superblock on unmounting
 967 * - we don't need to use dentry->d_lock because:
 968 *   - the superblock is detached from all mountings and open files, so the
 969 *     dentry trees will not be rearranged by the VFS
 970 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 971 *     any dentries belonging to this superblock that it comes across
 972 *   - the filesystem itself is no longer permitted to rearrange the dentries
 973 *     in this superblock
 974 */
 975void shrink_dcache_for_umount(struct super_block *sb)
 976{
 977        struct dentry *dentry;
 978
 979        if (down_read_trylock(&sb->s_umount))
 980                BUG();
 981
 982        dentry = sb->s_root;
 983        sb->s_root = NULL;
 984        dentry->d_count--;
 985        shrink_dcache_for_umount_subtree(dentry);
 986
 987        while (!hlist_bl_empty(&sb->s_anon)) {
 988                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 989                shrink_dcache_for_umount_subtree(dentry);
 990        }
 991}
 992
 993/*
 994 * This tries to ascend one level of parenthood, but
 995 * we can race with renaming, so we need to re-check
 996 * the parenthood after dropping the lock and check
 997 * that the sequence number still matches.
 998 */
 999static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1000{
1001        struct dentry *new = old->d_parent;
1002
1003        rcu_read_lock();
1004        spin_unlock(&old->d_lock);
1005        spin_lock(&new->d_lock);
1006
1007        /*
1008         * might go back up the wrong parent if we have had a rename
1009         * or deletion
1010         */
1011        if (new != old->d_parent ||
1012                 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1013                 (!locked && read_seqretry(&rename_lock, seq))) {
1014                spin_unlock(&new->d_lock);
1015                new = NULL;
1016        }
1017        rcu_read_unlock();
1018        return new;
1019}
1020
1021
1022/*
1023 * Search for at least 1 mount point in the dentry's subdirs.
1024 * We descend to the next level whenever the d_subdirs
1025 * list is non-empty and continue searching.
1026 */
1027 
1028/**
1029 * have_submounts - check for mounts over a dentry
1030 * @parent: dentry to check.
1031 *
1032 * Return true if the parent or its subdirectories contain
1033 * a mount point
1034 */
1035int have_submounts(struct dentry *parent)
1036{
1037        struct dentry *this_parent;
1038        struct list_head *next;
1039        unsigned seq;
1040        int locked = 0;
1041
1042        seq = read_seqbegin(&rename_lock);
1043again:
1044        this_parent = parent;
1045
1046        if (d_mountpoint(parent))
1047                goto positive;
1048        spin_lock(&this_parent->d_lock);
1049repeat:
1050        next = this_parent->d_subdirs.next;
1051resume:
1052        while (next != &this_parent->d_subdirs) {
1053                struct list_head *tmp = next;
1054                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1055                next = tmp->next;
1056
1057                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1058                /* Have we found a mount point ? */
1059                if (d_mountpoint(dentry)) {
1060                        spin_unlock(&dentry->d_lock);
1061                        spin_unlock(&this_parent->d_lock);
1062                        goto positive;
1063                }
1064                if (!list_empty(&dentry->d_subdirs)) {
1065                        spin_unlock(&this_parent->d_lock);
1066                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1067                        this_parent = dentry;
1068                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1069                        goto repeat;
1070                }
1071                spin_unlock(&dentry->d_lock);
1072        }
1073        /*
1074         * All done at this level ... ascend and resume the search.
1075         */
1076        if (this_parent != parent) {
1077                struct dentry *child = this_parent;
1078                this_parent = try_to_ascend(this_parent, locked, seq);
1079                if (!this_parent)
1080                        goto rename_retry;
1081                next = child->d_u.d_child.next;
1082                goto resume;
1083        }
1084        spin_unlock(&this_parent->d_lock);
1085        if (!locked && read_seqretry(&rename_lock, seq))
1086                goto rename_retry;
1087        if (locked)
1088                write_sequnlock(&rename_lock);
1089        return 0; /* No mount points found in tree */
1090positive:
1091        if (!locked && read_seqretry(&rename_lock, seq))
1092                goto rename_retry;
1093        if (locked)
1094                write_sequnlock(&rename_lock);
1095        return 1;
1096
1097rename_retry:
1098        if (locked)
1099                goto again;
1100        locked = 1;
1101        write_seqlock(&rename_lock);
1102        goto again;
1103}
1104EXPORT_SYMBOL(have_submounts);
1105
1106/*
1107 * Search the dentry child list of the specified parent,
1108 * and move any unused dentries to the end of the unused
1109 * list for prune_dcache(). We descend to the next level
1110 * whenever the d_subdirs list is non-empty and continue
1111 * searching.
1112 *
1113 * It returns zero iff there are no unused children,
1114 * otherwise  it returns the number of children moved to
1115 * the end of the unused list. This may not be the total
1116 * number of unused children, because select_parent can
1117 * drop the lock and return early due to latency
1118 * constraints.
1119 */
1120static int select_parent(struct dentry *parent, struct list_head *dispose)
1121{
1122        struct dentry *this_parent;
1123        struct list_head *next;
1124        unsigned seq;
1125        int found = 0;
1126        int locked = 0;
1127
1128        seq = read_seqbegin(&rename_lock);
1129again:
1130        this_parent = parent;
1131        spin_lock(&this_parent->d_lock);
1132repeat:
1133        next = this_parent->d_subdirs.next;
1134resume:
1135        while (next != &this_parent->d_subdirs) {
1136                struct list_head *tmp = next;
1137                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1138                next = tmp->next;
1139
1140                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1141
1142                /*
1143                 * move only zero ref count dentries to the dispose list.
1144                 *
1145                 * Those which are presently on the shrink list, being processed
1146                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1147                 * loop in shrink_dcache_parent() might not make any progress
1148                 * and loop forever.
1149                 */
1150                if (dentry->d_count) {
1151                        dentry_lru_del(dentry);
1152                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1153                        dentry_lru_move_list(dentry, dispose);
1154                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1155                        found++;
1156                }
1157                /*
1158                 * We can return to the caller if we have found some (this
1159                 * ensures forward progress). We'll be coming back to find
1160                 * the rest.
1161                 */
1162                if (found && need_resched()) {
1163                        spin_unlock(&dentry->d_lock);
1164                        goto out;
1165                }
1166
1167                /*
1168                 * Descend a level if the d_subdirs list is non-empty.
1169                 */
1170                if (!list_empty(&dentry->d_subdirs)) {
1171                        spin_unlock(&this_parent->d_lock);
1172                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1173                        this_parent = dentry;
1174                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1175                        goto repeat;
1176                }
1177
1178                spin_unlock(&dentry->d_lock);
1179        }
1180        /*
1181         * All done at this level ... ascend and resume the search.
1182         */
1183        if (this_parent != parent) {
1184                struct dentry *child = this_parent;
1185                this_parent = try_to_ascend(this_parent, locked, seq);
1186                if (!this_parent)
1187                        goto rename_retry;
1188                next = child->d_u.d_child.next;
1189                goto resume;
1190        }
1191out:
1192        spin_unlock(&this_parent->d_lock);
1193        if (!locked && read_seqretry(&rename_lock, seq))
1194                goto rename_retry;
1195        if (locked)
1196                write_sequnlock(&rename_lock);
1197        return found;
1198
1199rename_retry:
1200        if (found)
1201                return found;
1202        if (locked)
1203                goto again;
1204        locked = 1;
1205        write_seqlock(&rename_lock);
1206        goto again;
1207}
1208
1209/**
1210 * shrink_dcache_parent - prune dcache
1211 * @parent: parent of entries to prune
1212 *
1213 * Prune the dcache to remove unused children of the parent dentry.
1214 */
1215void shrink_dcache_parent(struct dentry * parent)
1216{
1217        LIST_HEAD(dispose);
1218        int found;
1219
1220        while ((found = select_parent(parent, &dispose)) != 0) {
1221                shrink_dentry_list(&dispose);
1222                cond_resched();
1223        }
1224}
1225EXPORT_SYMBOL(shrink_dcache_parent);
1226
1227/**
1228 * __d_alloc    -       allocate a dcache entry
1229 * @sb: filesystem it will belong to
1230 * @name: qstr of the name
1231 *
1232 * Allocates a dentry. It returns %NULL if there is insufficient memory
1233 * available. On a success the dentry is returned. The name passed in is
1234 * copied and the copy passed in may be reused after this call.
1235 */
1236 
1237struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1238{
1239        struct dentry *dentry;
1240        char *dname;
1241
1242        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1243        if (!dentry)
1244                return NULL;
1245
1246        /*
1247         * We guarantee that the inline name is always NUL-terminated.
1248         * This way the memcpy() done by the name switching in rename
1249         * will still always have a NUL at the end, even if we might
1250         * be overwriting an internal NUL character
1251         */
1252        dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1253        if (name->len > DNAME_INLINE_LEN-1) {
1254                dname = kmalloc(name->len + 1, GFP_KERNEL);
1255                if (!dname) {
1256                        kmem_cache_free(dentry_cache, dentry); 
1257                        return NULL;
1258                }
1259        } else  {
1260                dname = dentry->d_iname;
1261        }       
1262
1263        dentry->d_name.len = name->len;
1264        dentry->d_name.hash = name->hash;
1265        memcpy(dname, name->name, name->len);
1266        dname[name->len] = 0;
1267
1268        /* Make sure we always see the terminating NUL character */
1269        smp_wmb();
1270        dentry->d_name.name = dname;
1271
1272        dentry->d_count = 1;
1273        dentry->d_flags = 0;
1274        spin_lock_init(&dentry->d_lock);
1275        seqcount_init(&dentry->d_seq);
1276        dentry->d_inode = NULL;
1277        dentry->d_parent = dentry;
1278        dentry->d_sb = sb;
1279        dentry->d_op = NULL;
1280        dentry->d_fsdata = NULL;
1281        INIT_HLIST_BL_NODE(&dentry->d_hash);
1282        INIT_LIST_HEAD(&dentry->d_lru);
1283        INIT_LIST_HEAD(&dentry->d_subdirs);
1284        INIT_HLIST_NODE(&dentry->d_alias);
1285        INIT_LIST_HEAD(&dentry->d_u.d_child);
1286        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1287
1288        this_cpu_inc(nr_dentry);
1289
1290        return dentry;
1291}
1292
1293/**
1294 * d_alloc      -       allocate a dcache entry
1295 * @parent: parent of entry to allocate
1296 * @name: qstr of the name
1297 *
1298 * Allocates a dentry. It returns %NULL if there is insufficient memory
1299 * available. On a success the dentry is returned. The name passed in is
1300 * copied and the copy passed in may be reused after this call.
1301 */
1302struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1303{
1304        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1305        if (!dentry)
1306                return NULL;
1307
1308        spin_lock(&parent->d_lock);
1309        /*
1310         * don't need child lock because it is not subject
1311         * to concurrency here
1312         */
1313        __dget_dlock(parent);
1314        dentry->d_parent = parent;
1315        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1316        spin_unlock(&parent->d_lock);
1317
1318        return dentry;
1319}
1320EXPORT_SYMBOL(d_alloc);
1321
1322struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1323{
1324        struct dentry *dentry = __d_alloc(sb, name);
1325        if (dentry)
1326                dentry->d_flags |= DCACHE_DISCONNECTED;
1327        return dentry;
1328}
1329EXPORT_SYMBOL(d_alloc_pseudo);
1330
1331struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1332{
1333        struct qstr q;
1334
1335        q.name = name;
1336        q.len = strlen(name);
1337        q.hash = full_name_hash(q.name, q.len);
1338        return d_alloc(parent, &q);
1339}
1340EXPORT_SYMBOL(d_alloc_name);
1341
1342void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1343{
1344        WARN_ON_ONCE(dentry->d_op);
1345        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1346                                DCACHE_OP_COMPARE       |
1347                                DCACHE_OP_REVALIDATE    |
1348                                DCACHE_OP_WEAK_REVALIDATE       |
1349                                DCACHE_OP_DELETE ));
1350        dentry->d_op = op;
1351        if (!op)
1352                return;
1353        if (op->d_hash)
1354                dentry->d_flags |= DCACHE_OP_HASH;
1355        if (op->d_compare)
1356                dentry->d_flags |= DCACHE_OP_COMPARE;
1357        if (op->d_revalidate)
1358                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1359        if (op->d_weak_revalidate)
1360                dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1361        if (op->d_delete)
1362                dentry->d_flags |= DCACHE_OP_DELETE;
1363        if (op->d_prune)
1364                dentry->d_flags |= DCACHE_OP_PRUNE;
1365
1366}
1367EXPORT_SYMBOL(d_set_d_op);
1368
1369static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1370{
1371        spin_lock(&dentry->d_lock);
1372        if (inode) {
1373                if (unlikely(IS_AUTOMOUNT(inode)))
1374                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1375                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1376        }
1377        dentry->d_inode = inode;
1378        dentry_rcuwalk_barrier(dentry);
1379        spin_unlock(&dentry->d_lock);
1380        fsnotify_d_instantiate(dentry, inode);
1381}
1382
1383/**
1384 * d_instantiate - fill in inode information for a dentry
1385 * @entry: dentry to complete
1386 * @inode: inode to attach to this dentry
1387 *
1388 * Fill in inode information in the entry.
1389 *
1390 * This turns negative dentries into productive full members
1391 * of society.
1392 *
1393 * NOTE! This assumes that the inode count has been incremented
1394 * (or otherwise set) by the caller to indicate that it is now
1395 * in use by the dcache.
1396 */
1397 
1398void d_instantiate(struct dentry *entry, struct inode * inode)
1399{
1400        BUG_ON(!hlist_unhashed(&entry->d_alias));
1401        if (inode)
1402                spin_lock(&inode->i_lock);
1403        __d_instantiate(entry, inode);
1404        if (inode)
1405                spin_unlock(&inode->i_lock);
1406        security_d_instantiate(entry, inode);
1407}
1408EXPORT_SYMBOL(d_instantiate);
1409
1410/**
1411 * d_instantiate_unique - instantiate a non-aliased dentry
1412 * @entry: dentry to instantiate
1413 * @inode: inode to attach to this dentry
1414 *
1415 * Fill in inode information in the entry. On success, it returns NULL.
1416 * If an unhashed alias of "entry" already exists, then we return the
1417 * aliased dentry instead and drop one reference to inode.
1418 *
1419 * Note that in order to avoid conflicts with rename() etc, the caller
1420 * had better be holding the parent directory semaphore.
1421 *
1422 * This also assumes that the inode count has been incremented
1423 * (or otherwise set) by the caller to indicate that it is now
1424 * in use by the dcache.
1425 */
1426static struct dentry *__d_instantiate_unique(struct dentry *entry,
1427                                             struct inode *inode)
1428{
1429        struct dentry *alias;
1430        int len = entry->d_name.len;
1431        const char *name = entry->d_name.name;
1432        unsigned int hash = entry->d_name.hash;
1433
1434        if (!inode) {
1435                __d_instantiate(entry, NULL);
1436                return NULL;
1437        }
1438
1439        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
1440                /*
1441                 * Don't need alias->d_lock here, because aliases with
1442                 * d_parent == entry->d_parent are not subject to name or
1443                 * parent changes, because the parent inode i_mutex is held.
1444                 */
1445                if (alias->d_name.hash != hash)
1446                        continue;
1447                if (alias->d_parent != entry->d_parent)
1448                        continue;
1449                if (alias->d_name.len != len)
1450                        continue;
1451                if (dentry_cmp(alias, name, len))
1452                        continue;
1453                __dget(alias);
1454                return alias;
1455        }
1456
1457        __d_instantiate(entry, inode);
1458        return NULL;
1459}
1460
1461struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1462{
1463        struct dentry *result;
1464
1465        BUG_ON(!hlist_unhashed(&entry->d_alias));
1466
1467        if (inode)
1468                spin_lock(&inode->i_lock);
1469        result = __d_instantiate_unique(entry, inode);
1470        if (inode)
1471                spin_unlock(&inode->i_lock);
1472
1473        if (!result) {
1474                security_d_instantiate(entry, inode);
1475                return NULL;
1476        }
1477
1478        BUG_ON(!d_unhashed(result));
1479        iput(inode);
1480        return result;
1481}
1482
1483EXPORT_SYMBOL(d_instantiate_unique);
1484
1485struct dentry *d_make_root(struct inode *root_inode)
1486{
1487        struct dentry *res = NULL;
1488
1489        if (root_inode) {
1490                static const struct qstr name = QSTR_INIT("/", 1);
1491
1492                res = __d_alloc(root_inode->i_sb, &name);
1493                if (res)
1494                        d_instantiate(res, root_inode);
1495                else
1496                        iput(root_inode);
1497        }
1498        return res;
1499}
1500EXPORT_SYMBOL(d_make_root);
1501
1502static struct dentry * __d_find_any_alias(struct inode *inode)
1503{
1504        struct dentry *alias;
1505
1506        if (hlist_empty(&inode->i_dentry))
1507                return NULL;
1508        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
1509        __dget(alias);
1510        return alias;
1511}
1512
1513/**
1514 * d_find_any_alias - find any alias for a given inode
1515 * @inode: inode to find an alias for
1516 *
1517 * If any aliases exist for the given inode, take and return a
1518 * reference for one of them.  If no aliases exist, return %NULL.
1519 */
1520struct dentry *d_find_any_alias(struct inode *inode)
1521{
1522        struct dentry *de;
1523
1524        spin_lock(&inode->i_lock);
1525        de = __d_find_any_alias(inode);
1526        spin_unlock(&inode->i_lock);
1527        return de;
1528}
1529EXPORT_SYMBOL(d_find_any_alias);
1530
1531/**
1532 * d_obtain_alias - find or allocate a dentry for a given inode
1533 * @inode: inode to allocate the dentry for
1534 *
1535 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1536 * similar open by handle operations.  The returned dentry may be anonymous,
1537 * or may have a full name (if the inode was already in the cache).
1538 *
1539 * When called on a directory inode, we must ensure that the inode only ever
1540 * has one dentry.  If a dentry is found, that is returned instead of
1541 * allocating a new one.
1542 *
1543 * On successful return, the reference to the inode has been transferred
1544 * to the dentry.  In case of an error the reference on the inode is released.
1545 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1546 * be passed in and will be the error will be propagate to the return value,
1547 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1548 */
1549struct dentry *d_obtain_alias(struct inode *inode)
1550{
1551        static const struct qstr anonstring = QSTR_INIT("/", 1);
1552        struct dentry *tmp;
1553        struct dentry *res;
1554
1555        if (!inode)
1556                return ERR_PTR(-ESTALE);
1557        if (IS_ERR(inode))
1558                return ERR_CAST(inode);
1559
1560        res = d_find_any_alias(inode);
1561        if (res)
1562                goto out_iput;
1563
1564        tmp = __d_alloc(inode->i_sb, &anonstring);
1565        if (!tmp) {
1566                res = ERR_PTR(-ENOMEM);
1567                goto out_iput;
1568        }
1569
1570        spin_lock(&inode->i_lock);
1571        res = __d_find_any_alias(inode);
1572        if (res) {
1573                spin_unlock(&inode->i_lock);
1574                dput(tmp);
1575                goto out_iput;
1576        }
1577
1578        /* attach a disconnected dentry */
1579        spin_lock(&tmp->d_lock);
1580        tmp->d_inode = inode;
1581        tmp->d_flags |= DCACHE_DISCONNECTED;
1582        hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1583        hlist_bl_lock(&tmp->d_sb->s_anon);
1584        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1585        hlist_bl_unlock(&tmp->d_sb->s_anon);
1586        spin_unlock(&tmp->d_lock);
1587        spin_unlock(&inode->i_lock);
1588        security_d_instantiate(tmp, inode);
1589
1590        return tmp;
1591
1592 out_iput:
1593        if (res && !IS_ERR(res))
1594                security_d_instantiate(res, inode);
1595        iput(inode);
1596        return res;
1597}
1598EXPORT_SYMBOL(d_obtain_alias);
1599
1600/**
1601 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1602 * @inode:  the inode which may have a disconnected dentry
1603 * @dentry: a negative dentry which we want to point to the inode.
1604 *
1605 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1606 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1607 * and return it, else simply d_add the inode to the dentry and return NULL.
1608 *
1609 * This is needed in the lookup routine of any filesystem that is exportable
1610 * (via knfsd) so that we can build dcache paths to directories effectively.
1611 *
1612 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1613 * is returned.  This matches the expected return value of ->lookup.
1614 *
1615 */
1616struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1617{
1618        struct dentry *new = NULL;
1619
1620        if (IS_ERR(inode))
1621                return ERR_CAST(inode);
1622
1623        if (inode && S_ISDIR(inode->i_mode)) {
1624                spin_lock(&inode->i_lock);
1625                new = __d_find_alias(inode, 1);
1626                if (new) {
1627                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1628                        spin_unlock(&inode->i_lock);
1629                        security_d_instantiate(new, inode);
1630                        d_move(new, dentry);
1631                        iput(inode);
1632                } else {
1633                        /* already taking inode->i_lock, so d_add() by hand */
1634                        __d_instantiate(dentry, inode);
1635                        spin_unlock(&inode->i_lock);
1636                        security_d_instantiate(dentry, inode);
1637                        d_rehash(dentry);
1638                }
1639        } else
1640                d_add(dentry, inode);
1641        return new;
1642}
1643EXPORT_SYMBOL(d_splice_alias);
1644
1645/**
1646 * d_add_ci - lookup or allocate new dentry with case-exact name
1647 * @inode:  the inode case-insensitive lookup has found
1648 * @dentry: the negative dentry that was passed to the parent's lookup func
1649 * @name:   the case-exact name to be associated with the returned dentry
1650 *
1651 * This is to avoid filling the dcache with case-insensitive names to the
1652 * same inode, only the actual correct case is stored in the dcache for
1653 * case-insensitive filesystems.
1654 *
1655 * For a case-insensitive lookup match and if the the case-exact dentry
1656 * already exists in in the dcache, use it and return it.
1657 *
1658 * If no entry exists with the exact case name, allocate new dentry with
1659 * the exact case, and return the spliced entry.
1660 */
1661struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1662                        struct qstr *name)
1663{
1664        struct dentry *found;
1665        struct dentry *new;
1666
1667        /*
1668         * First check if a dentry matching the name already exists,
1669         * if not go ahead and create it now.
1670         */
1671        found = d_hash_and_lookup(dentry->d_parent, name);
1672        if (unlikely(IS_ERR(found)))
1673                goto err_out;
1674        if (!found) {
1675                new = d_alloc(dentry->d_parent, name);
1676                if (!new) {
1677                        found = ERR_PTR(-ENOMEM);
1678                        goto err_out;
1679                }
1680
1681                found = d_splice_alias(inode, new);
1682                if (found) {
1683                        dput(new);
1684                        return found;
1685                }
1686                return new;
1687        }
1688
1689        /*
1690         * If a matching dentry exists, and it's not negative use it.
1691         *
1692         * Decrement the reference count to balance the iget() done
1693         * earlier on.
1694         */
1695        if (found->d_inode) {
1696                if (unlikely(found->d_inode != inode)) {
1697                        /* This can't happen because bad inodes are unhashed. */
1698                        BUG_ON(!is_bad_inode(inode));
1699                        BUG_ON(!is_bad_inode(found->d_inode));
1700                }
1701                iput(inode);
1702                return found;
1703        }
1704
1705        /*
1706         * Negative dentry: instantiate it unless the inode is a directory and
1707         * already has a dentry.
1708         */
1709        new = d_splice_alias(inode, found);
1710        if (new) {
1711                dput(found);
1712                found = new;
1713        }
1714        return found;
1715
1716err_out:
1717        iput(inode);
1718        return found;
1719}
1720EXPORT_SYMBOL(d_add_ci);
1721
1722/*
1723 * Do the slow-case of the dentry name compare.
1724 *
1725 * Unlike the dentry_cmp() function, we need to atomically
1726 * load the name, length and inode information, so that the
1727 * filesystem can rely on them, and can use the 'name' and
1728 * 'len' information without worrying about walking off the
1729 * end of memory etc.
1730 *
1731 * Thus the read_seqcount_retry() and the "duplicate" info
1732 * in arguments (the low-level filesystem should not look
1733 * at the dentry inode or name contents directly, since
1734 * rename can change them while we're in RCU mode).
1735 */
1736enum slow_d_compare {
1737        D_COMP_OK,
1738        D_COMP_NOMATCH,
1739        D_COMP_SEQRETRY,
1740};
1741
1742static noinline enum slow_d_compare slow_dentry_cmp(
1743                const struct dentry *parent,
1744                struct inode *inode,
1745                struct dentry *dentry,
1746                unsigned int seq,
1747                const struct qstr *name)
1748{
1749        int tlen = dentry->d_name.len;
1750        const char *tname = dentry->d_name.name;
1751        struct inode *i = dentry->d_inode;
1752
1753        if (read_seqcount_retry(&dentry->d_seq, seq)) {
1754                cpu_relax();
1755                return D_COMP_SEQRETRY;
1756        }
1757        if (parent->d_op->d_compare(parent, inode,
1758                                dentry, i,
1759                                tlen, tname, name))
1760                return D_COMP_NOMATCH;
1761        return D_COMP_OK;
1762}
1763
1764/**
1765 * __d_lookup_rcu - search for a dentry (racy, store-free)
1766 * @parent: parent dentry
1767 * @name: qstr of name we wish to find
1768 * @seqp: returns d_seq value at the point where the dentry was found
1769 * @inode: returns dentry->d_inode when the inode was found valid.
1770 * Returns: dentry, or NULL
1771 *
1772 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1773 * resolution (store-free path walking) design described in
1774 * Documentation/filesystems/path-lookup.txt.
1775 *
1776 * This is not to be used outside core vfs.
1777 *
1778 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1779 * held, and rcu_read_lock held. The returned dentry must not be stored into
1780 * without taking d_lock and checking d_seq sequence count against @seq
1781 * returned here.
1782 *
1783 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1784 * function.
1785 *
1786 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1787 * the returned dentry, so long as its parent's seqlock is checked after the
1788 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1789 * is formed, giving integrity down the path walk.
1790 *
1791 * NOTE! The caller *has* to check the resulting dentry against the sequence
1792 * number we've returned before using any of the resulting dentry state!
1793 */
1794struct dentry *__d_lookup_rcu(const struct dentry *parent,
1795                                const struct qstr *name,
1796                                unsigned *seqp, struct inode *inode)
1797{
1798        u64 hashlen = name->hash_len;
1799        const unsigned char *str = name->name;
1800        struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1801        struct hlist_bl_node *node;
1802        struct dentry *dentry;
1803
1804        /*
1805         * Note: There is significant duplication with __d_lookup_rcu which is
1806         * required to prevent single threaded performance regressions
1807         * especially on architectures where smp_rmb (in seqcounts) are costly.
1808         * Keep the two functions in sync.
1809         */
1810
1811        /*
1812         * The hash list is protected using RCU.
1813         *
1814         * Carefully use d_seq when comparing a candidate dentry, to avoid
1815         * races with d_move().
1816         *
1817         * It is possible that concurrent renames can mess up our list
1818         * walk here and result in missing our dentry, resulting in the
1819         * false-negative result. d_lookup() protects against concurrent
1820         * renames using rename_lock seqlock.
1821         *
1822         * See Documentation/filesystems/path-lookup.txt for more details.
1823         */
1824        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1825                unsigned seq;
1826
1827seqretry:
1828                /*
1829                 * The dentry sequence count protects us from concurrent
1830                 * renames, and thus protects inode, parent and name fields.
1831                 *
1832                 * The caller must perform a seqcount check in order
1833                 * to do anything useful with the returned dentry,
1834                 * including using the 'd_inode' pointer.
1835                 *
1836                 * NOTE! We do a "raw" seqcount_begin here. That means that
1837                 * we don't wait for the sequence count to stabilize if it
1838                 * is in the middle of a sequence change. If we do the slow
1839                 * dentry compare, we will do seqretries until it is stable,
1840                 * and if we end up with a successful lookup, we actually
1841                 * want to exit RCU lookup anyway.
1842                 */
1843                seq = raw_seqcount_begin(&dentry->d_seq);
1844                if (dentry->d_parent != parent)
1845                        continue;
1846                if (d_unhashed(dentry))
1847                        continue;
1848                *seqp = seq;
1849
1850                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1851                        if (dentry->d_name.hash != hashlen_hash(hashlen))
1852                                continue;
1853                        switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
1854                        case D_COMP_OK:
1855                                return dentry;
1856                        case D_COMP_NOMATCH:
1857                                continue;
1858                        default:
1859                                goto seqretry;
1860                        }
1861                }
1862
1863                if (dentry->d_name.hash_len != hashlen)
1864                        continue;
1865                if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1866                        return dentry;
1867        }
1868        return NULL;
1869}
1870
1871/**
1872 * d_lookup - search for a dentry
1873 * @parent: parent dentry
1874 * @name: qstr of name we wish to find
1875 * Returns: dentry, or NULL
1876 *
1877 * d_lookup searches the children of the parent dentry for the name in
1878 * question. If the dentry is found its reference count is incremented and the
1879 * dentry is returned. The caller must use dput to free the entry when it has
1880 * finished using it. %NULL is returned if the dentry does not exist.
1881 */
1882struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1883{
1884        struct dentry *dentry;
1885        unsigned seq;
1886
1887        do {
1888                seq = read_seqbegin(&rename_lock);
1889                dentry = __d_lookup(parent, name);
1890                if (dentry)
1891                        break;
1892        } while (read_seqretry(&rename_lock, seq));
1893        return dentry;
1894}
1895EXPORT_SYMBOL(d_lookup);
1896
1897/**
1898 * __d_lookup - search for a dentry (racy)
1899 * @parent: parent dentry
1900 * @name: qstr of name we wish to find
1901 * Returns: dentry, or NULL
1902 *
1903 * __d_lookup is like d_lookup, however it may (rarely) return a
1904 * false-negative result due to unrelated rename activity.
1905 *
1906 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1907 * however it must be used carefully, eg. with a following d_lookup in
1908 * the case of failure.
1909 *
1910 * __d_lookup callers must be commented.
1911 */
1912struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1913{
1914        unsigned int len = name->len;
1915        unsigned int hash = name->hash;
1916        const unsigned char *str = name->name;
1917        struct hlist_bl_head *b = d_hash(parent, hash);
1918        struct hlist_bl_node *node;
1919        struct dentry *found = NULL;
1920        struct dentry *dentry;
1921
1922        /*
1923         * Note: There is significant duplication with __d_lookup_rcu which is
1924         * required to prevent single threaded performance regressions
1925         * especially on architectures where smp_rmb (in seqcounts) are costly.
1926         * Keep the two functions in sync.
1927         */
1928
1929        /*
1930         * The hash list is protected using RCU.
1931         *
1932         * Take d_lock when comparing a candidate dentry, to avoid races
1933         * with d_move().
1934         *
1935         * It is possible that concurrent renames can mess up our list
1936         * walk here and result in missing our dentry, resulting in the
1937         * false-negative result. d_lookup() protects against concurrent
1938         * renames using rename_lock seqlock.
1939         *
1940         * See Documentation/filesystems/path-lookup.txt for more details.
1941         */
1942        rcu_read_lock();
1943        
1944        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1945
1946                if (dentry->d_name.hash != hash)
1947                        continue;
1948
1949                spin_lock(&dentry->d_lock);
1950                if (dentry->d_parent != parent)
1951                        goto next;
1952                if (d_unhashed(dentry))
1953                        goto next;
1954
1955                /*
1956                 * It is safe to compare names since d_move() cannot
1957                 * change the qstr (protected by d_lock).
1958                 */
1959                if (parent->d_flags & DCACHE_OP_COMPARE) {
1960                        int tlen = dentry->d_name.len;
1961                        const char *tname = dentry->d_name.name;
1962                        if (parent->d_op->d_compare(parent, parent->d_inode,
1963                                                dentry, dentry->d_inode,
1964                                                tlen, tname, name))
1965                                goto next;
1966                } else {
1967                        if (dentry->d_name.len != len)
1968                                goto next;
1969                        if (dentry_cmp(dentry, str, len))
1970                                goto next;
1971                }
1972
1973                dentry->d_count++;
1974                found = dentry;
1975                spin_unlock(&dentry->d_lock);
1976                break;
1977next:
1978                spin_unlock(&dentry->d_lock);
1979        }
1980        rcu_read_unlock();
1981
1982        return found;
1983}
1984
1985/**
1986 * d_hash_and_lookup - hash the qstr then search for a dentry
1987 * @dir: Directory to search in
1988 * @name: qstr of name we wish to find
1989 *
1990 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
1991 */
1992struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1993{
1994        /*
1995         * Check for a fs-specific hash function. Note that we must
1996         * calculate the standard hash first, as the d_op->d_hash()
1997         * routine may choose to leave the hash value unchanged.
1998         */
1999        name->hash = full_name_hash(name->name, name->len);
2000        if (dir->d_flags & DCACHE_OP_HASH) {
2001                int err = dir->d_op->d_hash(dir, dir->d_inode, name);
2002                if (unlikely(err < 0))
2003                        return ERR_PTR(err);
2004        }
2005        return d_lookup(dir, name);
2006}
2007EXPORT_SYMBOL(d_hash_and_lookup);
2008
2009/**
2010 * d_validate - verify dentry provided from insecure source (deprecated)
2011 * @dentry: The dentry alleged to be valid child of @dparent
2012 * @dparent: The parent dentry (known to be valid)
2013 *
2014 * An insecure source has sent us a dentry, here we verify it and dget() it.
2015 * This is used by ncpfs in its readdir implementation.
2016 * Zero is returned in the dentry is invalid.
2017 *
2018 * This function is slow for big directories, and deprecated, do not use it.
2019 */
2020int d_validate(struct dentry *dentry, struct dentry *dparent)
2021{
2022        struct dentry *child;
2023
2024        spin_lock(&dparent->d_lock);
2025        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2026                if (dentry == child) {
2027                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2028                        __dget_dlock(dentry);
2029                        spin_unlock(&dentry->d_lock);
2030                        spin_unlock(&dparent->d_lock);
2031                        return 1;
2032                }
2033        }
2034        spin_unlock(&dparent->d_lock);
2035
2036        return 0;
2037}
2038EXPORT_SYMBOL(d_validate);
2039
2040/*
2041 * When a file is deleted, we have two options:
2042 * - turn this dentry into a negative dentry
2043 * - unhash this dentry and free it.
2044 *
2045 * Usually, we want to just turn this into
2046 * a negative dentry, but if anybody else is
2047 * currently using the dentry or the inode
2048 * we can't do that and we fall back on removing
2049 * it from the hash queues and waiting for
2050 * it to be deleted later when it has no users
2051 */
2052 
2053/**
2054 * d_delete - delete a dentry
2055 * @dentry: The dentry to delete
2056 *
2057 * Turn the dentry into a negative dentry if possible, otherwise
2058 * remove it from the hash queues so it can be deleted later
2059 */
2060 
2061void d_delete(struct dentry * dentry)
2062{
2063        struct inode *inode;
2064        int isdir = 0;
2065        /*
2066         * Are we the only user?
2067         */
2068again:
2069        spin_lock(&dentry->d_lock);
2070        inode = dentry->d_inode;
2071        isdir = S_ISDIR(inode->i_mode);
2072        if (dentry->d_count == 1) {
2073                if (!spin_trylock(&inode->i_lock)) {
2074                        spin_unlock(&dentry->d_lock);
2075                        cpu_relax();
2076                        goto again;
2077                }
2078                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2079                dentry_unlink_inode(dentry);
2080                fsnotify_nameremove(dentry, isdir);
2081                return;
2082        }
2083
2084        if (!d_unhashed(dentry))
2085                __d_drop(dentry);
2086
2087        spin_unlock(&dentry->d_lock);
2088
2089        fsnotify_nameremove(dentry, isdir);
2090}
2091EXPORT_SYMBOL(d_delete);
2092
2093static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2094{
2095        BUG_ON(!d_unhashed(entry));
2096        hlist_bl_lock(b);
2097        entry->d_flags |= DCACHE_RCUACCESS;
2098        hlist_bl_add_head_rcu(&entry->d_hash, b);
2099        hlist_bl_unlock(b);
2100}
2101
2102static void _d_rehash(struct dentry * entry)
2103{
2104        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2105}
2106
2107/**
2108 * d_rehash     - add an entry back to the hash
2109 * @entry: dentry to add to the hash
2110 *
2111 * Adds a dentry to the hash according to its name.
2112 */
2113 
2114void d_rehash(struct dentry * entry)
2115{
2116        spin_lock(&entry->d_lock);
2117        _d_rehash(entry);
2118        spin_unlock(&entry->d_lock);
2119}
2120EXPORT_SYMBOL(d_rehash);
2121
2122/**
2123 * dentry_update_name_case - update case insensitive dentry with a new name
2124 * @dentry: dentry to be updated
2125 * @name: new name
2126 *
2127 * Update a case insensitive dentry with new case of name.
2128 *
2129 * dentry must have been returned by d_lookup with name @name. Old and new
2130 * name lengths must match (ie. no d_compare which allows mismatched name
2131 * lengths).
2132 *
2133 * Parent inode i_mutex must be held over d_lookup and into this call (to
2134 * keep renames and concurrent inserts, and readdir(2) away).
2135 */
2136void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2137{
2138        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2139        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2140
2141        spin_lock(&dentry->d_lock);
2142        write_seqcount_begin(&dentry->d_seq);
2143        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2144        write_seqcount_end(&dentry->d_seq);
2145        spin_unlock(&dentry->d_lock);
2146}
2147EXPORT_SYMBOL(dentry_update_name_case);
2148
2149static void switch_names(struct dentry *dentry, struct dentry *target)
2150{
2151        if (dname_external(target)) {
2152                if (dname_external(dentry)) {
2153                        /*
2154                         * Both external: swap the pointers
2155                         */
2156                        swap(target->d_name.name, dentry->d_name.name);
2157                } else {
2158                        /*
2159                         * dentry:internal, target:external.  Steal target's
2160                         * storage and make target internal.
2161                         */
2162                        memcpy(target->d_iname, dentry->d_name.name,
2163                                        dentry->d_name.len + 1);
2164                        dentry->d_name.name = target->d_name.name;
2165                        target->d_name.name = target->d_iname;
2166                }
2167        } else {
2168                if (dname_external(dentry)) {
2169                        /*
2170                         * dentry:external, target:internal.  Give dentry's
2171                         * storage to target and make dentry internal
2172                         */
2173                        memcpy(dentry->d_iname, target->d_name.name,
2174                                        target->d_name.len + 1);
2175                        target->d_name.name = dentry->d_name.name;
2176                        dentry->d_name.name = dentry->d_iname;
2177                } else {
2178                        /*
2179                         * Both are internal.  Just copy target to dentry
2180                         */
2181                        memcpy(dentry->d_iname, target->d_name.name,
2182                                        target->d_name.len + 1);
2183                        dentry->d_name.len = target->d_name.len;
2184                        return;
2185                }
2186        }
2187        swap(dentry->d_name.len, target->d_name.len);
2188}
2189
2190static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2191{
2192        /*
2193         * XXXX: do we really need to take target->d_lock?
2194         */
2195        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2196                spin_lock(&target->d_parent->d_lock);
2197        else {
2198                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2199                        spin_lock(&dentry->d_parent->d_lock);
2200                        spin_lock_nested(&target->d_parent->d_lock,
2201                                                DENTRY_D_LOCK_NESTED);
2202                } else {
2203                        spin_lock(&target->d_parent->d_lock);
2204                        spin_lock_nested(&dentry->d_parent->d_lock,
2205                                                DENTRY_D_LOCK_NESTED);
2206                }
2207        }
2208        if (target < dentry) {
2209                spin_lock_nested(&target->d_lock, 2);
2210                spin_lock_nested(&dentry->d_lock, 3);
2211        } else {
2212                spin_lock_nested(&dentry->d_lock, 2);
2213                spin_lock_nested(&target->d_lock, 3);
2214        }
2215}
2216
2217static void dentry_unlock_parents_for_move(struct dentry *dentry,
2218                                        struct dentry *target)
2219{
2220        if (target->d_parent != dentry->d_parent)
2221                spin_unlock(&dentry->d_parent->d_lock);
2222        if (target->d_parent != target)
2223                spin_unlock(&target->d_parent->d_lock);
2224}
2225
2226/*
2227 * When switching names, the actual string doesn't strictly have to
2228 * be preserved in the target - because we're dropping the target
2229 * anyway. As such, we can just do a simple memcpy() to copy over
2230 * the new name before we switch.
2231 *
2232 * Note that we have to be a lot more careful about getting the hash
2233 * switched - we have to switch the hash value properly even if it
2234 * then no longer matches the actual (corrupted) string of the target.
2235 * The hash value has to match the hash queue that the dentry is on..
2236 */
2237/*
2238 * __d_move - move a dentry
2239 * @dentry: entry to move
2240 * @target: new dentry
2241 *
2242 * Update the dcache to reflect the move of a file name. Negative
2243 * dcache entries should not be moved in this way. Caller must hold
2244 * rename_lock, the i_mutex of the source and target directories,
2245 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2246 */
2247static void __d_move(struct dentry * dentry, struct dentry * target)
2248{
2249        if (!dentry->d_inode)
2250                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2251
2252        BUG_ON(d_ancestor(dentry, target));
2253        BUG_ON(d_ancestor(target, dentry));
2254
2255        dentry_lock_for_move(dentry, target);
2256
2257        write_seqcount_begin(&dentry->d_seq);
2258        write_seqcount_begin(&target->d_seq);
2259
2260        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2261
2262        /*
2263         * Move the dentry to the target hash queue. Don't bother checking
2264         * for the same hash queue because of how unlikely it is.
2265         */
2266        __d_drop(dentry);
2267        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2268
2269        /* Unhash the target: dput() will then get rid of it */
2270        __d_drop(target);
2271
2272        list_del(&dentry->d_u.d_child);
2273        list_del(&target->d_u.d_child);
2274
2275        /* Switch the names.. */
2276        switch_names(dentry, target);
2277        swap(dentry->d_name.hash, target->d_name.hash);
2278
2279        /* ... and switch the parents */
2280        if (IS_ROOT(dentry)) {
2281                dentry->d_parent = target->d_parent;
2282                target->d_parent = target;
2283                INIT_LIST_HEAD(&target->d_u.d_child);
2284        } else {
2285                swap(dentry->d_parent, target->d_parent);
2286
2287                /* And add them back to the (new) parent lists */
2288                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2289        }
2290
2291        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2292
2293        write_seqcount_end(&target->d_seq);
2294        write_seqcount_end(&dentry->d_seq);
2295
2296        dentry_unlock_parents_for_move(dentry, target);
2297        spin_unlock(&target->d_lock);
2298        fsnotify_d_move(dentry);
2299        spin_unlock(&dentry->d_lock);
2300}
2301
2302/*
2303 * d_move - move a dentry
2304 * @dentry: entry to move
2305 * @target: new dentry
2306 *
2307 * Update the dcache to reflect the move of a file name. Negative
2308 * dcache entries should not be moved in this way. See the locking
2309 * requirements for __d_move.
2310 */
2311void d_move(struct dentry *dentry, struct dentry *target)
2312{
2313        write_seqlock(&rename_lock);
2314        __d_move(dentry, target);
2315        write_sequnlock(&rename_lock);
2316}
2317EXPORT_SYMBOL(d_move);
2318
2319/**
2320 * d_ancestor - search for an ancestor
2321 * @p1: ancestor dentry
2322 * @p2: child dentry
2323 *
2324 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2325 * an ancestor of p2, else NULL.
2326 */
2327struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2328{
2329        struct dentry *p;
2330
2331        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2332                if (p->d_parent == p1)
2333                        return p;
2334        }
2335        return NULL;
2336}
2337
2338/*
2339 * This helper attempts to cope with remotely renamed directories
2340 *
2341 * It assumes that the caller is already holding
2342 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2343 *
2344 * Note: If ever the locking in lock_rename() changes, then please
2345 * remember to update this too...
2346 */
2347static struct dentry *__d_unalias(struct inode *inode,
2348                struct dentry *dentry, struct dentry *alias)
2349{
2350        struct mutex *m1 = NULL, *m2 = NULL;
2351        struct dentry *ret = ERR_PTR(-EBUSY);
2352
2353        /* If alias and dentry share a parent, then no extra locks required */
2354        if (alias->d_parent == dentry->d_parent)
2355                goto out_unalias;
2356
2357        /* See lock_rename() */
2358        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2359                goto out_err;
2360        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2361        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2362                goto out_err;
2363        m2 = &alias->d_parent->d_inode->i_mutex;
2364out_unalias:
2365        if (likely(!d_mountpoint(alias))) {
2366                __d_move(alias, dentry);
2367                ret = alias;
2368        }
2369out_err:
2370        spin_unlock(&inode->i_lock);
2371        if (m2)
2372                mutex_unlock(m2);
2373        if (m1)
2374                mutex_unlock(m1);
2375        return ret;
2376}
2377
2378/*
2379 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2380 * named dentry in place of the dentry to be replaced.
2381 * returns with anon->d_lock held!
2382 */
2383static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2384{
2385        struct dentry *dparent;
2386
2387        dentry_lock_for_move(anon, dentry);
2388
2389        write_seqcount_begin(&dentry->d_seq);
2390        write_seqcount_begin(&anon->d_seq);
2391
2392        dparent = dentry->d_parent;
2393
2394        switch_names(dentry, anon);
2395        swap(dentry->d_name.hash, anon->d_name.hash);
2396
2397        dentry->d_parent = dentry;
2398        list_del_init(&dentry->d_u.d_child);
2399        anon->d_parent = dparent;
2400        list_move(&anon->d_u.d_child, &dparent->d_subdirs);
2401
2402        write_seqcount_end(&dentry->d_seq);
2403        write_seqcount_end(&anon->d_seq);
2404
2405        dentry_unlock_parents_for_move(anon, dentry);
2406        spin_unlock(&dentry->d_lock);
2407
2408        /* anon->d_lock still locked, returns locked */
2409        anon->d_flags &= ~DCACHE_DISCONNECTED;
2410}
2411
2412/**
2413 * d_materialise_unique - introduce an inode into the tree
2414 * @dentry: candidate dentry
2415 * @inode: inode to bind to the dentry, to which aliases may be attached
2416 *
2417 * Introduces an dentry into the tree, substituting an extant disconnected
2418 * root directory alias in its place if there is one. Caller must hold the
2419 * i_mutex of the parent directory.
2420 */
2421struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2422{
2423        struct dentry *actual;
2424
2425        BUG_ON(!d_unhashed(dentry));
2426
2427        if (!inode) {
2428                actual = dentry;
2429                __d_instantiate(dentry, NULL);
2430                d_rehash(actual);
2431                goto out_nolock;
2432        }
2433
2434        spin_lock(&inode->i_lock);
2435
2436        if (S_ISDIR(inode->i_mode)) {
2437                struct dentry *alias;
2438
2439                /* Does an aliased dentry already exist? */
2440                alias = __d_find_alias(inode, 0);
2441                if (alias) {
2442                        actual = alias;
2443                        write_seqlock(&rename_lock);
2444
2445                        if (d_ancestor(alias, dentry)) {
2446                                /* Check for loops */
2447                                actual = ERR_PTR(-ELOOP);
2448                                spin_unlock(&inode->i_lock);
2449                        } else if (IS_ROOT(alias)) {
2450                                /* Is this an anonymous mountpoint that we
2451                                 * could splice into our tree? */
2452                                __d_materialise_dentry(dentry, alias);
2453                                write_sequnlock(&rename_lock);
2454                                __d_drop(alias);
2455                                goto found;
2456                        } else {
2457                                /* Nope, but we must(!) avoid directory
2458                                 * aliasing. This drops inode->i_lock */
2459                                actual = __d_unalias(inode, dentry, alias);
2460                        }
2461                        write_sequnlock(&rename_lock);
2462                        if (IS_ERR(actual)) {
2463                                if (PTR_ERR(actual) == -ELOOP)
2464                                        pr_warn_ratelimited(
2465                                                "VFS: Lookup of '%s' in %s %s"
2466                                                " would have caused loop\n",
2467                                                dentry->d_name.name,
2468                                                inode->i_sb->s_type->name,
2469                                                inode->i_sb->s_id);
2470                                dput(alias);
2471                        }
2472                        goto out_nolock;
2473                }
2474        }
2475
2476        /* Add a unique reference */
2477        actual = __d_instantiate_unique(dentry, inode);
2478        if (!actual)
2479                actual = dentry;
2480        else
2481                BUG_ON(!d_unhashed(actual));
2482
2483        spin_lock(&actual->d_lock);
2484found:
2485        _d_rehash(actual);
2486        spin_unlock(&actual->d_lock);
2487        spin_unlock(&inode->i_lock);
2488out_nolock:
2489        if (actual == dentry) {
2490                security_d_instantiate(dentry, inode);
2491                return NULL;
2492        }
2493
2494        iput(inode);
2495        return actual;
2496}
2497EXPORT_SYMBOL_GPL(d_materialise_unique);
2498
2499static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2500{
2501        *buflen -= namelen;
2502        if (*buflen < 0)
2503                return -ENAMETOOLONG;
2504        *buffer -= namelen;
2505        memcpy(*buffer, str, namelen);
2506        return 0;
2507}
2508
2509static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2510{
2511        return prepend(buffer, buflen, name->name, name->len);
2512}
2513
2514/**
2515 * prepend_path - Prepend path string to a buffer
2516 * @path: the dentry/vfsmount to report
2517 * @root: root vfsmnt/dentry
2518 * @buffer: pointer to the end of the buffer
2519 * @buflen: pointer to buffer length
2520 *
2521 * Caller holds the rename_lock.
2522 */
2523static int prepend_path(const struct path *path,
2524                        const struct path *root,
2525                        char **buffer, int *buflen)
2526{
2527        struct dentry *dentry = path->dentry;
2528        struct vfsmount *vfsmnt = path->mnt;
2529        struct mount *mnt = real_mount(vfsmnt);
2530        bool slash = false;
2531        int error = 0;
2532
2533        while (dentry != root->dentry || vfsmnt != root->mnt) {
2534                struct dentry * parent;
2535
2536                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2537                        /* Global root? */
2538                        if (!mnt_has_parent(mnt))
2539                                goto global_root;
2540                        dentry = mnt->mnt_mountpoint;
2541                        mnt = mnt->mnt_parent;
2542                        vfsmnt = &mnt->mnt;
2543                        continue;
2544                }
2545                parent = dentry->d_parent;
2546                prefetch(parent);
2547                spin_lock(&dentry->d_lock);
2548                error = prepend_name(buffer, buflen, &dentry->d_name);
2549                spin_unlock(&dentry->d_lock);
2550                if (!error)
2551                        error = prepend(buffer, buflen, "/", 1);
2552                if (error)
2553                        break;
2554
2555                slash = true;
2556                dentry = parent;
2557        }
2558
2559        if (!error && !slash)
2560                error = prepend(buffer, buflen, "/", 1);
2561
2562        return error;
2563
2564global_root:
2565        /*
2566         * Filesystems needing to implement special "root names"
2567         * should do so with ->d_dname()
2568         */
2569        if (IS_ROOT(dentry) &&
2570            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2571                WARN(1, "Root dentry has weird name <%.*s>\n",
2572                     (int) dentry->d_name.len, dentry->d_name.name);
2573        }
2574        if (!slash)
2575                error = prepend(buffer, buflen, "/", 1);
2576        if (!error)
2577                error = is_mounted(vfsmnt) ? 1 : 2;
2578        return error;
2579}
2580
2581/**
2582 * __d_path - return the path of a dentry
2583 * @path: the dentry/vfsmount to report
2584 * @root: root vfsmnt/dentry
2585 * @buf: buffer to return value in
2586 * @buflen: buffer length
2587 *
2588 * Convert a dentry into an ASCII path name.
2589 *
2590 * Returns a pointer into the buffer or an error code if the
2591 * path was too long.
2592 *
2593 * "buflen" should be positive.
2594 *
2595 * If the path is not reachable from the supplied root, return %NULL.
2596 */
2597char *__d_path(const struct path *path,
2598               const struct path *root,
2599               char *buf, int buflen)
2600{
2601        char *res = buf + buflen;
2602        int error;
2603
2604        prepend(&res, &buflen, "\0", 1);
2605        br_read_lock(&vfsmount_lock);
2606        write_seqlock(&rename_lock);
2607        error = prepend_path(path, root, &res, &buflen);
2608        write_sequnlock(&rename_lock);
2609        br_read_unlock(&vfsmount_lock);
2610
2611        if (error < 0)
2612                return ERR_PTR(error);
2613        if (error > 0)
2614                return NULL;
2615        return res;
2616}
2617
2618char *d_absolute_path(const struct path *path,
2619               char *buf, int buflen)
2620{
2621        struct path root = {};
2622        char *res = buf + buflen;
2623        int error;
2624
2625        prepend(&res, &buflen, "\0", 1);
2626        br_read_lock(&vfsmount_lock);
2627        write_seqlock(&rename_lock);
2628        error = prepend_path(path, &root, &res, &buflen);
2629        write_sequnlock(&rename_lock);
2630        br_read_unlock(&vfsmount_lock);
2631
2632        if (error > 1)
2633                error = -EINVAL;
2634        if (error < 0)
2635                return ERR_PTR(error);
2636        return res;
2637}
2638
2639/*
2640 * same as __d_path but appends "(deleted)" for unlinked files.
2641 */
2642static int path_with_deleted(const struct path *path,
2643                             const struct path *root,
2644                             char **buf, int *buflen)
2645{
2646        prepend(buf, buflen, "\0", 1);
2647        if (d_unlinked(path->dentry)) {
2648                int error = prepend(buf, buflen, " (deleted)", 10);
2649                if (error)
2650                        return error;
2651        }
2652
2653        return prepend_path(path, root, buf, buflen);
2654}
2655
2656static int prepend_unreachable(char **buffer, int *buflen)
2657{
2658        return prepend(buffer, buflen, "(unreachable)", 13);
2659}
2660
2661/**
2662 * d_path - return the path of a dentry
2663 * @path: path to report
2664 * @buf: buffer to return value in
2665 * @buflen: buffer length
2666 *
2667 * Convert a dentry into an ASCII path name. If the entry has been deleted
2668 * the string " (deleted)" is appended. Note that this is ambiguous.
2669 *
2670 * Returns a pointer into the buffer or an error code if the path was
2671 * too long. Note: Callers should use the returned pointer, not the passed
2672 * in buffer, to use the name! The implementation often starts at an offset
2673 * into the buffer, and may leave 0 bytes at the start.
2674 *
2675 * "buflen" should be positive.
2676 */
2677char *d_path(const struct path *path, char *buf, int buflen)
2678{
2679        char *res = buf + buflen;
2680        struct path root;
2681        int error;
2682
2683        /*
2684         * We have various synthetic filesystems that never get mounted.  On
2685         * these filesystems dentries are never used for lookup purposes, and
2686         * thus don't need to be hashed.  They also don't need a name until a
2687         * user wants to identify the object in /proc/pid/fd/.  The little hack
2688         * below allows us to generate a name for these objects on demand:
2689         */
2690        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2691                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2692
2693        get_fs_root(current->fs, &root);
2694        br_read_lock(&vfsmount_lock);
2695        write_seqlock(&rename_lock);
2696        error = path_with_deleted(path, &root, &res, &buflen);
2697        write_sequnlock(&rename_lock);
2698        br_read_unlock(&vfsmount_lock);
2699        if (error < 0)
2700                res = ERR_PTR(error);
2701        path_put(&root);
2702        return res;
2703}
2704EXPORT_SYMBOL(d_path);
2705
2706/*
2707 * Helper function for dentry_operations.d_dname() members
2708 */
2709char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2710                        const char *fmt, ...)
2711{
2712        va_list args;
2713        char temp[64];
2714        int sz;
2715
2716        va_start(args, fmt);
2717        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2718        va_end(args);
2719
2720        if (sz > sizeof(temp) || sz > buflen)
2721                return ERR_PTR(-ENAMETOOLONG);
2722
2723        buffer += buflen - sz;
2724        return memcpy(buffer, temp, sz);
2725}
2726
2727/*
2728 * Write full pathname from the root of the filesystem into the buffer.
2729 */
2730static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2731{
2732        char *end = buf + buflen;
2733        char *retval;
2734
2735        prepend(&end, &buflen, "\0", 1);
2736        if (buflen < 1)
2737                goto Elong;
2738        /* Get '/' right */
2739        retval = end-1;
2740        *retval = '/';
2741
2742        while (!IS_ROOT(dentry)) {
2743                struct dentry *parent = dentry->d_parent;
2744                int error;
2745
2746                prefetch(parent);
2747                spin_lock(&dentry->d_lock);
2748                error = prepend_name(&end, &buflen, &dentry->d_name);
2749                spin_unlock(&dentry->d_lock);
2750                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2751                        goto Elong;
2752
2753                retval = end;
2754                dentry = parent;
2755        }
2756        return retval;
2757Elong:
2758        return ERR_PTR(-ENAMETOOLONG);
2759}
2760
2761char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2762{
2763        char *retval;
2764
2765        write_seqlock(&rename_lock);
2766        retval = __dentry_path(dentry, buf, buflen);
2767        write_sequnlock(&rename_lock);
2768
2769        return retval;
2770}
2771EXPORT_SYMBOL(dentry_path_raw);
2772
2773char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2774{
2775        char *p = NULL;
2776        char *retval;
2777
2778        write_seqlock(&rename_lock);
2779        if (d_unlinked(dentry)) {
2780                p = buf + buflen;
2781                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2782                        goto Elong;
2783                buflen++;
2784        }
2785        retval = __dentry_path(dentry, buf, buflen);
2786        write_sequnlock(&rename_lock);
2787        if (!IS_ERR(retval) && p)
2788                *p = '/';       /* restore '/' overriden with '\0' */
2789        return retval;
2790Elong:
2791        return ERR_PTR(-ENAMETOOLONG);
2792}
2793
2794/*
2795 * NOTE! The user-level library version returns a
2796 * character pointer. The kernel system call just
2797 * returns the length of the buffer filled (which
2798 * includes the ending '\0' character), or a negative
2799 * error value. So libc would do something like
2800 *
2801 *      char *getcwd(char * buf, size_t size)
2802 *      {
2803 *              int retval;
2804 *
2805 *              retval = sys_getcwd(buf, size);
2806 *              if (retval >= 0)
2807 *                      return buf;
2808 *              errno = -retval;
2809 *              return NULL;
2810 *      }
2811 */
2812SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2813{
2814        int error;
2815        struct path pwd, root;
2816        char *page = (char *) __get_free_page(GFP_USER);
2817
2818        if (!page)
2819                return -ENOMEM;
2820
2821        get_fs_root_and_pwd(current->fs, &root, &pwd);
2822
2823        error = -ENOENT;
2824        br_read_lock(&vfsmount_lock);
2825        write_seqlock(&rename_lock);
2826        if (!d_unlinked(pwd.dentry)) {
2827                unsigned long len;
2828                char *cwd = page + PAGE_SIZE;
2829                int buflen = PAGE_SIZE;
2830
2831                prepend(&cwd, &buflen, "\0", 1);
2832                error = prepend_path(&pwd, &root, &cwd, &buflen);
2833                write_sequnlock(&rename_lock);
2834                br_read_unlock(&vfsmount_lock);
2835
2836                if (error < 0)
2837                        goto out;
2838
2839                /* Unreachable from current root */
2840                if (error > 0) {
2841                        error = prepend_unreachable(&cwd, &buflen);
2842                        if (error)
2843                                goto out;
2844                }
2845
2846                error = -ERANGE;
2847                len = PAGE_SIZE + page - cwd;
2848                if (len <= size) {
2849                        error = len;
2850                        if (copy_to_user(buf, cwd, len))
2851                                error = -EFAULT;
2852                }
2853        } else {
2854                write_sequnlock(&rename_lock);
2855                br_read_unlock(&vfsmount_lock);
2856        }
2857
2858out:
2859        path_put(&pwd);
2860        path_put(&root);
2861        free_page((unsigned long) page);
2862        return error;
2863}
2864
2865/*
2866 * Test whether new_dentry is a subdirectory of old_dentry.
2867 *
2868 * Trivially implemented using the dcache structure
2869 */
2870
2871/**
2872 * is_subdir - is new dentry a subdirectory of old_dentry
2873 * @new_dentry: new dentry
2874 * @old_dentry: old dentry
2875 *
2876 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2877 * Returns 0 otherwise.
2878 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2879 */
2880  
2881int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2882{
2883        int result;
2884        unsigned seq;
2885
2886        if (new_dentry == old_dentry)
2887                return 1;
2888
2889        do {
2890                /* for restarting inner loop in case of seq retry */
2891                seq = read_seqbegin(&rename_lock);
2892                /*
2893                 * Need rcu_readlock to protect against the d_parent trashing
2894                 * due to d_move
2895                 */
2896                rcu_read_lock();
2897                if (d_ancestor(old_dentry, new_dentry))
2898                        result = 1;
2899                else
2900                        result = 0;
2901                rcu_read_unlock();
2902        } while (read_seqretry(&rename_lock, seq));
2903
2904        return result;
2905}
2906
2907void d_genocide(struct dentry *root)
2908{
2909        struct dentry *this_parent;
2910        struct list_head *next;
2911        unsigned seq;
2912        int locked = 0;
2913
2914        seq = read_seqbegin(&rename_lock);
2915again:
2916        this_parent = root;
2917        spin_lock(&this_parent->d_lock);
2918repeat:
2919        next = this_parent->d_subdirs.next;
2920resume:
2921        while (next != &this_parent->d_subdirs) {
2922                struct list_head *tmp = next;
2923                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2924                next = tmp->next;
2925
2926                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2927                if (d_unhashed(dentry) || !dentry->d_inode) {
2928                        spin_unlock(&dentry->d_lock);
2929                        continue;
2930                }
2931                if (!list_empty(&dentry->d_subdirs)) {
2932                        spin_unlock(&this_parent->d_lock);
2933                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2934                        this_parent = dentry;
2935                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2936                        goto repeat;
2937                }
2938                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2939                        dentry->d_flags |= DCACHE_GENOCIDE;
2940                        dentry->d_count--;
2941                }
2942                spin_unlock(&dentry->d_lock);
2943        }
2944        if (this_parent != root) {
2945                struct dentry *child = this_parent;
2946                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2947                        this_parent->d_flags |= DCACHE_GENOCIDE;
2948                        this_parent->d_count--;
2949                }
2950                this_parent = try_to_ascend(this_parent, locked, seq);
2951                if (!this_parent)
2952                        goto rename_retry;
2953                next = child->d_u.d_child.next;
2954                goto resume;
2955        }
2956        spin_unlock(&this_parent->d_lock);
2957        if (!locked && read_seqretry(&rename_lock, seq))
2958                goto rename_retry;
2959        if (locked)
2960                write_sequnlock(&rename_lock);
2961        return;
2962
2963rename_retry:
2964        if (locked)
2965                goto again;
2966        locked = 1;
2967        write_seqlock(&rename_lock);
2968        goto again;
2969}
2970
2971/**
2972 * find_inode_number - check for dentry with name
2973 * @dir: directory to check
2974 * @name: Name to find.
2975 *
2976 * Check whether a dentry already exists for the given name,
2977 * and return the inode number if it has an inode. Otherwise
2978 * 0 is returned.
2979 *
2980 * This routine is used to post-process directory listings for
2981 * filesystems using synthetic inode numbers, and is necessary
2982 * to keep getcwd() working.
2983 */
2984 
2985ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2986{
2987        struct dentry * dentry;
2988        ino_t ino = 0;
2989
2990        dentry = d_hash_and_lookup(dir, name);
2991        if (!IS_ERR_OR_NULL(dentry)) {
2992                if (dentry->d_inode)
2993                        ino = dentry->d_inode->i_ino;
2994                dput(dentry);
2995        }
2996        return ino;
2997}
2998EXPORT_SYMBOL(find_inode_number);
2999
3000static __initdata unsigned long dhash_entries;
3001static int __init set_dhash_entries(char *str)
3002{
3003        if (!str)
3004                return 0;
3005        dhash_entries = simple_strtoul(str, &str, 0);
3006        return 1;
3007}
3008__setup("dhash_entries=", set_dhash_entries);
3009
3010static void __init dcache_init_early(void)
3011{
3012        unsigned int loop;
3013
3014        /* If hashes are distributed across NUMA nodes, defer
3015         * hash allocation until vmalloc space is available.
3016         */
3017        if (hashdist)
3018                return;
3019
3020        dentry_hashtable =
3021                alloc_large_system_hash("Dentry cache",
3022                                        sizeof(struct hlist_bl_head),
3023                                        dhash_entries,
3024                                        13,
3025                                        HASH_EARLY,
3026                                        &d_hash_shift,
3027                                        &d_hash_mask,
3028                                        0,
3029                                        0);
3030
3031        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3032                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3033}
3034
3035static void __init dcache_init(void)
3036{
3037        unsigned int loop;
3038
3039        /* 
3040         * A constructor could be added for stable state like the lists,
3041         * but it is probably not worth it because of the cache nature
3042         * of the dcache. 
3043         */
3044        dentry_cache = KMEM_CACHE(dentry,
3045                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3046
3047        /* Hash may have been set up in dcache_init_early */
3048        if (!hashdist)
3049                return;
3050
3051        dentry_hashtable =
3052                alloc_large_system_hash("Dentry cache",
3053                                        sizeof(struct hlist_bl_head),
3054                                        dhash_entries,
3055                                        13,
3056                                        0,
3057                                        &d_hash_shift,
3058                                        &d_hash_mask,
3059                                        0,
3060                                        0);
3061
3062        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3063                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3064}
3065
3066/* SLAB cache for __getname() consumers */
3067struct kmem_cache *names_cachep __read_mostly;
3068EXPORT_SYMBOL(names_cachep);
3069
3070EXPORT_SYMBOL(d_genocide);
3071
3072void __init vfs_caches_init_early(void)
3073{
3074        dcache_init_early();
3075        inode_init_early();
3076}
3077
3078void __init vfs_caches_init(unsigned long mempages)
3079{
3080        unsigned long reserve;
3081
3082        /* Base hash sizes on available memory, with a reserve equal to
3083           150% of current kernel size */
3084
3085        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3086        mempages -= reserve;
3087
3088        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3089                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3090
3091        dcache_init();
3092        inode_init();
3093        files_init(mempages);
3094        mnt_init();
3095        bdev_cache_init();
3096        chrdev_init();
3097}
3098