/* linux/fs/dcache.c — dentry cache (dcache) implementation */
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/module.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include "internal.h"
  39
  40/*
  41 * Usage:
 * dentry->d_inode->i_lock protects:
  43 *   - i_dentry, d_alias, d_inode of aliases
  44 * dcache_hash_bucket lock protects:
  45 *   - the dcache hash table
  46 * s_anon bl list spinlock protects:
  47 *   - the s_anon list (see __d_drop)
  48 * dcache_lru_lock protects:
  49 *   - the dcache lru lists and counters
  50 * d_lock protects:
  51 *   - d_flags
  52 *   - d_name
  53 *   - d_lru
  54 *   - d_count
  55 *   - d_unhashed()
  56 *   - d_parent and d_subdirs
 *   - children's d_child and d_parent
  58 *   - d_alias, d_inode
  59 *
  60 * Ordering:
  61 * dentry->d_inode->i_lock
  62 *   dentry->d_lock
  63 *     dcache_lru_lock
  64 *     dcache_hash_bucket lock
  65 *     s_anon lock
  66 *
  67 * If there is an ancestor relationship:
  68 * dentry->d_parent->...->d_parent->d_lock
  69 *   ...
  70 *     dentry->d_parent->d_lock
  71 *       dentry->d_lock
  72 *
  73 * If no ancestor relationship:
  74 * if (dentry1 < dentry2)
  75 *   dentry1->d_lock
  76 *     dentry2->d_lock
  77 */
/* Tunable: relative eagerness of dcache/icache reclaim (default 100). */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);

/* Protects the per-superblock dentry LRU lists and unused counters. */
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
/* Seqlock bumped across renames so lockless path walks can detect them. */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);

EXPORT_SYMBOL(rename_lock);

/* Slab cache from which all struct dentry objects are allocated. */
static struct kmem_cache *dentry_cache __read_mostly;
  87
/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

/* Table geometry, computed at boot when the table is allocated. */
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;

/*
 * A hash chain head; bit 0 of ->head.first doubles as the per-bucket
 * lock (see spin_lock_bucket/spin_unlock_bucket).
 */
struct dcache_hash_bucket {
	struct hlist_bl_head head;
};
static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
 107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
 108                                        unsigned long hash)
 109{
 110        hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
 111        hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
/* Lock a hash chain by taking the bit spinlock embedded in its head. */
static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
{
	bit_spin_lock(0, (unsigned long *)&b->head.first);
}
 119
/* Release a hash chain's embedded bit spinlock (non-atomic variant). */
static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
{
	__bit_spin_unlock(0, (unsigned long *)&b->head.first);
}
 124
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
	.age_limit = 45,
};

/* Per-CPU count of live dentries; summed on demand (see get_nr_dentry). */
static DEFINE_PER_CPU(unsigned int, nr_dentry);
 131
 132#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 133static int get_nr_dentry(void)
 134{
 135        int i;
 136        int sum = 0;
 137        for_each_possible_cpu(i)
 138                sum += per_cpu(nr_dentry, i);
 139        return sum < 0 ? 0 : sum;
 140}
 141
/*
 * Sysctl proc handler: refresh the global dentry count from the per-CPU
 * counters, then defer to the generic integer-vector handler.
 */
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
		   size_t *lenp, loff_t *ppos)
{
	dentry_stat.nr_dentry = get_nr_dentry();
	return proc_dointvec(table, write, buffer, lenp, ppos);
}
 148#endif
 149
/*
 * Final free of a dentry (possibly deferred as an RCU callback):
 * release an externally-allocated name, then return the object to
 * the slab.  The dentry must already be unlinked from its inode.
 */
static void __d_free(struct rcu_head *head)
{
	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);

	WARN_ON(!list_empty(&dentry->d_alias));
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}
 159
 160/*
 161 * no locks, please.
 162 */
/*
 * no locks, please.
 *
 * Free a zero-refcount dentry: invoke the filesystem's d_release hook
 * if any, then free the object -- immediately if it was never hashed
 * (no RCU walker can have seen it), otherwise after a grace period.
 */
static void d_free(struct dentry *dentry)
{
	BUG_ON(dentry->d_count);
	this_cpu_dec(nr_dentry);
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);

	/* if dentry was never inserted into hash, immediate free is OK */
	if (hlist_bl_unhashed(&dentry->d_hash))
		__d_free(&dentry->d_u.d_rcu);
	else
		call_rcu(&dentry->d_u.d_rcu, __d_free);
}
 176
/**
 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 * @dentry: the target dentry
 * After this call, in-progress rcu-walk path lookup will fail. This
 * should be called after unhashing, and after changing d_inode (if
 * the dentry has not already been unhashed).
 */
static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
{
	assert_spin_locked(&dentry->d_lock);
	/* Go through a barrier: bumping d_seq makes rcu-walk readers retry. */
	write_seqcount_barrier(&dentry->d_seq);
}
 190
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. Dentry has no refcount
 * and is unhashed.
 *
 * Called with, and drops, dentry->d_lock and (if an inode is attached)
 * the inode's i_lock; the d_iput()/iput() call runs after both
 * spinlocks have been released.
 */
static void dentry_iput(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		/* Break the dentry<->inode association before unlocking. */
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dentry->d_lock);
		spin_unlock(&inode->i_lock);
		if (!inode->i_nlink)
			fsnotify_inoderemove(inode);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else {
		/* Negative dentry: nothing to release. */
		spin_unlock(&dentry->d_lock);
	}
}
 216
/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined. dentry remains in-use.
 *
 * Unlike dentry_iput(), the inode must be present, and because the
 * dentry stays live (and may still be hashed) an rcu-walk barrier is
 * issued after clearing d_inode.  Drops d_lock and i_lock.
 */
static void dentry_unlink_inode(struct dentry * dentry)
	__releases(dentry->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	struct inode *inode = dentry->d_inode;
	dentry->d_inode = NULL;
	list_del_init(&dentry->d_alias);
	/* Make concurrent rcu-walk lookups on this dentry retry. */
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&inode->i_lock);
	if (!inode->i_nlink)
		fsnotify_inoderemove(inode);
	if (dentry->d_op && dentry->d_op->d_iput)
		dentry->d_op->d_iput(dentry, inode);
	else
		iput(inode);
}
 238
 239/*
 240 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
 241 */
 242static void dentry_lru_add(struct dentry *dentry)
 243{
 244        if (list_empty(&dentry->d_lru)) {
 245                spin_lock(&dcache_lru_lock);
 246                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 247                dentry->d_sb->s_nr_dentry_unused++;
 248                dentry_stat.nr_unused++;
 249                spin_unlock(&dcache_lru_lock);
 250        }
 251}
 252
/* Unlink from the LRU and fix up counters; caller holds dcache_lru_lock. */
static void __dentry_lru_del(struct dentry *dentry)
{
	list_del_init(&dentry->d_lru);
	dentry->d_sb->s_nr_dentry_unused--;
	dentry_stat.nr_unused--;
}
 259
 260static void dentry_lru_del(struct dentry *dentry)
 261{
 262        if (!list_empty(&dentry->d_lru)) {
 263                spin_lock(&dcache_lru_lock);
 264                __dentry_lru_del(dentry);
 265                spin_unlock(&dcache_lru_lock);
 266        }
 267}
 268
/*
 * Move @dentry to the tail of its superblock's LRU (the reclaim end),
 * adding it first if it was not on the list.  Caller holds d_lock.
 */
static void dentry_lru_move_tail(struct dentry *dentry)
{
	spin_lock(&dcache_lru_lock);
	if (list_empty(&dentry->d_lru)) {
		/* Not on the LRU yet: add at the tail and count it. */
		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
		dentry->d_sb->s_nr_dentry_unused++;
		dentry_stat.nr_unused++;
	} else {
		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
	}
	spin_unlock(&dcache_lru_lock);
}
 281
/**
 * d_kill - kill dentry and return parent
 * @dentry: dentry to kill
 * @parent: parent dentry
 *
 * The dentry must already be unhashed and removed from the LRU.
 *
 * If this is the root of the dentry tree, return NULL.
 *
 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 * d_kill.
 */
static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
	__releases(dentry->d_lock)
	__releases(parent->d_lock)
	__releases(dentry->d_inode->i_lock)
{
	/* Detach from the parent's list of children. */
	dentry->d_parent = NULL;
	list_del(&dentry->d_u.d_child);
	if (parent)
		spin_unlock(&parent->d_lock);
	dentry_iput(dentry);
	/*
	 * dentry_iput drops the locks, at which point nobody (except
	 * transient RCU lookups) can reach this dentry.
	 */
	d_free(dentry);
	return parent;
}
 311
/**
 * d_drop - drop a dentry
 * @dentry: dentry to drop
 *
 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 * be found through a VFS lookup any more. Note that this is different from
 * deleting the dentry - d_delete will try to mark the dentry negative if
 * possible, giving a successful _negative_ lookup, while d_drop will
 * just make the cache lookup fail.
 *
 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 * reason (NFS timeouts or autofs deletes).
 *
 * __d_drop requires dentry->d_lock.
 */
void __d_drop(struct dentry *dentry)
{
	if (!(dentry->d_flags & DCACHE_UNHASHED)) {
		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
			/*
			 * Disconnected dentries live on the superblock's
			 * s_anon list, protected by its own bit spinlock
			 * rather than a hash bucket lock.
			 */
			bit_spin_lock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_init(&dentry->d_hash);
			__bit_spin_unlock(0,
				(unsigned long *)&dentry->d_sb->s_anon.first);
		} else {
			struct dcache_hash_bucket *b;
			b = d_hash(dentry->d_parent, dentry->d_name.hash);
			spin_lock_bucket(b);
			/*
			 * We may not actually need to put DCACHE_UNHASHED
			 * manipulations under the hash lock, but follow
			 * the principle of least surprise.
			 */
			dentry->d_flags |= DCACHE_UNHASHED;
			hlist_bl_del_rcu(&dentry->d_hash);
			spin_unlock_bucket(b);
			/* Force in-flight rcu-walk lookups to retry. */
			dentry_rcuwalk_barrier(dentry);
		}
	}
}
EXPORT_SYMBOL(__d_drop);
 354
/* Lock-taking wrapper around __d_drop(); see its comment for semantics. */
void d_drop(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(d_drop);
 362
/*
 * Finish off a dentry we've decided to kill.
 * dentry->d_lock must be held, returns with it unlocked.
 * If ref is non-zero, then decrement the refcount too.
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
	__releases(dentry->d_lock)
{
	struct inode *inode;
	struct dentry *parent;

	/*
	 * i_lock and the parent's d_lock rank above d_lock in the locking
	 * order, so they can only be trylocked here; on failure drop
	 * everything and let the caller retry with the same dentry.
	 */
	inode = dentry->d_inode;
	if (inode && !spin_trylock(&inode->i_lock)) {
relock:
		/* Also reached via goto when the parent trylock fails. */
		spin_unlock(&dentry->d_lock);
		cpu_relax();
		return dentry; /* try again with same dentry */
	}
	if (IS_ROOT(dentry))
		parent = NULL;
	else
		parent = dentry->d_parent;
	if (parent && !spin_trylock(&parent->d_lock)) {
		if (inode)
			spin_unlock(&inode->i_lock);
		goto relock;
	}

	if (ref)
		dentry->d_count--;
	/* if dentry was on the d_lru list delete it from there */
	dentry_lru_del(dentry);
	/* if it was on the hash then remove it */
	__d_drop(dentry);
	return d_kill(dentry, parent);
}
 400
 401/* 
 402 * This is dput
 403 *
 404 * This is complicated by the fact that we do not want to put
 405 * dentries that are no longer on any hash chain on the unused
 406 * list: we'd much rather just get rid of them immediately.
 407 *
 408 * However, that implies that we have to traverse the dentry
 409 * tree upwards to the parents which might _also_ now be
 410 * scheduled for deletion (it may have been only waiting for
 411 * its last child to go away).
 412 *
 413 * This tail recursion is done by hand as we don't want to depend
 414 * on the compiler to always get this right (gcc generally doesn't).
 415 * Real recursion would eat up our stack space.
 416 */
 417
/*
 * dput - release a dentry
 * @dentry: dentry to release 
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 */
void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	/* Unlocked peek: dropping the last reference may sleep. */
	if (dentry->d_count == 1)
		might_sleep();
	spin_lock(&dentry->d_lock);
	BUG_ON(!dentry->d_count);
	if (dentry->d_count > 1) {
		/* Not the last reference: just decrement and go. */
		dentry->d_count--;
		spin_unlock(&dentry->d_lock);
		return;
	}

	/* Last reference: let the filesystem veto caching via d_delete. */
	if (dentry->d_flags & DCACHE_OP_DELETE) {
		if (dentry->d_op->d_delete(dentry))
			goto kill_it;
	}

	/* Unreachable? Get rid of it */
	if (d_unhashed(dentry))
		goto kill_it;

	/* Otherwise leave it cached and ensure it's on the LRU */
	dentry->d_flags |= DCACHE_REFERENCED;
	dentry_lru_add(dentry);

	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	return;

kill_it:
	/*
	 * dentry_kill() returns the parent needing its own ref drop, the
	 * same dentry if trylocks failed (retry), or NULL when done.
	 */
	dentry = dentry_kill(dentry, 1);
	if (dentry)
		goto repeat;
}
EXPORT_SYMBOL(dput);
 466
 467/**
 468 * d_invalidate - invalidate a dentry
 469 * @dentry: dentry to invalidate
 470 *
 471 * Try to invalidate the dentry if it turns out to be
 472 * possible. If there are other dentries that can be
 473 * reached through this one we can't delete it and we
 474 * return -EBUSY. On success we return 0.
 475 *
 476 * no dcache lock.
 477 */
 478 
 479int d_invalidate(struct dentry * dentry)
 480{
 481        /*
 482         * If it's already been dropped, return OK.
 483         */
 484        spin_lock(&dentry->d_lock);
 485        if (d_unhashed(dentry)) {
 486                spin_unlock(&dentry->d_lock);
 487                return 0;
 488        }
 489        /*
 490         * Check whether to do a partial shrink_dcache
 491         * to get rid of unused child entries.
 492         */
 493        if (!list_empty(&dentry->d_subdirs)) {
 494                spin_unlock(&dentry->d_lock);
 495                shrink_dcache_parent(dentry);
 496                spin_lock(&dentry->d_lock);
 497        }
 498
 499        /*
 500         * Somebody else still using it?
 501         *
 502         * If it's a directory, we can't drop it
 503         * for fear of somebody re-populating it
 504         * with children (even though dropping it
 505         * would make it unreachable from the root,
 506         * we might still populate it if it was a
 507         * working directory or similar).
 508         */
 509        if (dentry->d_count > 1) {
 510                if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
 511                        spin_unlock(&dentry->d_lock);
 512                        return -EBUSY;
 513                }
 514        }
 515
 516        __d_drop(dentry);
 517        spin_unlock(&dentry->d_lock);
 518        return 0;
 519}
 520EXPORT_SYMBOL(d_invalidate);
 521
/* This must be called with d_lock held */
static inline void __dget_dlock(struct dentry *dentry)
{
	dentry->d_count++;
}
 527
/* Take a reference on @dentry, acquiring d_lock for the increment. */
static inline void __dget(struct dentry *dentry)
{
	spin_lock(&dentry->d_lock);
	__dget_dlock(dentry);
	spin_unlock(&dentry->d_lock);
}
 534
/*
 * dget_parent - safely acquire a reference to @dentry's parent.
 * The parent pointer is sampled under rcu_read_lock() and re-validated
 * under the parent's d_lock before the refcount is taken.
 */
struct dentry *dget_parent(struct dentry *dentry)
{
	struct dentry *ret;

repeat:
	/*
	 * Don't need rcu_dereference because we re-check it was correct under
	 * the lock.
	 */
	rcu_read_lock();
	ret = dentry->d_parent;
	if (!ret) {
		rcu_read_unlock();
		goto out;
	}
	spin_lock(&ret->d_lock);
	if (unlikely(ret != dentry->d_parent)) {
		/* d_parent changed under us: drop everything and retry. */
		spin_unlock(&ret->d_lock);
		rcu_read_unlock();
		goto repeat;
	}
	rcu_read_unlock();
	/* The child we came from pins the parent, so d_count can't be 0. */
	BUG_ON(!ret->d_count);
	ret->d_count++;
	spin_unlock(&ret->d_lock);
out:
	return ret;
}
EXPORT_SYMBOL(dget_parent);
 564
/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 * @want_discon:  flag, used by d_splice_alias, to request
 *          that only a DISCONNECTED alias be returned.
 *
 * If inode has a hashed alias, or is a directory and has any alias,
 * acquire the reference to alias and return it. Otherwise return NULL.
 * Notice that if inode is a directory there can be only one alias and
 * it can be unhashed only if it has no children, or if it is the root
 * of a filesystem.
 *
 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 * any other hashed alias over that one unless @want_discon is set,
 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 *
 * Caller holds inode->i_lock, which protects the i_dentry list.
 */
static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
{
	struct dentry *alias, *discon_alias;

again:
	discon_alias = NULL;
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				/* Remember it, but keep looking for better. */
				discon_alias = alias;
			} else if (!want_discon) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
	}
	if (discon_alias) {
		/*
		 * Only a disconnected alias was found.  Its state may have
		 * changed since we dropped its lock, so re-check before
		 * taking a reference; if it no longer qualifies, rescan.
		 */
		alias = discon_alias;
		spin_lock(&alias->d_lock);
		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
			if (IS_ROOT(alias) &&
			    (alias->d_flags & DCACHE_DISCONNECTED)) {
				__dget_dlock(alias);
				spin_unlock(&alias->d_lock);
				return alias;
			}
		}
		spin_unlock(&alias->d_lock);
		goto again;
	}
	return NULL;
}
 617
 618struct dentry *d_find_alias(struct inode *inode)
 619{
 620        struct dentry *de = NULL;
 621
 622        if (!list_empty(&inode->i_dentry)) {
 623                spin_lock(&inode->i_lock);
 624                de = __d_find_alias(inode, 0);
 625                spin_unlock(&inode->i_lock);
 626        }
 627        return de;
 628}
 629EXPORT_SYMBOL(d_find_alias);
 630
/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct dentry *dentry;
restart:
	spin_lock(&inode->i_lock);
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		spin_lock(&dentry->d_lock);
		if (!dentry->d_count) {
			/*
			 * Unused alias: pin it, unhash it, and let dput()
			 * reap it once the locks are dropped.  The list may
			 * change while unlocked, so restart the scan.
			 */
			__dget_dlock(dentry);
			__d_drop(dentry);
			spin_unlock(&dentry->d_lock);
			spin_unlock(&inode->i_lock);
			dput(dentry);
			goto restart;
		}
		spin_unlock(&dentry->d_lock);
	}
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
 655
/*
 * Try to throw away a dentry - free the inode, dput the parent.
 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 * Releases dentry->d_lock.
 *
 * This may fail if locks cannot be acquired no problem, just try again.
 */
static void try_prune_one_dentry(struct dentry *dentry)
	__releases(dentry->d_lock)
{
	struct dentry *parent;

	parent = dentry_kill(dentry, 0);
	/*
	 * If dentry_kill returns NULL, we have nothing more to do.
	 * if it returns the same dentry, trylocks failed. In either
	 * case, just loop again.
	 *
	 * Otherwise, we need to prune ancestors too. This is necessary
	 * to prevent quadratic behavior of shrink_dcache_parent(), but
	 * is also expected to be beneficial in reducing dentry cache
	 * fragmentation.
	 */
	if (!parent)
		return;
	if (parent == dentry)
		return;

	/* Prune ancestors. */
	dentry = parent;
	while (dentry) {
		spin_lock(&dentry->d_lock);
		if (dentry->d_count > 1) {
			/* Still referenced: drop our ref and stop walking. */
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}
		/* Sole holder: kill it and continue with its parent. */
		dentry = dentry_kill(dentry, 1);
	}
}
 696
/*
 * Reap the dentries on @list (linked via d_lru), taking entries from the
 * tail.  RCU keeps the list node readable while d_lock is not yet held;
 * each candidate is re-validated under its lock before being pruned.
 */
static void shrink_dentry_list(struct list_head *list)
{
	struct dentry *dentry;

	rcu_read_lock();
	for (;;) {
		dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
		if (&dentry->d_lru == list)
			break; /* empty */
		spin_lock(&dentry->d_lock);
		/* Re-check the tail didn't change before we got the lock. */
		if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/*
		 * We found an inuse dentry which was not removed from
		 * the LRU because of laziness during lookup.  Do not free
		 * it - just keep it off the LRU list.
		 */
		if (dentry->d_count) {
			dentry_lru_del(dentry);
			spin_unlock(&dentry->d_lock);
			continue;
		}

		/* Pruning can sleep/relock; leave the RCU read section. */
		rcu_read_unlock();

		try_prune_one_dentry(dentry);

		rcu_read_lock();
	}
	rcu_read_unlock();
}
 731
/**
 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
 * @sb:         superblock to shrink dentry LRU.
 * @count:      number of entries to prune
 * @flags:      flags to control the dentry processing
 *
 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
 * On return, *count is reduced by the number of entries moved to the
 * private reap list (they are then freed via shrink_dentry_list()).
 */
static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
{
	/* called from prune_dcache() and shrink_dcache_parent() */
	struct dentry *dentry;
	LIST_HEAD(referenced);	/* entries given a second chance */
	LIST_HEAD(tmp);		/* entries selected for reaping */
	int cnt = *count;

relock:
	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		dentry = list_entry(sb->s_dentry_lru.prev,
				struct dentry, d_lru);
		BUG_ON(dentry->d_sb != sb);

		/*
		 * d_lock ranks above dcache_lru_lock, so it can only be
		 * trylocked here; back off completely on contention.
		 */
		if (!spin_trylock(&dentry->d_lock)) {
			spin_unlock(&dcache_lru_lock);
			cpu_relax();
			goto relock;
		}

		/*
		 * If we are honouring the DCACHE_REFERENCED flag and the
		 * dentry has this flag set, don't free it.  Clear the flag
		 * and put it back on the LRU.
		 */
		if (flags & DCACHE_REFERENCED &&
				dentry->d_flags & DCACHE_REFERENCED) {
			dentry->d_flags &= ~DCACHE_REFERENCED;
			list_move(&dentry->d_lru, &referenced);
			spin_unlock(&dentry->d_lock);
		} else {
			list_move_tail(&dentry->d_lru, &tmp);
			spin_unlock(&dentry->d_lock);
			if (!--cnt)
				break;
		}
		cond_resched_lock(&dcache_lru_lock);
	}
	if (!list_empty(&referenced))
		list_splice(&referenced, &sb->s_dentry_lru);
	spin_unlock(&dcache_lru_lock);

	shrink_dentry_list(&tmp);

	*count = cnt;
}
 787
/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try to free
 *
 * Shrink the dcache. This is done when we need more memory, or simply when we
 * need to unmount something (at which point we need to unuse all dentries).
 *
 * This function may fail to free any resources if all the dentries are in use.
 */
static void prune_dcache(int count)
{
	struct super_block *sb, *p = NULL;
	int w_count;
	int unused = dentry_stat.nr_unused;
	int prune_ratio;
	int pruned;

	if (unused == 0 || count == 0)
		return;
	if (count >= unused)
		prune_ratio = 1;
	else
		prune_ratio = unused / count;
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (list_empty(&sb->s_instances))
			continue;
		if (sb->s_nr_dentry_unused == 0)
			continue;
		/* Pin the sb so it survives while sb_lock is dropped. */
		sb->s_count++;
		/* Now, we reclaim unused dentries with fairness.
		 * We reclaim them same percentage from each superblock.
		 * We calculate number of dentries to scan on this sb
		 * as follows, but the implementation is arranged to avoid
		 * overflows:
		 * number of dentries to scan on this sb =
		 * count * (number of dentries on this sb /
		 * number of dentries in the machine)
		 */
		spin_unlock(&sb_lock);
		if (prune_ratio != 1)
			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
		else
			w_count = sb->s_nr_dentry_unused;
		pruned = w_count;
		/*
		 * We need to be sure this filesystem isn't being unmounted,
		 * otherwise we could race with generic_shutdown_super(), and
		 * end up holding a reference to an inode while the filesystem
		 * is unmounted.  So we try to get s_umount, and make sure
		 * s_root isn't NULL.
		 */
		if (down_read_trylock(&sb->s_umount)) {
			if ((sb->s_root != NULL) &&
			    (!list_empty(&sb->s_dentry_lru))) {
				__shrink_dcache_sb(sb, &w_count,
						DCACHE_REFERENCED);
				/* w_count now holds the unpruned remainder. */
				pruned -= w_count;
			}
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
		/* Drop the previous sb's pin only after re-taking sb_lock. */
		if (p)
			__put_super(p);
		count -= pruned;
		p = sb;
		/* more work left to do? */
		if (count <= 0)
			break;
	}
	if (p)
		__put_super(p);
	spin_unlock(&sb_lock);
}
 862
/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This is used to free
 * the dcache before unmounting a file system.
 */
void shrink_dcache_sb(struct super_block *sb)
{
	LIST_HEAD(tmp);

	spin_lock(&dcache_lru_lock);
	while (!list_empty(&sb->s_dentry_lru)) {
		/*
		 * Steal the whole LRU onto a private list, reap it with
		 * the LRU lock dropped, and loop: in-use entries may have
		 * been put back on s_dentry_lru meanwhile.
		 */
		list_splice_init(&sb->s_dentry_lru, &tmp);
		spin_unlock(&dcache_lru_lock);
		shrink_dentry_list(&tmp);
		spin_lock(&dcache_lru_lock);
	}
	spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
 884
 885/*
 886 * destroy a single subtree of dentries for unmount
 887 * - see the comments on shrink_dcache_for_umount() for a description of the
 888 *   locking
 889 */
static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
{
	struct dentry *parent;
	unsigned detached = 0;	/* count of freed dentries (write-only here) */

	BUG_ON(!IS_ROOT(dentry));

	/* detach this root from the system */
	spin_lock(&dentry->d_lock);
	dentry_lru_del(dentry);
	__d_drop(dentry);
	spin_unlock(&dentry->d_lock);

	for (;;) {
		/* descend to the first leaf in the current subtree */
		while (!list_empty(&dentry->d_subdirs)) {
			struct dentry *loop;

			/* this is a branch with children - detach all of them
			 * from the system in one go */
			spin_lock(&dentry->d_lock);
			list_for_each_entry(loop, &dentry->d_subdirs,
					    d_u.d_child) {
				/* child locks nest inside the parent's */
				spin_lock_nested(&loop->d_lock,
						DENTRY_D_LOCK_NESTED);
				dentry_lru_del(loop);
				__d_drop(loop);
				spin_unlock(&loop->d_lock);
			}
			spin_unlock(&dentry->d_lock);

			/* move to the first child */
			dentry = list_entry(dentry->d_subdirs.next,
					    struct dentry, d_u.d_child);
		}

		/* consume the dentries from this leaf up through its parents
		 * until we find one with children or run out altogether */
		do {
			struct inode *inode;

			/* at umount nothing may still hold a reference */
			if (dentry->d_count != 0) {
				printk(KERN_ERR
				       "BUG: Dentry %p{i=%lx,n=%s}"
				       " still in use (%d)"
				       " [unmount of %s %s]\n",
				       dentry,
				       dentry->d_inode ?
				       dentry->d_inode->i_ino : 0UL,
				       dentry->d_name.name,
				       dentry->d_count,
				       dentry->d_sb->s_type->name,
				       dentry->d_sb->s_id);
				BUG();
			}

			if (IS_ROOT(dentry)) {
				parent = NULL;
				list_del(&dentry->d_u.d_child);
			} else {
				parent = dentry->d_parent;
				spin_lock(&parent->d_lock);
				/* drop the child's reference on its parent */
				parent->d_count--;
				list_del(&dentry->d_u.d_child);
				spin_unlock(&parent->d_lock);
			}

			detached++;

			/* detach the inode and drop the dcache's reference,
			 * letting the filesystem override via d_iput */
			inode = dentry->d_inode;
			if (inode) {
				dentry->d_inode = NULL;
				list_del_init(&dentry->d_alias);
				if (dentry->d_op && dentry->d_op->d_iput)
					dentry->d_op->d_iput(dentry, inode);
				else
					iput(inode);
			}

			d_free(dentry);

			/* finished when we fall off the top of the tree,
			 * otherwise we ascend to the parent and move to the
			 * next sibling if there is one */
			if (!parent)
				return;
			dentry = parent;
		} while (list_empty(&dentry->d_subdirs));

		/* resume the descent at the next unconsumed child */
		dentry = list_entry(dentry->d_subdirs.next,
				    struct dentry, d_u.d_child);
	}
}
 983
 984/*
 985 * destroy the dentries attached to a superblock on unmounting
 986 * - we don't need to use dentry->d_lock because:
 987 *   - the superblock is detached from all mountings and open files, so the
 988 *     dentry trees will not be rearranged by the VFS
 989 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 990 *     any dentries belonging to this superblock that it comes across
 991 *   - the filesystem itself is no longer permitted to rearrange the dentries
 992 *     in this superblock
 993 */
void shrink_dcache_for_umount(struct super_block *sb)
{
	struct dentry *dentry;

	/* the caller must hold s_umount exclusively; if this trylock
	 * succeeds, it does not — that is a bug */
	if (down_read_trylock(&sb->s_umount))
		BUG();

	dentry = sb->s_root;
	sb->s_root = NULL;
	/* drop the reference that s_root held on the root dentry */
	spin_lock(&dentry->d_lock);
	dentry->d_count--;
	spin_unlock(&dentry->d_lock);
	shrink_dcache_for_umount_subtree(dentry);

	/* also destroy any disconnected subtrees anchored on s_anon */
	while (!hlist_bl_empty(&sb->s_anon)) {
		dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
		shrink_dcache_for_umount_subtree(dentry);
	}
}
1013
1014/*
1015 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching.
1018 */
1019 
1020/**
1021 * have_submounts - check for mounts over a dentry
1022 * @parent: dentry to check.
1023 *
1024 * Return true if the parent or its subdirectories contain
1025 * a mount point
1026 */
int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;		/* 1 once we hold rename_lock for writing */

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;

	if (d_mountpoint(parent))
		goto positive;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	/* scan the children of this_parent */
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Have we found a mount point ? */
		if (d_mountpoint(dentry)) {
			spin_unlock(&dentry->d_lock);
			spin_unlock(&this_parent->d_lock);
			goto positive;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			/*
			 * Descend: keep the child's lock, release the
			 * parent's. The spin_release/spin_acquire pair only
			 * re-annotates the held child lock for lockdep, so
			 * the unlock above doesn't look like an inversion.
			 */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		/* RCU keeps the parent dentry alive across the unlock */
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			 (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	/* a rename raced with the lockless walk: redo it under the lock */
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 0; /* No mount points found in tree */
positive:
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return 1;

rename_retry:
	/* retry with rename_lock held to exclude renames entirely */
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
EXPORT_SYMBOL(have_submounts);
1109
1110/*
1111 * Search the dentry child list for the specified parent,
1112 * and move any unused dentries to the end of the unused
1113 * list for prune_dcache(). We descend to the next level
1114 * whenever the d_subdirs list is non-empty and continue
1115 * searching.
1116 *
1117 * It returns zero iff there are no unused children,
1118 * otherwise  it returns the number of children moved to
1119 * the end of the unused list. This may not be the total
1120 * number of unused children, because select_parent can
1121 * drop the lock and return early due to latency
1122 * constraints.
1123 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int found = 0;		/* dentries moved to the LRU tail so far */
	int locked = 0;		/* 1 once we hold rename_lock for writing */

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = parent;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

		/* 
		 * move only zero ref count dentries to the end 
		 * of the unused list for prune_dcache
		 */
		if (!dentry->d_count) {
			dentry_lru_move_tail(dentry);
			found++;
		} else {
			/* in-use dentries are not prunable: drop from LRU */
			dentry_lru_del(dentry);
		}

		/*
		 * We can return to the caller if we have found some (this
		 * ensures forward progress). We'll be coming back to find
		 * the rest.
		 */
		if (found && need_resched()) {
			spin_unlock(&dentry->d_lock);
			goto out;
		}

		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			/* hand the lockdep "held lock" role from parent to
			 * child so the unlock below doesn't trip lockdep */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}

		spin_unlock(&dentry->d_lock);
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		/* RCU keeps the parent alive across the unlock/lock window */
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			(!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
out:
	spin_unlock(&this_parent->d_lock);
	/* seqcount mismatch means a rename raced with us: retry locked */
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return found;

rename_retry:
	/* progress already made is still valid, so report it instead of
	 * retrying (the caller will call us again anyway) */
	if (found)
		return found;
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
1220
1221/**
1222 * shrink_dcache_parent - prune dcache
1223 * @parent: parent of entries to prune
1224 *
1225 * Prune the dcache to remove unused children of the parent dentry.
1226 */
1227 
1228void shrink_dcache_parent(struct dentry * parent)
1229{
1230        struct super_block *sb = parent->d_sb;
1231        int found;
1232
1233        while ((found = select_parent(parent)) != 0)
1234                __shrink_dcache_sb(sb, &found, 0);
1235}
1236EXPORT_SYMBOL(shrink_dcache_parent);
1237
1238/*
1239 * Scan `nr' dentries and return the number which remain.
1240 *
1241 * We need to avoid reentering the filesystem if the caller is performing a
1242 * GFP_NOFS allocation attempt.  One example deadlock is:
1243 *
1244 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1245 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1246 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1247 *
1248 * In this case we return -1 to tell the caller that we baled.
1249 */
1250static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1251{
1252        if (nr) {
1253                if (!(gfp_mask & __GFP_FS))
1254                        return -1;
1255                prune_dcache(nr);
1256        }
1257
1258        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1259}
1260
/* registered with the VM so memory pressure can call shrink_dcache_memory() */
static struct shrinker dcache_shrinker = {
	.shrink = shrink_dcache_memory,
	.seeks = DEFAULT_SEEKS,
};
1265
1266/**
1267 * d_alloc      -       allocate a dcache entry
1268 * @parent: parent of entry to allocate
1269 * @name: qstr of the name
1270 *
1271 * Allocates a dentry. It returns %NULL if there is insufficient memory
1272 * available. On a success the dentry is returned. The name passed in is
1273 * copied and the copy passed in may be reused after this call.
1274 */
1275 
struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	/* short names are stored inline in d_iname; long ones kmalloc'd */
	if (name->len > DNAME_INLINE_LEN-1) {
		dname = kmalloc(name->len + 1, GFP_KERNEL);
		if (!dname) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else  {
		dname = dentry->d_iname;
	}
	dentry->d_name.name = dname;

	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	memcpy(dname, name->name, name->len);
	dname[name->len] = 0;	/* keep the copied name NUL-terminated */

	dentry->d_count = 1;	/* the reference handed to the caller */
	dentry->d_flags = DCACHE_UNHASHED;	/* not in the hash table yet */
	spin_lock_init(&dentry->d_lock);
	seqcount_init(&dentry->d_seq);
	dentry->d_inode = NULL;	/* negative until d_instantiate() */
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	INIT_HLIST_BL_NODE(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	INIT_LIST_HEAD(&dentry->d_u.d_child);

	if (parent) {
		spin_lock(&parent->d_lock);
		/*
		 * don't need child lock because it is not subject
		 * to concurrency here
		 */
		__dget_dlock(parent);	/* child holds a ref on its parent */
		dentry->d_parent = parent;
		dentry->d_sb = parent->d_sb;
		d_set_d_op(dentry, dentry->d_sb->s_d_op);
		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
		spin_unlock(&parent->d_lock);
	}

	this_cpu_inc(nr_dentry);	/* per-cpu dentry accounting */

	return dentry;
}
EXPORT_SYMBOL(d_alloc);
1335
1336struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1337{
1338        struct dentry *dentry = d_alloc(NULL, name);
1339        if (dentry) {
1340                dentry->d_sb = sb;
1341                d_set_d_op(dentry, dentry->d_sb->s_d_op);
1342                dentry->d_parent = dentry;
1343                dentry->d_flags |= DCACHE_DISCONNECTED;
1344        }
1345        return dentry;
1346}
1347EXPORT_SYMBOL(d_alloc_pseudo);
1348
1349struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1350{
1351        struct qstr q;
1352
1353        q.name = name;
1354        q.len = strlen(name);
1355        q.hash = full_name_hash(q.name, q.len);
1356        return d_alloc(parent, &q);
1357}
1358EXPORT_SYMBOL(d_alloc_name);
1359
1360void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1361{
1362        WARN_ON_ONCE(dentry->d_op);
1363        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1364                                DCACHE_OP_COMPARE       |
1365                                DCACHE_OP_REVALIDATE    |
1366                                DCACHE_OP_DELETE ));
1367        dentry->d_op = op;
1368        if (!op)
1369                return;
1370        if (op->d_hash)
1371                dentry->d_flags |= DCACHE_OP_HASH;
1372        if (op->d_compare)
1373                dentry->d_flags |= DCACHE_OP_COMPARE;
1374        if (op->d_revalidate)
1375                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1376        if (op->d_delete)
1377                dentry->d_flags |= DCACHE_OP_DELETE;
1378
1379}
1380EXPORT_SYMBOL(d_set_d_op);
1381
/*
 * Attach @inode to @dentry. If @inode is non-NULL the caller must hold
 * inode->i_lock (see d_instantiate()); this function takes d_lock itself.
 */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
	spin_lock(&dentry->d_lock);
	if (inode) {
		if (unlikely(IS_AUTOMOUNT(inode)))
			dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
		list_add(&dentry->d_alias, &inode->i_dentry);
	}
	dentry->d_inode = inode;
	/* bump d_seq so lockless rcu-walk lookups notice the new inode */
	dentry_rcuwalk_barrier(dentry);
	spin_unlock(&dentry->d_lock);
	fsnotify_d_instantiate(dentry, inode);
}
1395
1396/**
1397 * d_instantiate - fill in inode information for a dentry
1398 * @entry: dentry to complete
1399 * @inode: inode to attach to this dentry
1400 *
1401 * Fill in inode information in the entry.
1402 *
1403 * This turns negative dentries into productive full members
1404 * of society.
1405 *
1406 * NOTE! This assumes that the inode count has been incremented
1407 * (or otherwise set) by the caller to indicate that it is now
1408 * in use by the dcache.
1409 */
1410 
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	BUG_ON(!list_empty(&entry->d_alias));	/* must not be aliased yet */
	if (inode)
		spin_lock(&inode->i_lock);
	__d_instantiate(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);
	security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
1422
1423/**
1424 * d_instantiate_unique - instantiate a non-aliased dentry
1425 * @entry: dentry to instantiate
1426 * @inode: inode to attach to this dentry
1427 *
1428 * Fill in inode information in the entry. On success, it returns NULL.
1429 * If an unhashed alias of "entry" already exists, then we return the
1430 * aliased dentry instead and drop one reference to inode.
1431 *
1432 * Note that in order to avoid conflicts with rename() etc, the caller
1433 * had better be holding the parent directory semaphore.
1434 *
1435 * This also assumes that the inode count has been incremented
1436 * (or otherwise set) by the caller to indicate that it is now
1437 * in use by the dcache.
1438 */
static struct dentry *__d_instantiate_unique(struct dentry *entry,
					     struct inode *inode)
{
	struct dentry *alias;
	int len = entry->d_name.len;
	const char *name = entry->d_name.name;
	unsigned int hash = entry->d_name.hash;

	if (!inode) {
		/* negative dentry: nothing to alias against */
		__d_instantiate(entry, NULL);
		return NULL;
	}

	/* look for an existing alias with the same parent and name */
	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
		struct qstr *qstr = &alias->d_name;

		/*
		 * Don't need alias->d_lock here, because aliases with
		 * d_parent == entry->d_parent are not subject to name or
		 * parent changes, because the parent inode i_mutex is held.
		 */
		if (qstr->hash != hash)
			continue;
		if (alias->d_parent != entry->d_parent)
			continue;
		if (dentry_cmp(qstr->name, qstr->len, name, len))
			continue;
		__dget(alias);	/* return the alias with a reference held */
		return alias;
	}

	/* no alias found: instantiate @entry itself */
	__d_instantiate(entry, inode);
	return NULL;
}
1473
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
	struct dentry *result;

	BUG_ON(!list_empty(&entry->d_alias));	/* entry must be fresh */

	if (inode)
		spin_lock(&inode->i_lock);
	result = __d_instantiate_unique(entry, inode);
	if (inode)
		spin_unlock(&inode->i_lock);

	if (!result) {
		/* @entry was instantiated; the inode ref now belongs to it */
		security_d_instantiate(entry, inode);
		return NULL;
	}

	BUG_ON(!d_unhashed(result));
	/* found an existing alias: drop the caller's inode reference */
	iput(inode);
	return result;
}

EXPORT_SYMBOL(d_instantiate_unique);
1497
1498/**
1499 * d_alloc_root - allocate root dentry
1500 * @root_inode: inode to allocate the root for
1501 *
1502 * Allocate a root ("/") dentry for the inode given. The inode is
1503 * instantiated and returned. %NULL is returned if there is insufficient
1504 * memory or the inode passed is %NULL.
1505 */
1506 
1507struct dentry * d_alloc_root(struct inode * root_inode)
1508{
1509        struct dentry *res = NULL;
1510
1511        if (root_inode) {
1512                static const struct qstr name = { .name = "/", .len = 1 };
1513
1514                res = d_alloc(NULL, &name);
1515                if (res) {
1516                        res->d_sb = root_inode->i_sb;
1517                        d_set_d_op(res, res->d_sb->s_d_op);
1518                        res->d_parent = res;
1519                        d_instantiate(res, root_inode);
1520                }
1521        }
1522        return res;
1523}
1524EXPORT_SYMBOL(d_alloc_root);
1525
1526static struct dentry * __d_find_any_alias(struct inode *inode)
1527{
1528        struct dentry *alias;
1529
1530        if (list_empty(&inode->i_dentry))
1531                return NULL;
1532        alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1533        __dget(alias);
1534        return alias;
1535}
1536
1537static struct dentry * d_find_any_alias(struct inode *inode)
1538{
1539        struct dentry *de;
1540
1541        spin_lock(&inode->i_lock);
1542        de = __d_find_any_alias(inode);
1543        spin_unlock(&inode->i_lock);
1544        return de;
1545}
1546
1547
1548/**
1549 * d_obtain_alias - find or allocate a dentry for a given inode
1550 * @inode: inode to allocate the dentry for
1551 *
1552 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1553 * similar open by handle operations.  The returned dentry may be anonymous,
1554 * or may have a full name (if the inode was already in the cache).
1555 *
1556 * When called on a directory inode, we must ensure that the inode only ever
1557 * has one dentry.  If a dentry is found, that is returned instead of
1558 * allocating a new one.
1559 *
1560 * On successful return, the reference to the inode has been transferred
1561 * to the dentry.  In case of an error the reference on the inode is released.
1562 * To make it easier to use in export operations a %NULL or IS_ERR inode may
 * be passed in, and the error will be propagated to the return value,
1564 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1565 */
struct dentry *d_obtain_alias(struct inode *inode)
{
	static const struct qstr anonstring = { .name = "" };
	struct dentry *tmp;
	struct dentry *res;

	if (!inode)
		return ERR_PTR(-ESTALE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* fast path: an alias already exists */
	res = d_find_any_alias(inode);
	if (res)
		goto out_iput;

	tmp = d_alloc(NULL, &anonstring);
	if (!tmp) {
		res = ERR_PTR(-ENOMEM);
		goto out_iput;
	}
	tmp->d_parent = tmp; /* make sure dput doesn't croak */


	/* recheck under i_lock: an alias may have appeared while we were
	 * allocating (classic alloc-then-recheck pattern) */
	spin_lock(&inode->i_lock);
	res = __d_find_any_alias(inode);
	if (res) {
		spin_unlock(&inode->i_lock);
		dput(tmp);
		goto out_iput;
	}

	/* attach a disconnected dentry */
	spin_lock(&tmp->d_lock);
	tmp->d_sb = inode->i_sb;
	d_set_d_op(tmp, tmp->d_sb->s_d_op);
	tmp->d_inode = inode;
	tmp->d_flags |= DCACHE_DISCONNECTED;
	list_add(&tmp->d_alias, &inode->i_dentry);
	/* hash it on the superblock's anonymous list; bit 0 of the list
	 * head is the bl-list lock */
	bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	tmp->d_flags &= ~DCACHE_UNHASHED;
	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
	__bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
	spin_unlock(&tmp->d_lock);
	spin_unlock(&inode->i_lock);

	/* success: the passed-in inode reference is now held by tmp */
	return tmp;

 out_iput:
	/* existing alias or error: drop the passed-in inode reference */
	iput(inode);
	return res;
}
EXPORT_SYMBOL(d_obtain_alias);
1618
1619/**
1620 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1621 * @inode:  the inode which may have a disconnected dentry
1622 * @dentry: a negative dentry which we want to point to the inode.
1623 *
1624 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1625 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1626 * and return it, else simply d_add the inode to the dentry and return NULL.
1627 *
1628 * This is needed in the lookup routine of any filesystem that is exportable
1629 * (via knfsd) so that we can build dcache paths to directories effectively.
1630 *
1631 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1632 * is returned.  This matches the expected return value of ->lookup.
1633 *
1634 */
struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
{
	struct dentry *new = NULL;

	if (inode && S_ISDIR(inode->i_mode)) {
		spin_lock(&inode->i_lock);
		new = __d_find_alias(inode, 1);
		if (new) {
			/* only disconnected (NFS-handle) aliases expected */
			BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
			spin_unlock(&inode->i_lock);
			security_d_instantiate(new, inode);
			/* move the existing alias into @dentry's place */
			d_move(new, dentry);
			/* the alias holds its own ref; drop the caller's */
			iput(inode);
		} else {
			/* already taking inode->i_lock, so d_add() by hand */
			__d_instantiate(dentry, inode);
			spin_unlock(&inode->i_lock);
			security_d_instantiate(dentry, inode);
			d_rehash(dentry);
		}
	} else
		d_add(dentry, inode);
	return new;
}
EXPORT_SYMBOL(d_splice_alias);
1660
1661/**
1662 * d_add_ci - lookup or allocate new dentry with case-exact name
1663 * @inode:  the inode case-insensitive lookup has found
1664 * @dentry: the negative dentry that was passed to the parent's lookup func
1665 * @name:   the case-exact name to be associated with the returned dentry
1666 *
1667 * This is to avoid filling the dcache with case-insensitive names to the
1668 * same inode, only the actual correct case is stored in the dcache for
1669 * case-insensitive filesystems.
1670 *
 * For a case-insensitive lookup match, if the case-exact dentry
 * already exists in the dcache, use it and return it.
1673 *
1674 * If no entry exists with the exact case name, allocate new dentry with
1675 * the exact case, and return the spliced entry.
1676 */
struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
			struct qstr *name)
{
	int error;
	struct dentry *found;
	struct dentry *new;

	/*
	 * First check if a dentry matching the name already exists,
	 * if not go ahead and create it now.
	 */
	found = d_hash_and_lookup(dentry->d_parent, name);
	if (!found) {
		new = d_alloc(dentry->d_parent, name);
		if (!new) {
			error = -ENOMEM;
			goto err_out;
		}

		found = d_splice_alias(inode, new);
		if (found) {
			/* splice returned a pre-existing alias: use it */
			dput(new);
			return found;
		}
		return new;
	}

	/*
	 * If a matching dentry exists, and it's not negative use it.
	 *
	 * Decrement the reference count to balance the iget() done
	 * earlier on.
	 */
	if (found->d_inode) {
		if (unlikely(found->d_inode != inode)) {
			/* This can't happen because bad inodes are unhashed. */
			BUG_ON(!is_bad_inode(inode));
			BUG_ON(!is_bad_inode(found->d_inode));
		}
		iput(inode);
		return found;
	}

	/*
	 * Negative dentry: instantiate it unless the inode is a directory and
	 * already has a dentry.
	 */
	spin_lock(&inode->i_lock);
	if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
		__d_instantiate(found, inode);
		spin_unlock(&inode->i_lock);
		security_d_instantiate(found, inode);
		return found;
	}

	/*
	 * In case a directory already has a (disconnected) entry grab a
	 * reference to it, move it in place and use it.
	 */
	new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
	__dget(new);
	spin_unlock(&inode->i_lock);
	security_d_instantiate(found, inode);
	d_move(new, found);
	iput(inode);	/* the alias keeps its own inode reference */
	dput(found);	/* drop the now-unneeded negative dentry */
	return new;

err_out:
	/* allocation failed: release the caller's inode reference */
	iput(inode);
	return ERR_PTR(error);
}
EXPORT_SYMBOL(d_add_ci);
1750
1751/**
1752 * __d_lookup_rcu - search for a dentry (racy, store-free)
1753 * @parent: parent dentry
1754 * @name: qstr of name we wish to find
1755 * @seq: returns d_seq value at the point where the dentry was found
1756 * @inode: returns dentry->d_inode when the inode was found valid.
1757 * Returns: dentry, or NULL
1758 *
1759 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1760 * resolution (store-free path walking) design described in
1761 * Documentation/filesystems/path-lookup.txt.
1762 *
1763 * This is not to be used outside core vfs.
1764 *
1765 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1766 * held, and rcu_read_lock held. The returned dentry must not be stored into
1767 * without taking d_lock and checking d_seq sequence count against @seq
1768 * returned here.
1769 *
1770 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1771 * function.
1772 *
1773 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1774 * the returned dentry, so long as its parent's seqlock is checked after the
1775 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1776 * is formed, giving integrity down the path walk.
1777 */
struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
                                unsigned *seq, struct inode **inode)
{
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
        struct dcache_hash_bucket *b = d_hash(parent, hash);
        struct hlist_bl_node *node;
        struct dentry *dentry;

        /*
         * Note: There is significant duplication with __d_lookup which is
         * required to prevent single threaded performance regressions
         * especially on architectures where smp_rmb (in seqcounts) are costly.
         * Keep the two functions in sync.
         */

        /*
         * The hash list is protected using RCU.
         *
         * Carefully use d_seq when comparing a candidate dentry, to avoid
         * races with d_move().
         *
         * It is possible that concurrent renames can mess up our list
         * walk here and result in missing our dentry, resulting in the
         * false-negative result. d_lookup() protects against concurrent
         * renames using rename_lock seqlock.
         *
         * See Documentation/vfs/dcache-locking.txt for more details.
         */
        hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
                struct inode *i;
                const char *tname;
                int tlen;

                /* Cheap mismatch rejection before sampling d_seq */
                if (dentry->d_name.hash != hash)
                        continue;

seqretry:
                /*
                 * Sample d_seq before loading parent/name/len/inode; it is
                 * re-checked below so all of them come from one consistent
                 * snapshot of the dentry.
                 */
                *seq = read_seqcount_begin(&dentry->d_seq);
                if (dentry->d_parent != parent)
                        continue;
                if (d_unhashed(dentry))
                        continue;
                tlen = dentry->d_name.len;
                tname = dentry->d_name.name;
                i = dentry->d_inode;
                prefetch(tname);
                if (i)
                        prefetch(i);
                /*
                 * This seqcount check is required to ensure name and
                 * len are loaded atomically, so as not to walk off the
                 * edge of memory when walking. If we could load this
                 * atomically some other way, we could drop this check.
                 */
                if (read_seqcount_retry(&dentry->d_seq, *seq))
                        goto seqretry;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
                        if (parent->d_op->d_compare(parent, *inode,
                                                dentry, i,
                                                tlen, tname, name))
                                continue;
                } else {
                        if (dentry_cmp(tname, tlen, str, len))
                                continue;
                }
                /*
                 * No extra seqcount check is required after the name
                 * compare. The caller must perform a seqcount check in
                 * order to do anything useful with the returned dentry
                 * anyway.
                 */
                *inode = i;
                return dentry;
        }
        return NULL;
}
1856
1857/**
1858 * d_lookup - search for a dentry
1859 * @parent: parent dentry
1860 * @name: qstr of name we wish to find
1861 * Returns: dentry, or NULL
1862 *
1863 * d_lookup searches the children of the parent dentry for the name in
1864 * question. If the dentry is found its reference count is incremented and the
1865 * dentry is returned. The caller must use dput to free the entry when it has
1866 * finished using it. %NULL is returned if the dentry does not exist.
1867 */
1868struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1869{
1870        struct dentry *dentry;
1871        unsigned seq;
1872
1873        do {
1874                seq = read_seqbegin(&rename_lock);
1875                dentry = __d_lookup(parent, name);
1876                if (dentry)
1877                        break;
1878        } while (read_seqretry(&rename_lock, seq));
1879        return dentry;
1880}
1881EXPORT_SYMBOL(d_lookup);
1882
1883/**
1884 * __d_lookup - search for a dentry (racy)
1885 * @parent: parent dentry
1886 * @name: qstr of name we wish to find
1887 * Returns: dentry, or NULL
1888 *
1889 * __d_lookup is like d_lookup, however it may (rarely) return a
1890 * false-negative result due to unrelated rename activity.
1891 *
1892 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1893 * however it must be used carefully, eg. with a following d_lookup in
1894 * the case of failure.
1895 *
1896 * __d_lookup callers must be commented.
1897 */
struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
        unsigned int len = name->len;
        unsigned int hash = name->hash;
        const unsigned char *str = name->name;
        struct dcache_hash_bucket *b = d_hash(parent, hash);
        struct hlist_bl_node *node;
        struct dentry *found = NULL;
        struct dentry *dentry;

        /*
         * Note: There is significant duplication with __d_lookup_rcu which is
         * required to prevent single threaded performance regressions
         * especially on architectures where smp_rmb (in seqcounts) are costly.
         * Keep the two functions in sync.
         */

        /*
         * The hash list is protected using RCU.
         *
         * Take d_lock when comparing a candidate dentry, to avoid races
         * with d_move().
         *
         * It is possible that concurrent renames can mess up our list
         * walk here and result in missing our dentry, resulting in the
         * false-negative result. d_lookup() protects against concurrent
         * renames using rename_lock seqlock.
         *
         * See Documentation/vfs/dcache-locking.txt for more details.
         */
        rcu_read_lock();

        hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
                const char *tname;
                int tlen;

                /* Cheap mismatch rejection before taking d_lock */
                if (dentry->d_name.hash != hash)
                        continue;

                spin_lock(&dentry->d_lock);
                if (dentry->d_parent != parent)
                        goto next;
                if (d_unhashed(dentry))
                        goto next;

                /*
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
                tlen = dentry->d_name.len;
                tname = dentry->d_name.name;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
                        if (parent->d_op->d_compare(parent, parent->d_inode,
                                                dentry, dentry->d_inode,
                                                tlen, tname, name))
                                goto next;
                } else {
                        if (dentry_cmp(tname, tlen, str, len))
                                goto next;
                }

                /* Take a reference while still holding d_lock */
                dentry->d_count++;
                found = dentry;
                spin_unlock(&dentry->d_lock);
                break;
next:
                spin_unlock(&dentry->d_lock);
        }
        rcu_read_unlock();

        return found;
}
1970
1971/**
1972 * d_hash_and_lookup - hash the qstr then search for a dentry
1973 * @dir: Directory to search in
1974 * @name: qstr of name we wish to find
1975 *
1976 * On hash failure or on lookup failure NULL is returned.
1977 */
1978struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1979{
1980        struct dentry *dentry = NULL;
1981
1982        /*
1983         * Check for a fs-specific hash function. Note that we must
1984         * calculate the standard hash first, as the d_op->d_hash()
1985         * routine may choose to leave the hash value unchanged.
1986         */
1987        name->hash = full_name_hash(name->name, name->len);
1988        if (dir->d_flags & DCACHE_OP_HASH) {
1989                if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1990                        goto out;
1991        }
1992        dentry = d_lookup(dir, name);
1993out:
1994        return dentry;
1995}
1996
1997/**
1998 * d_validate - verify dentry provided from insecure source (deprecated)
1999 * @dentry: The dentry alleged to be valid child of @dparent
2000 * @dparent: The parent dentry (known to be valid)
2001 *
2002 * An insecure source has sent us a dentry, here we verify it and dget() it.
2003 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
2005 *
2006 * This function is slow for big directories, and deprecated, do not use it.
2007 */
2008int d_validate(struct dentry *dentry, struct dentry *dparent)
2009{
2010        struct dentry *child;
2011
2012        spin_lock(&dparent->d_lock);
2013        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2014                if (dentry == child) {
2015                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2016                        __dget_dlock(dentry);
2017                        spin_unlock(&dentry->d_lock);
2018                        spin_unlock(&dparent->d_lock);
2019                        return 1;
2020                }
2021        }
2022        spin_unlock(&dparent->d_lock);
2023
2024        return 0;
2025}
2026EXPORT_SYMBOL(d_validate);
2027
2028/*
2029 * When a file is deleted, we have two options:
2030 * - turn this dentry into a negative dentry
2031 * - unhash this dentry and free it.
2032 *
2033 * Usually, we want to just turn this into
2034 * a negative dentry, but if anybody else is
2035 * currently using the dentry or the inode
2036 * we can't do that and we fall back on removing
2037 * it from the hash queues and waiting for
2038 * it to be deleted later when it has no users
2039 */
2040 
2041/**
2042 * d_delete - delete a dentry
2043 * @dentry: The dentry to delete
2044 *
2045 * Turn the dentry into a negative dentry if possible, otherwise
2046 * remove it from the hash queues so it can be deleted later
2047 */
2048 
void d_delete(struct dentry * dentry)
{
        struct inode *inode;
        int isdir = 0;
        /*
         * Are we the only user?
         */
again:
        spin_lock(&dentry->d_lock);
        inode = dentry->d_inode;
        /*
         * NOTE(review): inode is dereferenced unconditionally here although
         * the trylock below still tests "inode &&" - this implies d_delete()
         * expects a positive dentry; confirm no caller passes a negative one.
         */
        isdir = S_ISDIR(inode->i_mode);
        if (dentry->d_count == 1) {
                /*
                 * The documented lock order is i_lock before d_lock, so we
                 * may only trylock i_lock here; back off and retry from
                 * scratch on contention.
                 */
                if (inode && !spin_trylock(&inode->i_lock)) {
                        spin_unlock(&dentry->d_lock);
                        cpu_relax();
                        goto again;
                }
                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
                /* dentry_unlink_inode() releases both d_lock and i_lock */
                dentry_unlink_inode(dentry);
                fsnotify_nameremove(dentry, isdir);
                return;
        }

        /* Other users exist: just unhash so the last dput() frees it */
        if (!d_unhashed(dentry))
                __d_drop(dentry);

        spin_unlock(&dentry->d_lock);

        fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
2080
2081static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
2082{
2083        BUG_ON(!d_unhashed(entry));
2084        spin_lock_bucket(b);
2085        entry->d_flags &= ~DCACHE_UNHASHED;
2086        hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2087        spin_unlock_bucket(b);
2088}
2089
/* Rehash @entry onto the bucket for its parent + name hash. d_lock held. */
static void _d_rehash(struct dentry * entry)
{
        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
}
2094
2095/**
2096 * d_rehash     - add an entry back to the hash
2097 * @entry: dentry to add to the hash
2098 *
2099 * Adds a dentry to the hash according to its name.
2100 */
2101 
void d_rehash(struct dentry * entry)
{
        /* Take d_lock ourselves; _d_rehash() expects it held */
        spin_lock(&entry->d_lock);
        _d_rehash(entry);
        spin_unlock(&entry->d_lock);
}
EXPORT_SYMBOL(d_rehash);
2109
2110/**
2111 * dentry_update_name_case - update case insensitive dentry with a new name
2112 * @dentry: dentry to be updated
2113 * @name: new name
2114 *
2115 * Update a case insensitive dentry with new case of name.
2116 *
2117 * dentry must have been returned by d_lookup with name @name. Old and new
2118 * name lengths must match (ie. no d_compare which allows mismatched name
2119 * lengths).
2120 *
2121 * Parent inode i_mutex must be held over d_lookup and into this call (to
2122 * keep renames and concurrent inserts, and readdir(2) away).
2123 */
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
        BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */

        spin_lock(&dentry->d_lock);
        /* d_seq write section lets concurrent RCU-walk lookups retry */
        write_seqcount_begin(&dentry->d_seq);
        /* Same length guaranteed above, so overwrite the name in place */
        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
        write_seqcount_end(&dentry->d_seq);
        spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL(dentry_update_name_case);
2136
/*
 * Exchange the names of @dentry and @target, coping with each name living
 * either in the dentry's embedded d_iname array ("internal") or in
 * separately allocated storage ("external", see dname_external()).
 * Callers hold both d_locks and are inside both d_seq write sections.
 */
static void switch_names(struct dentry *dentry, struct dentry *target)
{
        if (dname_external(target)) {
                if (dname_external(dentry)) {
                        /*
                         * Both external: swap the pointers
                         */
                        swap(target->d_name.name, dentry->d_name.name);
                } else {
                        /*
                         * dentry:internal, target:external.  Steal target's
                         * storage and make target internal.
                         */
                        memcpy(target->d_iname, dentry->d_name.name,
                                        dentry->d_name.len + 1);
                        dentry->d_name.name = target->d_name.name;
                        target->d_name.name = target->d_iname;
                }
        } else {
                if (dname_external(dentry)) {
                        /*
                         * dentry:external, target:internal.  Give dentry's
                         * storage to target and make dentry internal
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        target->d_name.name = dentry->d_name.name;
                        dentry->d_name.name = dentry->d_iname;
                } else {
                        /*
                         * Both are internal.  Just copy target to dentry
                         */
                        memcpy(dentry->d_iname, target->d_name.name,
                                        target->d_name.len + 1);
                        /*
                         * Copy (not swap) the length: target's name is about
                         * to be discarded, so its stale len doesn't matter.
                         */
                        dentry->d_name.len = target->d_name.len;
                        return;
                }
        }
        swap(dentry->d_name.len, target->d_name.len);
}
2177
/*
 * Take all the locks needed to move @dentry under @target's parent:
 * both parents' d_locks first (ancestor before descendant, matching the
 * d_parent lock order documented at the top of this file), then both
 * children's d_locks in address order.  Distinct lockdep subclasses
 * (DENTRY_D_LOCK_NESTED, 2, 3) keep lockdep from reporting false
 * deadlocks on the nested acquisitions.
 */
static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
{
        /*
         * XXXX: do we really need to take target->d_lock?
         */
        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
                spin_lock(&target->d_parent->d_lock);
        else {
                /* Lock the ancestor parent before the descendant parent */
                if (d_ancestor(dentry->d_parent, target->d_parent)) {
                        spin_lock(&dentry->d_parent->d_lock);
                        spin_lock_nested(&target->d_parent->d_lock,
                                                DENTRY_D_LOCK_NESTED);
                } else {
                        spin_lock(&target->d_parent->d_lock);
                        spin_lock_nested(&dentry->d_parent->d_lock,
                                                DENTRY_D_LOCK_NESTED);
                }
        }
        /* Children are unrelated here, so order their locks by address */
        if (target < dentry) {
                spin_lock_nested(&target->d_lock, 2);
                spin_lock_nested(&dentry->d_lock, 3);
        } else {
                spin_lock_nested(&dentry->d_lock, 2);
                spin_lock_nested(&target->d_lock, 3);
        }
}
2204
2205static void dentry_unlock_parents_for_move(struct dentry *dentry,
2206                                        struct dentry *target)
2207{
2208        if (target->d_parent != dentry->d_parent)
2209                spin_unlock(&dentry->d_parent->d_lock);
2210        if (target->d_parent != target)
2211                spin_unlock(&target->d_parent->d_lock);
2212}
2213
2214/*
2215 * When switching names, the actual string doesn't strictly have to
2216 * be preserved in the target - because we're dropping the target
2217 * anyway. As such, we can just do a simple memcpy() to copy over
2218 * the new name before we switch.
2219 *
2220 * Note that we have to be a lot more careful about getting the hash
2221 * switched - we have to switch the hash value properly even if it
2222 * then no longer matches the actual (corrupted) string of the target.
2223 * The hash value has to match the hash queue that the dentry is on..
2224 */
2225/*
2226 * d_move - move a dentry
2227 * @dentry: entry to move
2228 * @target: new dentry
2229 *
2230 * Update the dcache to reflect the move of a file name. Negative
2231 * dcache entries should not be moved in this way.
2232 */
void d_move(struct dentry * dentry, struct dentry * target)
{
        if (!dentry->d_inode)
                printk(KERN_WARNING "VFS: moving negative dcache entry\n");

        /* Neither may be a descendant of the other */
        BUG_ON(d_ancestor(dentry, target));
        BUG_ON(d_ancestor(target, dentry));

        /* rename_lock makes lockless d_lookup()/path walks retry */
        write_seqlock(&rename_lock);

        dentry_lock_for_move(dentry, target);

        /* d_seq write sections let RCU-walk detect the rename */
        write_seqcount_begin(&dentry->d_seq);
        write_seqcount_begin(&target->d_seq);

        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */

        /*
         * Move the dentry to the target hash queue. Don't bother checking
         * for the same hash queue because of how unlikely it is.
         */
        __d_drop(dentry);
        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));

        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);

        list_del(&dentry->d_u.d_child);
        list_del(&target->d_u.d_child);

        /* Switch the names.. */
        switch_names(dentry, target);
        swap(dentry->d_name.hash, target->d_name.hash);

        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
                /* dentry was a root: target becomes its own (unhashed) root */
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
                INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
                swap(dentry->d_parent, target->d_parent);

                /* And add them back to the (new) parent lists */
                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
        }

        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);

        write_seqcount_end(&target->d_seq);
        write_seqcount_end(&dentry->d_seq);

        dentry_unlock_parents_for_move(dentry, target);
        spin_unlock(&target->d_lock);
        /* fsnotify is called while dentry->d_lock is still held */
        fsnotify_d_move(dentry);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
}
EXPORT_SYMBOL(d_move);
2291
2292/**
2293 * d_ancestor - search for an ancestor
2294 * @p1: ancestor dentry
2295 * @p2: child dentry
2296 *
2297 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2298 * an ancestor of p2, else NULL.
2299 */
2300struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2301{
2302        struct dentry *p;
2303
2304        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2305                if (p->d_parent == p1)
2306                        return p;
2307        }
2308        return NULL;
2309}
2310
2311/*
2312 * This helper attempts to cope with remotely renamed directories
2313 *
2314 * It assumes that the caller is already holding
2315 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
2316 *
2317 * Note: If ever the locking in lock_rename() changes, then please
2318 * remember to update this too...
2319 */
static struct dentry *__d_unalias(struct inode *inode,
                struct dentry *dentry, struct dentry *alias)
{
        struct mutex *m1 = NULL, *m2 = NULL;
        struct dentry *ret;

        /* If alias and dentry share a parent, then no extra locks required */
        if (alias->d_parent == dentry->d_parent)
                goto out_unalias;

        /* Check for loops */
        ret = ERR_PTR(-ELOOP);
        if (d_ancestor(alias, dentry))
                goto out_err;

        /*
         * See lock_rename().  Trylock only: we are called with i_lock held,
         * so on contention we must fail with -EBUSY rather than block.
         */
        ret = ERR_PTR(-EBUSY);
        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
                goto out_err;
        m1 = &dentry->d_sb->s_vfs_rename_mutex;
        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
                goto out_err;
        m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
        d_move(alias, dentry);
        ret = alias;
out_err:
        /* i_lock is always dropped here, success or failure */
        spin_unlock(&inode->i_lock);
        if (m2)
                mutex_unlock(m2);
        if (m1)
                mutex_unlock(m1);
        return ret;
}
2354
2355/*
2356 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2357 * named dentry in place of the dentry to be replaced.
2358 * returns with anon->d_lock held!
2359 */
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
        struct dentry *dparent, *aparent;

        dentry_lock_for_move(anon, dentry);

        /* d_seq write sections let RCU-walk detect the switch */
        write_seqcount_begin(&dentry->d_seq);
        write_seqcount_begin(&anon->d_seq);

        dparent = dentry->d_parent;
        aparent = anon->d_parent;

        /* Exchange names and hashes, then exchange tree positions */
        switch_names(dentry, anon);
        swap(dentry->d_name.hash, anon->d_name.hash);

        /* If the other dentry was its own parent (a root), become a root */
        dentry->d_parent = (aparent == anon) ? dentry : aparent;
        list_del(&dentry->d_u.d_child);
        if (!IS_ROOT(dentry))
                list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
        else
                INIT_LIST_HEAD(&dentry->d_u.d_child);

        anon->d_parent = (dparent == dentry) ? anon : dparent;
        list_del(&anon->d_u.d_child);
        if (!IS_ROOT(anon))
                list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
        else
                INIT_LIST_HEAD(&anon->d_u.d_child);

        write_seqcount_end(&dentry->d_seq);
        write_seqcount_end(&anon->d_seq);

        dentry_unlock_parents_for_move(anon, dentry);
        spin_unlock(&dentry->d_lock);

        /* anon->d_lock still locked, returns locked */
        anon->d_flags &= ~DCACHE_DISCONNECTED;
}
2398
2399/**
2400 * d_materialise_unique - introduce an inode into the tree
2401 * @dentry: candidate dentry
2402 * @inode: inode to bind to the dentry, to which aliases may be attached
2403 *
 * Introduces a dentry into the tree,
2405 * root directory alias in its place if there is one
2406 */
struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
{
        struct dentry *actual;

        BUG_ON(!d_unhashed(dentry));

        if (!inode) {
                /* Negative dentry: just instantiate as-is and hash it */
                actual = dentry;
                __d_instantiate(dentry, NULL);
                d_rehash(actual);
                goto out_nolock;
        }

        spin_lock(&inode->i_lock);

        if (S_ISDIR(inode->i_mode)) {
                struct dentry *alias;

                /* Does an aliased dentry already exist? */
                alias = __d_find_alias(inode, 0);
                if (alias) {
                        actual = alias;
                        /* Is this an anonymous mountpoint that we could splice
                         * into our tree? */
                        if (IS_ROOT(alias)) {
                                /* returns with alias->d_lock held */
                                __d_materialise_dentry(dentry, alias);
                                __d_drop(alias);
                                goto found;
                        }
                        /* Nope, but we must(!) avoid directory aliasing */
                        actual = __d_unalias(inode, dentry, alias);
                        /* __d_unalias() has dropped inode->i_lock for us */
                        if (IS_ERR(actual))
                                dput(alias);
                        goto out_nolock;
                }
        }

        /* Add a unique reference */
        actual = __d_instantiate_unique(dentry, inode);
        if (!actual)
                actual = dentry;
        else
                BUG_ON(!d_unhashed(actual));

        spin_lock(&actual->d_lock);
found:
        _d_rehash(actual);
        spin_unlock(&actual->d_lock);
        spin_unlock(&inode->i_lock);
out_nolock:
        if (actual == dentry) {
                security_d_instantiate(dentry, inode);
                return NULL;
        }

        /* An existing alias is used instead: drop our inode reference */
        iput(inode);
        return actual;
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
2466
/*
 * Copy @namelen bytes of @str in front of *@buffer, moving the buffer
 * pointer back and shrinking *@buflen accordingly.
 *
 * Note: *@buflen is decremented even when the data does not fit, so it
 * goes (and stays) negative once the buffer has overflowed.
 */
static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
        if ((*buflen -= namelen) < 0)
                return -ENAMETOOLONG;
        *buffer -= namelen;
        memcpy(*buffer, str, namelen);
        return 0;
}
2476
/* Prepend a qstr's characters (length-counted, not NUL-terminated). */
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
{
        return prepend(buffer, buflen, name->name, name->len);
}
2481
2482/**
2483 * prepend_path - Prepend path string to a buffer
2484 * @path: the dentry/vfsmount to report
2485 * @root: root vfsmnt/dentry (may be modified by this function)
2486 * @buffer: pointer to the end of the buffer
2487 * @buflen: pointer to buffer length
2488 *
2489 * Caller holds the rename_lock.
2490 *
2491 * If path is not reachable from the supplied root, then the value of
2492 * root is changed (without modifying refcounts).
2493 */
static int prepend_path(const struct path *path, struct path *root,
                        char **buffer, int *buflen)
{
        struct dentry *dentry = path->dentry;
        struct vfsmount *vfsmnt = path->mnt;
        bool slash = false;
        int error = 0;

        /* Read-lock the mount tree: mnt_parent/mnt_mountpoint stay stable */
        br_read_lock(vfsmount_lock);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry * parent;

                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
                        /* Global root? */
                        if (vfsmnt->mnt_parent == vfsmnt) {
                                goto global_root;
                        }
                        /* Cross the mountpoint up into the parent mount */
                        dentry = vfsmnt->mnt_mountpoint;
                        vfsmnt = vfsmnt->mnt_parent;
                        continue;
                }
                parent = dentry->d_parent;
                prefetch(parent);
                /* d_lock keeps the name stable against concurrent renames */
                spin_lock(&dentry->d_lock);
                error = prepend_name(buffer, buflen, &dentry->d_name);
                spin_unlock(&dentry->d_lock);
                if (!error)
                        error = prepend(buffer, buflen, "/", 1);
                if (error)
                        break;

                slash = true;
                dentry = parent;
        }

out:
        /* Path was the root itself: emit a lone "/" */
        if (!error && !slash)
                error = prepend(buffer, buflen, "/", 1);

        br_read_unlock(vfsmount_lock);
        return error;

global_root:
        /*
         * Filesystems needing to implement special "root names"
         * should do so with ->d_dname()
         */
        if (IS_ROOT(dentry) &&
            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
                WARN(1, "Root dentry has weird name <%.*s>\n",
                     (int) dentry->d_name.len, dentry->d_name.name);
        }
        /* Tell the caller where the walk actually terminated */
        root->mnt = vfsmnt;
        root->dentry = dentry;
        goto out;
}
2550
2551/**
2552 * __d_path - return the path of a dentry
2553 * @path: the dentry/vfsmount to report
2554 * @root: root vfsmnt/dentry (may be modified by this function)
2555 * @buf: buffer to return value in
2556 * @buflen: buffer length
2557 *
2558 * Convert a dentry into an ASCII path name.
2559 *
2560 * Returns a pointer into the buffer or an error code if the
2561 * path was too long.
2562 *
2563 * "buflen" should be positive.
2564 *
2565 * If path is not reachable from the supplied root, then the value of
2566 * root is changed (without modifying refcounts).
2567 */
2568char *__d_path(const struct path *path, struct path *root,
2569               char *buf, int buflen)
2570{
2571        char *res = buf + buflen;
2572        int error;
2573
2574        prepend(&res, &buflen, "\0", 1);
2575        write_seqlock(&rename_lock);
2576        error = prepend_path(path, root, &res, &buflen);
2577        write_sequnlock(&rename_lock);
2578
2579        if (error)
2580                return ERR_PTR(error);
2581        return res;
2582}
2583
2584/*
2585 * same as __d_path but appends "(deleted)" for unlinked files.
2586 */
2587static int path_with_deleted(const struct path *path, struct path *root,
2588                                 char **buf, int *buflen)
2589{
2590        prepend(buf, buflen, "\0", 1);
2591        if (d_unlinked(path->dentry)) {
2592                int error = prepend(buf, buflen, " (deleted)", 10);
2593                if (error)
2594                        return error;
2595        }
2596
2597        return prepend_path(path, root, buf, buflen);
2598}
2599
/* Tag a path that cannot be reached from the caller's root. */
static int prepend_unreachable(char **buffer, int *buflen)
{
	static const char tag[] = "(unreachable)";

	return prepend(buffer, buflen, tag, sizeof(tag) - 1);
}
2604
/**
 * d_path - return the path of a dentry
 * @path: path to report
 * @buf: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous.
 *
 * Returns a pointer into the buffer or an error code if the path was
 * too long. Note: Callers should use the returned pointer, not the passed
 * in buffer, to use the name! The implementation often starts at an offset
 * into the buffer, and may leave 0 bytes at the start.
 *
 * "buflen" should be positive.
 */
char *d_path(const struct path *path, char *buf, int buflen)
{
	char *res = buf + buflen;
	struct path root;
	struct path tmp;
	int error;

	/*
	 * We have various synthetic filesystems that never get mounted.  On
	 * these filesystems dentries are never used for lookup purposes, and
	 * thus don't need to be hashed.  They also don't need a name until a
	 * user wants to identify the object in /proc/pid/fd/.  The little hack
	 * below allows us to generate a name for these objects on demand:
	 */
	if (path->dentry->d_op && path->dentry->d_op->d_dname)
		return path->dentry->d_op->d_dname(path->dentry, buf, buflen);

	/* Pin the root so it cannot be freed while we build the path. */
	get_fs_root(current->fs, &root);
	/* Exclude renames for the duration of the d_parent walk. */
	write_seqlock(&rename_lock);
	/* Work on a copy: path_with_deleted() may rewrite its root arg. */
	tmp = root;
	error = path_with_deleted(path, &tmp, &res, &buflen);
	if (error)
		res = ERR_PTR(error);
	write_sequnlock(&rename_lock);
	path_put(&root);	/* drop the reference taken above */
	return res;
}
EXPORT_SYMBOL(d_path);
2649
2650/**
2651 * d_path_with_unreachable - return the path of a dentry
2652 * @path: path to report
2653 * @buf: buffer to return value in
2654 * @buflen: buffer length
2655 *
2656 * The difference from d_path() is that this prepends "(unreachable)"
2657 * to paths which are unreachable from the current process' root.
2658 */
2659char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2660{
2661        char *res = buf + buflen;
2662        struct path root;
2663        struct path tmp;
2664        int error;
2665
2666        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2667                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2668
2669        get_fs_root(current->fs, &root);
2670        write_seqlock(&rename_lock);
2671        tmp = root;
2672        error = path_with_deleted(path, &tmp, &res, &buflen);
2673        if (!error && !path_equal(&tmp, &root))
2674                error = prepend_unreachable(&res, &buflen);
2675        write_sequnlock(&rename_lock);
2676        path_put(&root);
2677        if (error)
2678                res =  ERR_PTR(error);
2679
2680        return res;
2681}
2682
2683/*
2684 * Helper function for dentry_operations.d_dname() members
2685 */
2686char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2687                        const char *fmt, ...)
2688{
2689        va_list args;
2690        char temp[64];
2691        int sz;
2692
2693        va_start(args, fmt);
2694        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2695        va_end(args);
2696
2697        if (sz > sizeof(temp) || sz > buflen)
2698                return ERR_PTR(-ENAMETOOLONG);
2699
2700        buffer += buflen - sz;
2701        return memcpy(buffer, temp, sz);
2702}
2703
/*
 * Write full pathname from the root of the filesystem into the buffer.
 * The buffer is filled backwards from its end; the returned pointer is
 * somewhere inside @buf.  Returns ERR_PTR(-ENAMETOOLONG) on overflow.
 * Both callers take rename_lock, which keeps the d_parent chain stable.
 */
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
	char *end = buf + buflen;
	char *retval;

	/* Reserve the terminating NUL up front. */
	prepend(&end, &buflen, "\0", 1);
	if (buflen < 1)
		goto Elong;
	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	/* Walk up to the root, prepending "/name" for each level. */
	while (!IS_ROOT(dentry)) {
		struct dentry *parent = dentry->d_parent;
		int error;

		prefetch(parent);
		/* d_lock stabilizes d_name against a concurrent d_move(). */
		spin_lock(&dentry->d_lock);
		error = prepend_name(&end, &buflen, &dentry->d_name);
		spin_unlock(&dentry->d_lock);
		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
			goto Elong;

		retval = end;
		dentry = parent;
	}
	return retval;
Elong:
	return ERR_PTR(-ENAMETOOLONG);
}
2737
2738char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2739{
2740        char *retval;
2741
2742        write_seqlock(&rename_lock);
2743        retval = __dentry_path(dentry, buf, buflen);
2744        write_sequnlock(&rename_lock);
2745
2746        return retval;
2747}
2748EXPORT_SYMBOL(dentry_path_raw);
2749
2750char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2751{
2752        char *p = NULL;
2753        char *retval;
2754
2755        write_seqlock(&rename_lock);
2756        if (d_unlinked(dentry)) {
2757                p = buf + buflen;
2758                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2759                        goto Elong;
2760                buflen++;
2761        }
2762        retval = __dentry_path(dentry, buf, buflen);
2763        write_sequnlock(&rename_lock);
2764        if (!IS_ERR(retval) && p)
2765                *p = '/';       /* restore '/' overriden with '\0' */
2766        return retval;
2767Elong:
2768        return ERR_PTR(-ENAMETOOLONG);
2769}
2770
2771/*
2772 * NOTE! The user-level library version returns a
2773 * character pointer. The kernel system call just
2774 * returns the length of the buffer filled (which
2775 * includes the ending '\0' character), or a negative
2776 * error value. So libc would do something like
2777 *
2778 *      char *getcwd(char * buf, size_t size)
2779 *      {
2780 *              int retval;
2781 *
2782 *              retval = sys_getcwd(buf, size);
2783 *              if (retval >= 0)
2784 *                      return buf;
2785 *              errno = -retval;
2786 *              return NULL;
2787 *      }
2788 */
/*
 * getcwd(2): copy the current working directory's path to user space.
 * Returns the number of bytes copied (including the NUL) or a negative
 * errno.  An unlinked cwd yields -ENOENT; a cwd outside the current
 * root is tagged "(unreachable)".
 */
SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
{
	int error;
	struct path pwd, root;
	/* One page of scratch space; the path is built backwards in it. */
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	/* Take references so neither path can be freed under us. */
	get_fs_root_and_pwd(current->fs, &root, &pwd);

	error = -ENOENT;
	/* Block renames so the d_parent walk sees a consistent tree. */
	write_seqlock(&rename_lock);
	if (!d_unlinked(pwd.dentry)) {
		unsigned long len;
		struct path tmp = root;	/* prepend_path() may modify it */
		char *cwd = page + PAGE_SIZE;
		int buflen = PAGE_SIZE;

		prepend(&cwd, &buflen, "\0", 1);
		error = prepend_path(&pwd, &tmp, &cwd, &buflen);
		write_sequnlock(&rename_lock);

		if (error)
			goto out;

		/* Unreachable from current root */
		if (!path_equal(&tmp, &root)) {
			error = prepend_unreachable(&cwd, &buflen);
			if (error)
				goto out;
		}

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;	/* bytes used, incl. NUL */
		if (len <= size) {
			error = len;	/* success: return the length */
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else {
		write_sequnlock(&rename_lock);
	}

out:
	path_put(&pwd);
	path_put(&root);
	free_page((unsigned long) page);
	return error;
}
2839
2840/*
2841 * Test whether new_dentry is a subdirectory of old_dentry.
2842 *
2843 * Trivially implemented using the dcache structure
2844 */
2845
2846/**
2847 * is_subdir - is new dentry a subdirectory of old_dentry
2848 * @new_dentry: new dentry
2849 * @old_dentry: old dentry
2850 *
2851 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2852 * Returns 0 otherwise.
2853 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2854 */
2855  
2856int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2857{
2858        int result;
2859        unsigned seq;
2860
2861        if (new_dentry == old_dentry)
2862                return 1;
2863
2864        do {
2865                /* for restarting inner loop in case of seq retry */
2866                seq = read_seqbegin(&rename_lock);
2867                /*
2868                 * Need rcu_readlock to protect against the d_parent trashing
2869                 * due to d_move
2870                 */
2871                rcu_read_lock();
2872                if (d_ancestor(old_dentry, new_dentry))
2873                        result = 1;
2874                else
2875                        result = 0;
2876                rcu_read_unlock();
2877        } while (read_seqretry(&rename_lock, seq));
2878
2879        return result;
2880}
2881
/*
 * Return 1 if @path1 lies underneath @path2, crossing mount boundaries
 * as needed; 0 otherwise.
 */
int path_is_under(struct path *path1, struct path *path2)
{
	struct vfsmount *mnt = path1->mnt;
	struct dentry *dentry = path1->dentry;
	int res;

	/* vfsmount_lock keeps the mount tree stable while we climb it. */
	br_read_lock(vfsmount_lock);
	if (mnt != path2->mnt) {
		/* Climb parent mounts until we reach path2's vfsmount. */
		for (;;) {
			if (mnt->mnt_parent == mnt) {
				/* Hit the root mount: path2 is not above us. */
				br_read_unlock(vfsmount_lock);
				return 0;
			}
			if (mnt->mnt_parent == path2->mnt)
				break;
			mnt = mnt->mnt_parent;
		}
		/* Compare from where our mount hangs off path2's mount. */
		dentry = mnt->mnt_mountpoint;
	}
	res = is_subdir(dentry, path2->dentry);
	br_read_unlock(vfsmount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);
2906
/*
 * d_genocide - walk the subtree rooted at @root and drop one reference
 * from every hashed, positive dentry found, at most once per dentry
 * (DCACHE_GENOCIDE marks those already visited).
 *
 * The traversal is depth-first over d_subdirs/d_child under d_lock.
 * It first runs locklessly against rename_lock (seqcount read side);
 * if a rename races, it retries holding rename_lock for writing so
 * forward progress is guaranteed.
 */
void d_genocide(struct dentry *root)
{
	struct dentry *this_parent;
	struct list_head *next;
	unsigned seq;
	int locked = 0;		/* fell back to write_seqlock? */

	seq = read_seqbegin(&rename_lock);
again:
	this_parent = root;
	spin_lock(&this_parent->d_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
		next = tmp->next;

		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		/* Skip unhashed and negative dentries. */
		if (d_unhashed(dentry) || !dentry->d_inode) {
			spin_unlock(&dentry->d_lock);
			continue;
		}
		if (!list_empty(&dentry->d_subdirs)) {
			/*
			 * Descend: the child's lock (already held) becomes
			 * the new parent lock.  The release/acquire pair
			 * only re-annotates it for lockdep at the outer
			 * nesting level; no lock is actually dropped.
			 */
			spin_unlock(&this_parent->d_lock);
			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
			this_parent = dentry;
			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
			goto repeat;
		}
		/* Drop one reference, but never twice for the same dentry. */
		if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
			dentry->d_flags |= DCACHE_GENOCIDE;
			dentry->d_count--;
		}
		spin_unlock(&dentry->d_lock);
	}
	if (this_parent != root) {
		/* Finished this directory: account for it and ascend. */
		struct dentry *tmp;
		struct dentry *child;

		tmp = this_parent->d_parent;
		if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
			this_parent->d_flags |= DCACHE_GENOCIDE;
			this_parent->d_count--;
		}
		/* RCU keeps the parent alive across the lock switch. */
		rcu_read_lock();
		spin_unlock(&this_parent->d_lock);
		child = this_parent;
		this_parent = tmp;
		spin_lock(&this_parent->d_lock);
		/* might go back up the wrong parent if we have had a rename
		 * or deletion */
		if (this_parent != child->d_parent ||
			 (!locked && read_seqretry(&rename_lock, seq))) {
			spin_unlock(&this_parent->d_lock);
			rcu_read_unlock();
			goto rename_retry;
		}
		rcu_read_unlock();
		next = child->d_u.d_child.next;
		goto resume;
	}
	spin_unlock(&this_parent->d_lock);
	/* One last race check before declaring the walk complete. */
	if (!locked && read_seqretry(&rename_lock, seq))
		goto rename_retry;
	if (locked)
		write_sequnlock(&rename_lock);
	return;

rename_retry:
	/* A rename raced with us: redo the walk with renames excluded. */
	locked = 1;
	write_seqlock(&rename_lock);
	goto again;
}
2982
2983/**
2984 * find_inode_number - check for dentry with name
2985 * @dir: directory to check
2986 * @name: Name to find.
2987 *
2988 * Check whether a dentry already exists for the given name,
2989 * and return the inode number if it has an inode. Otherwise
2990 * 0 is returned.
2991 *
2992 * This routine is used to post-process directory listings for
2993 * filesystems using synthetic inode numbers, and is necessary
2994 * to keep getcwd() working.
2995 */
2996 
2997ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2998{
2999        struct dentry * dentry;
3000        ino_t ino = 0;
3001
3002        dentry = d_hash_and_lookup(dir, name);
3003        if (dentry) {
3004                if (dentry->d_inode)
3005                        ino = dentry->d_inode->i_ino;
3006                dput(dentry);
3007        }
3008        return ino;
3009}
3010EXPORT_SYMBOL(find_inode_number);
3011
3012static __initdata unsigned long dhash_entries;
3013static int __init set_dhash_entries(char *str)
3014{
3015        if (!str)
3016                return 0;
3017        dhash_entries = simple_strtoul(str, &str, 0);
3018        return 1;
3019}
3020__setup("dhash_entries=", set_dhash_entries);
3021
/*
 * Early-boot construction of the dentry hash table (HASH_EARLY), used
 * unless the hash is to be NUMA-distributed.
 */
static void __init dcache_init_early(void)
{
	int loop;

	/* If hashes are distributed across NUMA nodes, defer
	 * hash allocation until vmalloc space is available.
	 */
	if (hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,	/* "dhash_entries=" boot param */
					13,	/* scale factor — NOTE(review): confirm vs alloc_large_system_hash() */
					HASH_EARLY,
					&d_hash_shift,
					&d_hash_mask,
					0);	/* no entry limit */

	/* Table has 2^d_hash_shift buckets; initialize each head. */
	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
3045
/*
 * Normal dcache initialization: create the dentry slab cache, register
 * the shrinker, and build the hash table unless dcache_init_early()
 * already did so.
 */
static void __init dcache_init(void)
{
	int loop;

	/* 
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache. 
	 */
	dentry_cache = KMEM_CACHE(dentry,
		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
	
	register_shrinker(&dcache_shrinker);

	/* Hash may have been set up in dcache_init_early */
	if (!hashdist)
		return;

	dentry_hashtable =
		alloc_large_system_hash("Dentry cache",
					sizeof(struct dcache_hash_bucket),
					dhash_entries,	/* "dhash_entries=" boot param */
					13,	/* scale factor — NOTE(review): confirm vs alloc_large_system_hash() */
					0,	/* no HASH_EARLY here */
					&d_hash_shift,
					&d_hash_mask,
					0);	/* no entry limit */

	/* Table has 2^d_hash_shift buckets; initialize each head. */
	for (loop = 0; loop < (1 << d_hash_shift); loop++)
		INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
3077
/* SLAB cache for __getname() consumers */
struct kmem_cache *names_cachep __read_mostly;
EXPORT_SYMBOL(names_cachep);

/* d_genocide() itself is defined earlier in this file. */
EXPORT_SYMBOL(d_genocide);
3083
/* Early-boot setup of the dentry and inode hash tables. */
void __init vfs_caches_init_early(void)
{
	dcache_init_early();
	inode_init_early();
}
3089
/*
 * Top-level VFS cache setup, run once at boot: size the caches from
 * the memory not reserved for the kernel, then initialize each VFS
 * subsystem cache in turn.
 */
void __init vfs_caches_init(unsigned long mempages)
{
	unsigned long reserve;

	/* Base hash sizes on available memory, with a reserve equal to
	   150% of current kernel size */

	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
	mempages -= reserve;

	/* One PATH_MAX-sized buffer per __getname() call. */
	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	dcache_init();
	inode_init();
	files_init(mempages);	/* files gets the post-reserve page count */
	mnt_init();
	bdev_cache_init();
	chrdev_init();
}
3110