linux/fs/dcache.c
<<
>>
Prefs
   1/*
   2 * fs/dcache.c
   3 *
   4 * Complete reimplementation
   5 * (C) 1997 Thomas Schoebel-Theuer,
   6 * with heavy changes by Linus Torvalds
   7 */
   8
   9/*
  10 * Notes on the allocation strategy:
  11 *
  12 * The dcache is a master of the icache - whenever a dcache entry
  13 * exists, the inode will always exist. "iput()" is done either when
  14 * the dcache entry is deleted or garbage collected.
  15 */
  16
  17#include <linux/syscalls.h>
  18#include <linux/string.h>
  19#include <linux/mm.h>
  20#include <linux/fs.h>
  21#include <linux/fsnotify.h>
  22#include <linux/slab.h>
  23#include <linux/init.h>
  24#include <linux/hash.h>
  25#include <linux/cache.h>
  26#include <linux/export.h>
  27#include <linux/mount.h>
  28#include <linux/file.h>
  29#include <asm/uaccess.h>
  30#include <linux/security.h>
  31#include <linux/seqlock.h>
  32#include <linux/swap.h>
  33#include <linux/bootmem.h>
  34#include <linux/fs_struct.h>
  35#include <linux/hardirq.h>
  36#include <linux/bit_spinlock.h>
  37#include <linux/rculist_bl.h>
  38#include <linux/prefetch.h>
  39#include <linux/ratelimit.h>
  40#include "internal.h"
  41#include "mount.h"
  42
  43/*
  44 * Usage:
  45 * dcache->d_inode->i_lock protects:
  46 *   - i_dentry, d_alias, d_inode of aliases
  47 * dcache_hash_bucket lock protects:
  48 *   - the dcache hash table
  49 * s_anon bl list spinlock protects:
  50 *   - the s_anon list (see __d_drop)
  51 * dcache_lru_lock protects:
  52 *   - the dcache lru lists and counters
  53 * d_lock protects:
  54 *   - d_flags
  55 *   - d_name
  56 *   - d_lru
  57 *   - d_count
  58 *   - d_unhashed()
  59 *   - d_parent and d_subdirs
  60 *   - childrens' d_child and d_parent
  61 *   - d_alias, d_inode
  62 *
  63 * Ordering:
  64 * dentry->d_inode->i_lock
  65 *   dentry->d_lock
  66 *     dcache_lru_lock
  67 *     dcache_hash_bucket lock
  68 *     s_anon lock
  69 *
  70 * If there is an ancestor relationship:
  71 * dentry->d_parent->...->d_parent->d_lock
  72 *   ...
  73 *     dentry->d_parent->d_lock
  74 *       dentry->d_lock
  75 *
  76 * If no ancestor relationship:
  77 * if (dentry1 < dentry2)
  78 *   dentry1->d_lock
  79 *     dentry2->d_lock
  80 */
  81int sysctl_vfs_cache_pressure __read_mostly = 100;
  82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
  83
  84static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
  85__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
  86
  87EXPORT_SYMBOL(rename_lock);
  88
  89static struct kmem_cache *dentry_cache __read_mostly;
  90
  91/*
  92 * This is the single most critical data structure when it comes
  93 * to the dcache: the hashtable for lookups. Somebody should try
  94 * to make this good - I've just made it work.
  95 *
  96 * This hash-function tries to avoid losing too many bits of hash
  97 * information, yet avoid using a prime hash-size or similar.
  98 */
  99#define D_HASHBITS     d_hash_shift
 100#define D_HASHMASK     d_hash_mask
 101
 102static unsigned int d_hash_mask __read_mostly;
 103static unsigned int d_hash_shift __read_mostly;
 104
 105static struct hlist_bl_head *dentry_hashtable __read_mostly;
 106
 107static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
 108                                        unsigned int hash)
 109{
 110        hash += (unsigned long) parent / L1_CACHE_BYTES;
 111        hash = hash + (hash >> D_HASHBITS);
 112        return dentry_hashtable + (hash & D_HASHMASK);
 113}
 114
 115/* Statistics gathering. */
 116struct dentry_stat_t dentry_stat = {
 117        .age_limit = 45,
 118};
 119
 120static DEFINE_PER_CPU(unsigned int, nr_dentry);
 121
 122#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 123static int get_nr_dentry(void)
 124{
 125        int i;
 126        int sum = 0;
 127        for_each_possible_cpu(i)
 128                sum += per_cpu(nr_dentry, i);
 129        return sum < 0 ? 0 : sum;
 130}
 131
 132int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 133                   size_t *lenp, loff_t *ppos)
 134{
 135        dentry_stat.nr_dentry = get_nr_dentry();
 136        return proc_dointvec(table, write, buffer, lenp, ppos);
 137}
 138#endif
 139
 140/*
 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 142 * The strings are both count bytes long, and count is non-zero.
 143 */
 144#ifdef CONFIG_DCACHE_WORD_ACCESS
 145
 146#include <asm/word-at-a-time.h>
 147/*
 148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
 149 * aligned allocation for this particular component. We don't
 150 * strictly need the load_unaligned_zeropad() safety, but it
 151 * doesn't hurt either.
 152 *
 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
 154 * need the careful unaligned handling.
 155 */
 156static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 157{
 158        unsigned long a,b,mask;
 159
 160        for (;;) {
 161                a = *(unsigned long *)cs;
 162                b = load_unaligned_zeropad(ct);
 163                if (tcount < sizeof(unsigned long))
 164                        break;
 165                if (unlikely(a != b))
 166                        return 1;
 167                cs += sizeof(unsigned long);
 168                ct += sizeof(unsigned long);
 169                tcount -= sizeof(unsigned long);
 170                if (!tcount)
 171                        return 0;
 172        }
 173        mask = ~(~0ul << tcount*8);
 174        return unlikely(!!((a ^ b) & mask));
 175}
 176
 177#else
 178
 179static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 180{
 181        do {
 182                if (*cs != *ct)
 183                        return 1;
 184                cs++;
 185                ct++;
 186                tcount--;
 187        } while (tcount);
 188        return 0;
 189}
 190
 191#endif
 192
 193static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 194{
 195        const unsigned char *cs;
 196        /*
 197         * Be careful about RCU walk racing with rename:
 198         * use ACCESS_ONCE to fetch the name pointer.
 199         *
 200         * NOTE! Even if a rename will mean that the length
 201         * was not loaded atomically, we don't care. The
 202         * RCU walk will check the sequence count eventually,
 203         * and catch it. And we won't overrun the buffer,
 204         * because we're reading the name pointer atomically,
 205         * and a dentry name is guaranteed to be properly
 206         * terminated with a NUL byte.
 207         *
 208         * End result: even if 'len' is wrong, we'll exit
 209         * early because the data cannot match (there can
 210         * be no NUL in the ct/tcount data)
 211         */
 212        cs = ACCESS_ONCE(dentry->d_name.name);
 213        smp_read_barrier_depends();
 214        return dentry_string_cmp(cs, ct, tcount);
 215}
 216
 217static void __d_free(struct rcu_head *head)
 218{
 219        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 220
 221        WARN_ON(!hlist_unhashed(&dentry->d_alias));
 222        if (dname_external(dentry))
 223                kfree(dentry->d_name.name);
 224        kmem_cache_free(dentry_cache, dentry); 
 225}
 226
 227/*
 228 * no locks, please.
 229 */
 230static void d_free(struct dentry *dentry)
 231{
 232        BUG_ON(dentry->d_lockref.count);
 233        this_cpu_dec(nr_dentry);
 234        if (dentry->d_op && dentry->d_op->d_release)
 235                dentry->d_op->d_release(dentry);
 236
 237        /* if dentry was never visible to RCU, immediate free is OK */
 238        if (!(dentry->d_flags & DCACHE_RCUACCESS))
 239                __d_free(&dentry->d_u.d_rcu);
 240        else
 241                call_rcu(&dentry->d_u.d_rcu, __d_free);
 242}
 243
 244/**
 245 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
 246 * @dentry: the target dentry
 247 * After this call, in-progress rcu-walk path lookup will fail. This
 248 * should be called after unhashing, and after changing d_inode (if
 249 * the dentry has not already been unhashed).
 250 */
 251static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
 252{
 253        assert_spin_locked(&dentry->d_lock);
 254        /* Go through a barrier */
 255        write_seqcount_barrier(&dentry->d_seq);
 256}
 257
 258/*
 259 * Release the dentry's inode, using the filesystem
 260 * d_iput() operation if defined. Dentry has no refcount
 261 * and is unhashed.
 262 */
 263static void dentry_iput(struct dentry * dentry)
 264        __releases(dentry->d_lock)
 265        __releases(dentry->d_inode->i_lock)
 266{
 267        struct inode *inode = dentry->d_inode;
 268        if (inode) {
 269                dentry->d_inode = NULL;
 270                hlist_del_init(&dentry->d_alias);
 271                spin_unlock(&dentry->d_lock);
 272                spin_unlock(&inode->i_lock);
 273                if (!inode->i_nlink)
 274                        fsnotify_inoderemove(inode);
 275                if (dentry->d_op && dentry->d_op->d_iput)
 276                        dentry->d_op->d_iput(dentry, inode);
 277                else
 278                        iput(inode);
 279        } else {
 280                spin_unlock(&dentry->d_lock);
 281        }
 282}
 283
 284/*
 285 * Release the dentry's inode, using the filesystem
 286 * d_iput() operation if defined. dentry remains in-use.
 287 */
 288static void dentry_unlink_inode(struct dentry * dentry)
 289        __releases(dentry->d_lock)
 290        __releases(dentry->d_inode->i_lock)
 291{
 292        struct inode *inode = dentry->d_inode;
 293        dentry->d_inode = NULL;
 294        hlist_del_init(&dentry->d_alias);
 295        dentry_rcuwalk_barrier(dentry);
 296        spin_unlock(&dentry->d_lock);
 297        spin_unlock(&inode->i_lock);
 298        if (!inode->i_nlink)
 299                fsnotify_inoderemove(inode);
 300        if (dentry->d_op && dentry->d_op->d_iput)
 301                dentry->d_op->d_iput(dentry, inode);
 302        else
 303                iput(inode);
 304}
 305
 306/*
 307 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
 308 */
 309static void dentry_lru_add(struct dentry *dentry)
 310{
 311        if (list_empty(&dentry->d_lru)) {
 312                spin_lock(&dcache_lru_lock);
 313                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 314                dentry->d_sb->s_nr_dentry_unused++;
 315                dentry_stat.nr_unused++;
 316                spin_unlock(&dcache_lru_lock);
 317        }
 318}
 319
 320static void __dentry_lru_del(struct dentry *dentry)
 321{
 322        list_del_init(&dentry->d_lru);
 323        dentry->d_flags &= ~DCACHE_SHRINK_LIST;
 324        dentry->d_sb->s_nr_dentry_unused--;
 325        dentry_stat.nr_unused--;
 326}
 327
 328/*
 329 * Remove a dentry with references from the LRU.
 330 */
 331static void dentry_lru_del(struct dentry *dentry)
 332{
 333        if (!list_empty(&dentry->d_lru)) {
 334                spin_lock(&dcache_lru_lock);
 335                __dentry_lru_del(dentry);
 336                spin_unlock(&dcache_lru_lock);
 337        }
 338}
 339
 340static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 341{
 342        spin_lock(&dcache_lru_lock);
 343        if (list_empty(&dentry->d_lru)) {
 344                list_add_tail(&dentry->d_lru, list);
 345                dentry->d_sb->s_nr_dentry_unused++;
 346                dentry_stat.nr_unused++;
 347        } else {
 348                list_move_tail(&dentry->d_lru, list);
 349        }
 350        spin_unlock(&dcache_lru_lock);
 351}
 352
 353/**
 354 * d_kill - kill dentry and return parent
 355 * @dentry: dentry to kill
 356 * @parent: parent dentry
 357 *
 358 * The dentry must already be unhashed and removed from the LRU.
 359 *
 360 * If this is the root of the dentry tree, return NULL.
 361 *
 362 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
 363 * d_kill.
 364 */
 365static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 366        __releases(dentry->d_lock)
 367        __releases(parent->d_lock)
 368        __releases(dentry->d_inode->i_lock)
 369{
 370        list_del(&dentry->d_u.d_child);
 371        /*
 372         * Inform try_to_ascend() that we are no longer attached to the
 373         * dentry tree
 374         */
 375        dentry->d_flags |= DCACHE_DENTRY_KILLED;
 376        if (parent)
 377                spin_unlock(&parent->d_lock);
 378        dentry_iput(dentry);
 379        /*
 380         * dentry_iput drops the locks, at which point nobody (except
 381         * transient RCU lookups) can reach this dentry.
 382         */
 383        d_free(dentry);
 384        return parent;
 385}
 386
 387/*
 388 * Unhash a dentry without inserting an RCU walk barrier or checking that
 389 * dentry->d_lock is locked.  The caller must take care of that, if
 390 * appropriate.
 391 */
 392static void __d_shrink(struct dentry *dentry)
 393{
 394        if (!d_unhashed(dentry)) {
 395                struct hlist_bl_head *b;
 396                if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
 397                        b = &dentry->d_sb->s_anon;
 398                else
 399                        b = d_hash(dentry->d_parent, dentry->d_name.hash);
 400
 401                hlist_bl_lock(b);
 402                __hlist_bl_del(&dentry->d_hash);
 403                dentry->d_hash.pprev = NULL;
 404                hlist_bl_unlock(b);
 405        }
 406}
 407
 408/**
 409 * d_drop - drop a dentry
 410 * @dentry: dentry to drop
 411 *
 412 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
 413 * be found through a VFS lookup any more. Note that this is different from
 414 * deleting the dentry - d_delete will try to mark the dentry negative if
 415 * possible, giving a successful _negative_ lookup, while d_drop will
 416 * just make the cache lookup fail.
 417 *
 418 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
 419 * reason (NFS timeouts or autofs deletes).
 420 *
 421 * __d_drop requires dentry->d_lock.
 422 */
 423void __d_drop(struct dentry *dentry)
 424{
 425        if (!d_unhashed(dentry)) {
 426                __d_shrink(dentry);
 427                dentry_rcuwalk_barrier(dentry);
 428        }
 429}
 430EXPORT_SYMBOL(__d_drop);
 431
 432void d_drop(struct dentry *dentry)
 433{
 434        spin_lock(&dentry->d_lock);
 435        __d_drop(dentry);
 436        spin_unlock(&dentry->d_lock);
 437}
 438EXPORT_SYMBOL(d_drop);
 439
 440/*
 441 * Finish off a dentry we've decided to kill.
 442 * dentry->d_lock must be held, returns with it unlocked.
 443 * If ref is non-zero, then decrement the refcount too.
 444 * Returns dentry requiring refcount drop, or NULL if we're done.
 445 */
 446static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
 447        __releases(dentry->d_lock)
 448{
 449        struct inode *inode;
 450        struct dentry *parent;
 451
 452        inode = dentry->d_inode;
 453        if (inode && !spin_trylock(&inode->i_lock)) {
 454relock:
 455                spin_unlock(&dentry->d_lock);
 456                cpu_relax();
 457                return dentry; /* try again with same dentry */
 458        }
 459        if (IS_ROOT(dentry))
 460                parent = NULL;
 461        else
 462                parent = dentry->d_parent;
 463        if (parent && !spin_trylock(&parent->d_lock)) {
 464                if (inode)
 465                        spin_unlock(&inode->i_lock);
 466                goto relock;
 467        }
 468
 469        if (ref)
 470                dentry->d_lockref.count--;
 471        /*
 472         * inform the fs via d_prune that this dentry is about to be
 473         * unhashed and destroyed.
 474         */
 475        if (dentry->d_flags & DCACHE_OP_PRUNE)
 476                dentry->d_op->d_prune(dentry);
 477
 478        dentry_lru_del(dentry);
 479        /* if it was on the hash then remove it */
 480        __d_drop(dentry);
 481        return d_kill(dentry, parent);
 482}
 483
 484/* 
 485 * This is dput
 486 *
 487 * This is complicated by the fact that we do not want to put
 488 * dentries that are no longer on any hash chain on the unused
 489 * list: we'd much rather just get rid of them immediately.
 490 *
 491 * However, that implies that we have to traverse the dentry
 492 * tree upwards to the parents which might _also_ now be
 493 * scheduled for deletion (it may have been only waiting for
 494 * its last child to go away).
 495 *
 496 * This tail recursion is done by hand as we don't want to depend
 497 * on the compiler to always get this right (gcc generally doesn't).
 498 * Real recursion would eat up our stack space.
 499 */
 500
 501/*
 502 * dput - release a dentry
 503 * @dentry: dentry to release 
 504 *
 505 * Release a dentry. This will drop the usage count and if appropriate
 506 * call the dentry unlink method as well as removing it from the queues and
 507 * releasing its resources. If the parent dentries were scheduled for release
 508 * they too may now get deleted.
 509 */
 510void dput(struct dentry *dentry)
 511{
 512        if (!dentry)
 513                return;
 514
 515repeat:
 516        if (dentry->d_lockref.count == 1)
 517                might_sleep();
 518        if (lockref_put_or_lock(&dentry->d_lockref))
 519                return;
 520
 521        if (dentry->d_flags & DCACHE_OP_DELETE) {
 522                if (dentry->d_op->d_delete(dentry))
 523                        goto kill_it;
 524        }
 525
 526        /* Unreachable? Get rid of it */
 527        if (d_unhashed(dentry))
 528                goto kill_it;
 529
 530        dentry->d_flags |= DCACHE_REFERENCED;
 531        dentry_lru_add(dentry);
 532
 533        dentry->d_lockref.count--;
 534        spin_unlock(&dentry->d_lock);
 535        return;
 536
 537kill_it:
 538        dentry = dentry_kill(dentry, 1);
 539        if (dentry)
 540                goto repeat;
 541}
 542EXPORT_SYMBOL(dput);
 543
 544/**
 545 * d_invalidate - invalidate a dentry
 546 * @dentry: dentry to invalidate
 547 *
 548 * Try to invalidate the dentry if it turns out to be
 549 * possible. If there are other dentries that can be
 550 * reached through this one we can't delete it and we
 551 * return -EBUSY. On success we return 0.
 552 *
 553 * no dcache lock.
 554 */
 555 
 556int d_invalidate(struct dentry * dentry)
 557{
 558        /*
 559         * If it's already been dropped, return OK.
 560         */
 561        spin_lock(&dentry->d_lock);
 562        if (d_unhashed(dentry)) {
 563                spin_unlock(&dentry->d_lock);
 564                return 0;
 565        }
 566        /*
 567         * Check whether to do a partial shrink_dcache
 568         * to get rid of unused child entries.
 569         */
 570        if (!list_empty(&dentry->d_subdirs)) {
 571                spin_unlock(&dentry->d_lock);
 572                shrink_dcache_parent(dentry);
 573                spin_lock(&dentry->d_lock);
 574        }
 575
 576        /*
 577         * Somebody else still using it?
 578         *
 579         * If it's a directory, we can't drop it
 580         * for fear of somebody re-populating it
 581         * with children (even though dropping it
 582         * would make it unreachable from the root,
 583         * we might still populate it if it was a
 584         * working directory or similar).
 585         * We also need to leave mountpoints alone,
 586         * directory or not.
 587         */
 588        if (dentry->d_lockref.count > 1 && dentry->d_inode) {
 589                if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
 590                        spin_unlock(&dentry->d_lock);
 591                        return -EBUSY;
 592                }
 593        }
 594
 595        __d_drop(dentry);
 596        spin_unlock(&dentry->d_lock);
 597        return 0;
 598}
 599EXPORT_SYMBOL(d_invalidate);
 600
 601/* This must be called with d_lock held */
 602static inline void __dget_dlock(struct dentry *dentry)
 603{
 604        dentry->d_lockref.count++;
 605}
 606
 607static inline void __dget(struct dentry *dentry)
 608{
 609        lockref_get(&dentry->d_lockref);
 610}
 611
 612struct dentry *dget_parent(struct dentry *dentry)
 613{
 614        struct dentry *ret;
 615
 616repeat:
 617        /*
 618         * Don't need rcu_dereference because we re-check it was correct under
 619         * the lock.
 620         */
 621        rcu_read_lock();
 622        ret = dentry->d_parent;
 623        spin_lock(&ret->d_lock);
 624        if (unlikely(ret != dentry->d_parent)) {
 625                spin_unlock(&ret->d_lock);
 626                rcu_read_unlock();
 627                goto repeat;
 628        }
 629        rcu_read_unlock();
 630        BUG_ON(!ret->d_lockref.count);
 631        ret->d_lockref.count++;
 632        spin_unlock(&ret->d_lock);
 633        return ret;
 634}
 635EXPORT_SYMBOL(dget_parent);
 636
 637/**
 638 * d_find_alias - grab a hashed alias of inode
 639 * @inode: inode in question
 640 * @want_discon:  flag, used by d_splice_alias, to request
 641 *          that only a DISCONNECTED alias be returned.
 642 *
 643 * If inode has a hashed alias, or is a directory and has any alias,
 644 * acquire the reference to alias and return it. Otherwise return NULL.
 645 * Notice that if inode is a directory there can be only one alias and
 646 * it can be unhashed only if it has no children, or if it is the root
 647 * of a filesystem.
 648 *
 649 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
 650 * any other hashed alias over that one unless @want_discon is set,
 651 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
 652 */
 653static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 654{
 655        struct dentry *alias, *discon_alias;
 656
 657again:
 658        discon_alias = NULL;
 659        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
 660                spin_lock(&alias->d_lock);
 661                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 662                        if (IS_ROOT(alias) &&
 663                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 664                                discon_alias = alias;
 665                        } else if (!want_discon) {
 666                                __dget_dlock(alias);
 667                                spin_unlock(&alias->d_lock);
 668                                return alias;
 669                        }
 670                }
 671                spin_unlock(&alias->d_lock);
 672        }
 673        if (discon_alias) {
 674                alias = discon_alias;
 675                spin_lock(&alias->d_lock);
 676                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 677                        if (IS_ROOT(alias) &&
 678                            (alias->d_flags & DCACHE_DISCONNECTED)) {
 679                                __dget_dlock(alias);
 680                                spin_unlock(&alias->d_lock);
 681                                return alias;
 682                        }
 683                }
 684                spin_unlock(&alias->d_lock);
 685                goto again;
 686        }
 687        return NULL;
 688}
 689
 690struct dentry *d_find_alias(struct inode *inode)
 691{
 692        struct dentry *de = NULL;
 693
 694        if (!hlist_empty(&inode->i_dentry)) {
 695                spin_lock(&inode->i_lock);
 696                de = __d_find_alias(inode, 0);
 697                spin_unlock(&inode->i_lock);
 698        }
 699        return de;
 700}
 701EXPORT_SYMBOL(d_find_alias);
 702
 703/*
 704 *      Try to kill dentries associated with this inode.
 705 * WARNING: you must own a reference to inode.
 706 */
 707void d_prune_aliases(struct inode *inode)
 708{
 709        struct dentry *dentry;
 710restart:
 711        spin_lock(&inode->i_lock);
 712        hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
 713                spin_lock(&dentry->d_lock);
 714                if (!dentry->d_lockref.count) {
 715                        __dget_dlock(dentry);
 716                        __d_drop(dentry);
 717                        spin_unlock(&dentry->d_lock);
 718                        spin_unlock(&inode->i_lock);
 719                        dput(dentry);
 720                        goto restart;
 721                }
 722                spin_unlock(&dentry->d_lock);
 723        }
 724        spin_unlock(&inode->i_lock);
 725}
 726EXPORT_SYMBOL(d_prune_aliases);
 727
 728/*
 729 * Try to throw away a dentry - free the inode, dput the parent.
 730 * Requires dentry->d_lock is held, and dentry->d_count == 0.
 731 * Releases dentry->d_lock.
 732 *
 733 * This may fail if locks cannot be acquired no problem, just try again.
 734 */
 735static void try_prune_one_dentry(struct dentry *dentry)
 736        __releases(dentry->d_lock)
 737{
 738        struct dentry *parent;
 739
 740        parent = dentry_kill(dentry, 0);
 741        /*
 742         * If dentry_kill returns NULL, we have nothing more to do.
 743         * if it returns the same dentry, trylocks failed. In either
 744         * case, just loop again.
 745         *
 746         * Otherwise, we need to prune ancestors too. This is necessary
 747         * to prevent quadratic behavior of shrink_dcache_parent(), but
 748         * is also expected to be beneficial in reducing dentry cache
 749         * fragmentation.
 750         */
 751        if (!parent)
 752                return;
 753        if (parent == dentry)
 754                return;
 755
 756        /* Prune ancestors. */
 757        dentry = parent;
 758        while (dentry) {
 759                if (lockref_put_or_lock(&dentry->d_lockref))
 760                        return;
 761                dentry = dentry_kill(dentry, 1);
 762        }
 763}
 764
 765static void shrink_dentry_list(struct list_head *list)
 766{
 767        struct dentry *dentry;
 768
 769        rcu_read_lock();
 770        for (;;) {
 771                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
 772                if (&dentry->d_lru == list)
 773                        break; /* empty */
 774                spin_lock(&dentry->d_lock);
 775                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
 776                        spin_unlock(&dentry->d_lock);
 777                        continue;
 778                }
 779
 780                /*
 781                 * We found an inuse dentry which was not removed from
 782                 * the LRU because of laziness during lookup.  Do not free
 783                 * it - just keep it off the LRU list.
 784                 */
 785                if (dentry->d_lockref.count) {
 786                        dentry_lru_del(dentry);
 787                        spin_unlock(&dentry->d_lock);
 788                        continue;
 789                }
 790
 791                rcu_read_unlock();
 792
 793                try_prune_one_dentry(dentry);
 794
 795                rcu_read_lock();
 796        }
 797        rcu_read_unlock();
 798}
 799
 800/**
 801 * prune_dcache_sb - shrink the dcache
 802 * @sb: superblock
 803 * @count: number of entries to try to free
 804 *
 805 * Attempt to shrink the superblock dcache LRU by @count entries. This is
 806 * done when we need more memory an called from the superblock shrinker
 807 * function.
 808 *
 809 * This function may fail to free any resources if all the dentries are in
 810 * use.
 811 */
 812void prune_dcache_sb(struct super_block *sb, int count)
 813{
 814        struct dentry *dentry;
 815        LIST_HEAD(referenced);
 816        LIST_HEAD(tmp);
 817
 818relock:
 819        spin_lock(&dcache_lru_lock);
 820        while (!list_empty(&sb->s_dentry_lru)) {
 821                dentry = list_entry(sb->s_dentry_lru.prev,
 822                                struct dentry, d_lru);
 823                BUG_ON(dentry->d_sb != sb);
 824
 825                if (!spin_trylock(&dentry->d_lock)) {
 826                        spin_unlock(&dcache_lru_lock);
 827                        cpu_relax();
 828                        goto relock;
 829                }
 830
 831                if (dentry->d_flags & DCACHE_REFERENCED) {
 832                        dentry->d_flags &= ~DCACHE_REFERENCED;
 833                        list_move(&dentry->d_lru, &referenced);
 834                        spin_unlock(&dentry->d_lock);
 835                } else {
 836                        list_move_tail(&dentry->d_lru, &tmp);
 837                        dentry->d_flags |= DCACHE_SHRINK_LIST;
 838                        spin_unlock(&dentry->d_lock);
 839                        if (!--count)
 840                                break;
 841                }
 842                cond_resched_lock(&dcache_lru_lock);
 843        }
 844        if (!list_empty(&referenced))
 845                list_splice(&referenced, &sb->s_dentry_lru);
 846        spin_unlock(&dcache_lru_lock);
 847
 848        shrink_dentry_list(&tmp);
 849}
 850
 851/**
 852 * shrink_dcache_sb - shrink dcache for a superblock
 853 * @sb: superblock
 854 *
 855 * Shrink the dcache for the specified super block. This is used to free
 856 * the dcache before unmounting a file system.
 857 */
 858void shrink_dcache_sb(struct super_block *sb)
 859{
 860        LIST_HEAD(tmp);
 861
 862        spin_lock(&dcache_lru_lock);
 863        while (!list_empty(&sb->s_dentry_lru)) {
 864                list_splice_init(&sb->s_dentry_lru, &tmp);
 865                spin_unlock(&dcache_lru_lock);
 866                shrink_dentry_list(&tmp);
 867                spin_lock(&dcache_lru_lock);
 868        }
 869        spin_unlock(&dcache_lru_lock);
 870}
 871EXPORT_SYMBOL(shrink_dcache_sb);
 872
 873/*
 874 * destroy a single subtree of dentries for unmount
 875 * - see the comments on shrink_dcache_for_umount() for a description of the
 876 *   locking
 877 */
 878static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 879{
 880        struct dentry *parent;
 881
 882        BUG_ON(!IS_ROOT(dentry));
 883
 884        for (;;) {
 885                /* descend to the first leaf in the current subtree */
 886                while (!list_empty(&dentry->d_subdirs))
 887                        dentry = list_entry(dentry->d_subdirs.next,
 888                                            struct dentry, d_u.d_child);
 889
 890                /* consume the dentries from this leaf up through its parents
 891                 * until we find one with children or run out altogether */
 892                do {
 893                        struct inode *inode;
 894
 895                        /*
 896                         * inform the fs that this dentry is about to be
 897                         * unhashed and destroyed.
 898                         */
 899                        if (dentry->d_flags & DCACHE_OP_PRUNE)
 900                                dentry->d_op->d_prune(dentry);
 901
 902                        dentry_lru_del(dentry);
 903                        __d_shrink(dentry);
 904
 905                        if (dentry->d_lockref.count != 0) {
 906                                printk(KERN_ERR
 907                                       "BUG: Dentry %p{i=%lx,n=%s}"
 908                                       " still in use (%d)"
 909                                       " [unmount of %s %s]\n",
 910                                       dentry,
 911                                       dentry->d_inode ?
 912                                       dentry->d_inode->i_ino : 0UL,
 913                                       dentry->d_name.name,
 914                                       dentry->d_lockref.count,
 915                                       dentry->d_sb->s_type->name,
 916                                       dentry->d_sb->s_id);
 917                                BUG();
 918                        }
 919
 920                        if (IS_ROOT(dentry)) {
 921                                parent = NULL;
 922                                list_del(&dentry->d_u.d_child);
 923                        } else {
 924                                parent = dentry->d_parent;
 925                                parent->d_lockref.count--;
 926                                list_del(&dentry->d_u.d_child);
 927                        }
 928
 929                        inode = dentry->d_inode;
 930                        if (inode) {
 931                                dentry->d_inode = NULL;
 932                                hlist_del_init(&dentry->d_alias);
 933                                if (dentry->d_op && dentry->d_op->d_iput)
 934                                        dentry->d_op->d_iput(dentry, inode);
 935                                else
 936                                        iput(inode);
 937                        }
 938
 939                        d_free(dentry);
 940
 941                        /* finished when we fall off the top of the tree,
 942                         * otherwise we ascend to the parent and move to the
 943                         * next sibling if there is one */
 944                        if (!parent)
 945                                return;
 946                        dentry = parent;
 947                } while (list_empty(&dentry->d_subdirs));
 948
 949                dentry = list_entry(dentry->d_subdirs.next,
 950                                    struct dentry, d_u.d_child);
 951        }
 952}
 953
 954/*
 955 * destroy the dentries attached to a superblock on unmounting
 956 * - we don't need to use dentry->d_lock because:
 957 *   - the superblock is detached from all mountings and open files, so the
 958 *     dentry trees will not be rearranged by the VFS
 959 *   - s_umount is write-locked, so the memory pressure shrinker will ignore
 960 *     any dentries belonging to this superblock that it comes across
 961 *   - the filesystem itself is no longer permitted to rearrange the dentries
 962 *     in this superblock
 963 */
 964void shrink_dcache_for_umount(struct super_block *sb)
 965{
 966        struct dentry *dentry;
 967
 968        if (down_read_trylock(&sb->s_umount))
 969                BUG();
 970
 971        dentry = sb->s_root;
 972        sb->s_root = NULL;
 973        dentry->d_lockref.count--;
 974        shrink_dcache_for_umount_subtree(dentry);
 975
 976        while (!hlist_bl_empty(&sb->s_anon)) {
 977                dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 978                shrink_dcache_for_umount_subtree(dentry);
 979        }
 980}
 981
 982/*
 983 * This tries to ascend one level of parenthood, but
 984 * we can race with renaming, so we need to re-check
 985 * the parenthood after dropping the lock and check
 986 * that the sequence number still matches.
 987 */
 988static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
 989{
 990        struct dentry *new = old->d_parent;
 991
 992        rcu_read_lock();
 993        spin_unlock(&old->d_lock);
 994        spin_lock(&new->d_lock);
 995
 996        /*
 997         * might go back up the wrong parent if we have had a rename
 998         * or deletion
 999         */
1000        if (new != old->d_parent ||
1001                 (old->d_flags & DCACHE_DENTRY_KILLED) ||
1002                 (!locked && read_seqretry(&rename_lock, seq))) {
1003                spin_unlock(&new->d_lock);
1004                new = NULL;
1005        }
1006        rcu_read_unlock();
1007        return new;
1008}
1009
1010
1011/*
1012 * Search for at least 1 mount point in the dentry's subdirs.
1013 * We descend to the next level whenever the d_subdirs
1014 * list is non-empty and continue searching.
1015 */
1016 
1017/**
1018 * have_submounts - check for mounts over a dentry
1019 * @parent: dentry to check.
1020 *
1021 * Return true if the parent or its subdirectories contain
1022 * a mount point
1023 */
1024int have_submounts(struct dentry *parent)
1025{
1026        struct dentry *this_parent;
1027        struct list_head *next;
1028        unsigned seq;
1029        int locked = 0;
1030
1031        seq = read_seqbegin(&rename_lock);
1032again:
1033        this_parent = parent;
1034
1035        if (d_mountpoint(parent))
1036                goto positive;
1037        spin_lock(&this_parent->d_lock);
1038repeat:
1039        next = this_parent->d_subdirs.next;
1040resume:
1041        while (next != &this_parent->d_subdirs) {
1042                struct list_head *tmp = next;
1043                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1044                next = tmp->next;
1045
1046                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1047                /* Have we found a mount point ? */
1048                if (d_mountpoint(dentry)) {
1049                        spin_unlock(&dentry->d_lock);
1050                        spin_unlock(&this_parent->d_lock);
1051                        goto positive;
1052                }
1053                if (!list_empty(&dentry->d_subdirs)) {
1054                        spin_unlock(&this_parent->d_lock);
1055                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1056                        this_parent = dentry;
1057                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1058                        goto repeat;
1059                }
1060                spin_unlock(&dentry->d_lock);
1061        }
1062        /*
1063         * All done at this level ... ascend and resume the search.
1064         */
1065        if (this_parent != parent) {
1066                struct dentry *child = this_parent;
1067                this_parent = try_to_ascend(this_parent, locked, seq);
1068                if (!this_parent)
1069                        goto rename_retry;
1070                next = child->d_u.d_child.next;
1071                goto resume;
1072        }
1073        spin_unlock(&this_parent->d_lock);
1074        if (!locked && read_seqretry(&rename_lock, seq))
1075                goto rename_retry;
1076        if (locked)
1077                write_sequnlock(&rename_lock);
1078        return 0; /* No mount points found in tree */
1079positive:
1080        if (!locked && read_seqretry(&rename_lock, seq))
1081                goto rename_retry;
1082        if (locked)
1083                write_sequnlock(&rename_lock);
1084        return 1;
1085
1086rename_retry:
1087        if (locked)
1088                goto again;
1089        locked = 1;
1090        write_seqlock(&rename_lock);
1091        goto again;
1092}
1093EXPORT_SYMBOL(have_submounts);
1094
1095/*
1096 * Search the dentry child list of the specified parent,
1097 * and move any unused dentries to the end of the unused
1098 * list for prune_dcache(). We descend to the next level
1099 * whenever the d_subdirs list is non-empty and continue
1100 * searching.
1101 *
1102 * It returns zero iff there are no unused children,
1103 * otherwise  it returns the number of children moved to
1104 * the end of the unused list. This may not be the total
1105 * number of unused children, because select_parent can
1106 * drop the lock and return early due to latency
1107 * constraints.
1108 */
1109static int select_parent(struct dentry *parent, struct list_head *dispose)
1110{
1111        struct dentry *this_parent;
1112        struct list_head *next;
1113        unsigned seq;
1114        int found = 0;
1115        int locked = 0;
1116
1117        seq = read_seqbegin(&rename_lock);
1118again:
1119        this_parent = parent;
1120        spin_lock(&this_parent->d_lock);
1121repeat:
1122        next = this_parent->d_subdirs.next;
1123resume:
1124        while (next != &this_parent->d_subdirs) {
1125                struct list_head *tmp = next;
1126                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1127                next = tmp->next;
1128
1129                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1130
1131                /*
1132                 * move only zero ref count dentries to the dispose list.
1133                 *
1134                 * Those which are presently on the shrink list, being processed
1135                 * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
1136                 * loop in shrink_dcache_parent() might not make any progress
1137                 * and loop forever.
1138                 */
1139                if (dentry->d_lockref.count) {
1140                        dentry_lru_del(dentry);
1141                } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
1142                        dentry_lru_move_list(dentry, dispose);
1143                        dentry->d_flags |= DCACHE_SHRINK_LIST;
1144                        found++;
1145                }
1146                /*
1147                 * We can return to the caller if we have found some (this
1148                 * ensures forward progress). We'll be coming back to find
1149                 * the rest.
1150                 */
1151                if (found && need_resched()) {
1152                        spin_unlock(&dentry->d_lock);
1153                        goto out;
1154                }
1155
1156                /*
1157                 * Descend a level if the d_subdirs list is non-empty.
1158                 */
1159                if (!list_empty(&dentry->d_subdirs)) {
1160                        spin_unlock(&this_parent->d_lock);
1161                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1162                        this_parent = dentry;
1163                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1164                        goto repeat;
1165                }
1166
1167                spin_unlock(&dentry->d_lock);
1168        }
1169        /*
1170         * All done at this level ... ascend and resume the search.
1171         */
1172        if (this_parent != parent) {
1173                struct dentry *child = this_parent;
1174                this_parent = try_to_ascend(this_parent, locked, seq);
1175                if (!this_parent)
1176                        goto rename_retry;
1177                next = child->d_u.d_child.next;
1178                goto resume;
1179        }
1180out:
1181        spin_unlock(&this_parent->d_lock);
1182        if (!locked && read_seqretry(&rename_lock, seq))
1183                goto rename_retry;
1184        if (locked)
1185                write_sequnlock(&rename_lock);
1186        return found;
1187
1188rename_retry:
1189        if (found)
1190                return found;
1191        if (locked)
1192                goto again;
1193        locked = 1;
1194        write_seqlock(&rename_lock);
1195        goto again;
1196}
1197
1198/**
1199 * shrink_dcache_parent - prune dcache
1200 * @parent: parent of entries to prune
1201 *
1202 * Prune the dcache to remove unused children of the parent dentry.
1203 */
1204void shrink_dcache_parent(struct dentry * parent)
1205{
1206        LIST_HEAD(dispose);
1207        int found;
1208
1209        while ((found = select_parent(parent, &dispose)) != 0) {
1210                shrink_dentry_list(&dispose);
1211                cond_resched();
1212        }
1213}
1214EXPORT_SYMBOL(shrink_dcache_parent);
1215
1216/**
1217 * __d_alloc    -       allocate a dcache entry
1218 * @sb: filesystem it will belong to
1219 * @name: qstr of the name
1220 *
1221 * Allocates a dentry. It returns %NULL if there is insufficient memory
1222 * available. On a success the dentry is returned. The name passed in is
1223 * copied and the copy passed in may be reused after this call.
1224 */
1225 
1226struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1227{
1228        struct dentry *dentry;
1229        char *dname;
1230
1231        dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1232        if (!dentry)
1233                return NULL;
1234
1235        /*
1236         * We guarantee that the inline name is always NUL-terminated.
1237         * This way the memcpy() done by the name switching in rename
1238         * will still always have a NUL at the end, even if we might
1239         * be overwriting an internal NUL character
1240         */
1241        dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1242        if (name->len > DNAME_INLINE_LEN-1) {
1243                dname = kmalloc(name->len + 1, GFP_KERNEL);
1244                if (!dname) {
1245                        kmem_cache_free(dentry_cache, dentry); 
1246                        return NULL;
1247                }
1248        } else  {
1249                dname = dentry->d_iname;
1250        }       
1251
1252        dentry->d_name.len = name->len;
1253        dentry->d_name.hash = name->hash;
1254        memcpy(dname, name->name, name->len);
1255        dname[name->len] = 0;
1256
1257        /* Make sure we always see the terminating NUL character */
1258        smp_wmb();
1259        dentry->d_name.name = dname;
1260
1261        dentry->d_lockref.count = 1;
1262        dentry->d_flags = 0;
1263        spin_lock_init(&dentry->d_lock);
1264        seqcount_init(&dentry->d_seq);
1265        dentry->d_inode = NULL;
1266        dentry->d_parent = dentry;
1267        dentry->d_sb = sb;
1268        dentry->d_op = NULL;
1269        dentry->d_fsdata = NULL;
1270        INIT_HLIST_BL_NODE(&dentry->d_hash);
1271        INIT_LIST_HEAD(&dentry->d_lru);
1272        INIT_LIST_HEAD(&dentry->d_subdirs);
1273        INIT_HLIST_NODE(&dentry->d_alias);
1274        INIT_LIST_HEAD(&dentry->d_u.d_child);
1275        d_set_d_op(dentry, dentry->d_sb->s_d_op);
1276
1277        this_cpu_inc(nr_dentry);
1278
1279        return dentry;
1280}
1281
1282/**
1283 * d_alloc      -       allocate a dcache entry
1284 * @parent: parent of entry to allocate
1285 * @name: qstr of the name
1286 *
1287 * Allocates a dentry. It returns %NULL if there is insufficient memory
1288 * available. On a success the dentry is returned. The name passed in is
1289 * copied and the copy passed in may be reused after this call.
1290 */
1291struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1292{
1293        struct dentry *dentry = __d_alloc(parent->d_sb, name);
1294        if (!dentry)
1295                return NULL;
1296
1297        spin_lock(&parent->d_lock);
1298        /*
1299         * don't need child lock because it is not subject
1300         * to concurrency here
1301         */
1302        __dget_dlock(parent);
1303        dentry->d_parent = parent;
1304        list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1305        spin_unlock(&parent->d_lock);
1306
1307        return dentry;
1308}
1309EXPORT_SYMBOL(d_alloc);
1310
1311struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1312{
1313        struct dentry *dentry = __d_alloc(sb, name);
1314        if (dentry)
1315                dentry->d_flags |= DCACHE_DISCONNECTED;
1316        return dentry;
1317}
1318EXPORT_SYMBOL(d_alloc_pseudo);
1319
1320struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1321{
1322        struct qstr q;
1323
1324        q.name = name;
1325        q.len = strlen(name);
1326        q.hash = full_name_hash(q.name, q.len);
1327        return d_alloc(parent, &q);
1328}
1329EXPORT_SYMBOL(d_alloc_name);
1330
1331void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1332{
1333        WARN_ON_ONCE(dentry->d_op);
1334        WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1335                                DCACHE_OP_COMPARE       |
1336                                DCACHE_OP_REVALIDATE    |
1337                                DCACHE_OP_WEAK_REVALIDATE       |
1338                                DCACHE_OP_DELETE ));
1339        dentry->d_op = op;
1340        if (!op)
1341                return;
1342        if (op->d_hash)
1343                dentry->d_flags |= DCACHE_OP_HASH;
1344        if (op->d_compare)
1345                dentry->d_flags |= DCACHE_OP_COMPARE;
1346        if (op->d_revalidate)
1347                dentry->d_flags |= DCACHE_OP_REVALIDATE;
1348        if (op->d_weak_revalidate)
1349                dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1350        if (op->d_delete)
1351                dentry->d_flags |= DCACHE_OP_DELETE;
1352        if (op->d_prune)
1353                dentry->d_flags |= DCACHE_OP_PRUNE;
1354
1355}
1356EXPORT_SYMBOL(d_set_d_op);
1357
1358static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1359{
1360        spin_lock(&dentry->d_lock);
1361        if (inode) {
1362                if (unlikely(IS_AUTOMOUNT(inode)))
1363                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1364                hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1365        }
1366        dentry->d_inode = inode;
1367        dentry_rcuwalk_barrier(dentry);
1368        spin_unlock(&dentry->d_lock);
1369        fsnotify_d_instantiate(dentry, inode);
1370}
1371
1372/**
1373 * d_instantiate - fill in inode information for a dentry
1374 * @entry: dentry to complete
1375 * @inode: inode to attach to this dentry
1376 *
1377 * Fill in inode information in the entry.
1378 *
1379 * This turns negative dentries into productive full members
1380 * of society.
1381 *
1382 * NOTE! This assumes that the inode count has been incremented
1383 * (or otherwise set) by the caller to indicate that it is now
1384 * in use by the dcache.
1385 */
1386 
1387void d_instantiate(struct dentry *entry, struct inode * inode)
1388{
1389        BUG_ON(!hlist_unhashed(&entry->d_alias));
1390        if (inode)
1391                spin_lock(&inode->i_lock);
1392        __d_instantiate(entry, inode);
1393        if (inode)
1394                spin_unlock(&inode->i_lock);
1395        security_d_instantiate(entry, inode);
1396}
1397EXPORT_SYMBOL(d_instantiate);
1398
1399/**
1400 * d_instantiate_unique - instantiate a non-aliased dentry
1401 * @entry: dentry to instantiate
1402 * @inode: inode to attach to this dentry
1403 *
1404 * Fill in inode information in the entry. On success, it returns NULL.
1405 * If an unhashed alias of "entry" already exists, then we return the
1406 * aliased dentry instead and drop one reference to inode.
1407 *
1408 * Note that in order to avoid conflicts with rename() etc, the caller
1409 * had better be holding the parent directory semaphore.
1410 *
1411 * This also assumes that the inode count has been incremented
1412 * (or otherwise set) by the caller to indicate that it is now
1413 * in use by the dcache.
1414 */
1415static struct dentry *__d_instantiate_unique(struct dentry *entry,
1416                                             struct inode *inode)
1417{
1418        struct dentry *alias;
1419        int len = entry->d_name.len;
1420        const char *name = entry->d_name.name;
1421        unsigned int hash = entry->d_name.hash;
1422
1423        if (!inode) {
1424                __d_instantiate(entry, NULL);
1425                return NULL;
1426        }
1427
1428        hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
1429                /*
1430                 * Don't need alias->d_lock here, because aliases with
1431                 * d_parent == entry->d_parent are not subject to name or
1432                 * parent changes, because the parent inode i_mutex is held.
1433                 */
1434                if (alias->d_name.hash != hash)
1435                        continue;
1436                if (alias->d_parent != entry->d_parent)
1437                        continue;
1438                if (alias->d_name.len != len)
1439                        continue;
1440                if (dentry_cmp(alias, name, len))
1441                        continue;
1442                __dget(alias);
1443                return alias;
1444        }
1445
1446        __d_instantiate(entry, inode);
1447        return NULL;
1448}
1449
1450struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1451{
1452        struct dentry *result;
1453
1454        BUG_ON(!hlist_unhashed(&entry->d_alias));
1455
1456        if (inode)
1457                spin_lock(&inode->i_lock);
1458        result = __d_instantiate_unique(entry, inode);
1459        if (inode)
1460                spin_unlock(&inode->i_lock);
1461
1462        if (!result) {
1463                security_d_instantiate(entry, inode);
1464                return NULL;
1465        }
1466
1467        BUG_ON(!d_unhashed(result));
1468        iput(inode);
1469        return result;
1470}
1471
1472EXPORT_SYMBOL(d_instantiate_unique);
1473
1474struct dentry *d_make_root(struct inode *root_inode)
1475{
1476        struct dentry *res = NULL;
1477
1478        if (root_inode) {
1479                static const struct qstr name = QSTR_INIT("/", 1);
1480
1481                res = __d_alloc(root_inode->i_sb, &name);
1482                if (res)
1483                        d_instantiate(res, root_inode);
1484                else
1485                        iput(root_inode);
1486        }
1487        return res;
1488}
1489EXPORT_SYMBOL(d_make_root);
1490
1491static struct dentry * __d_find_any_alias(struct inode *inode)
1492{
1493        struct dentry *alias;
1494
1495        if (hlist_empty(&inode->i_dentry))
1496                return NULL;
1497        alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
1498        __dget(alias);
1499        return alias;
1500}
1501
1502/**
1503 * d_find_any_alias - find any alias for a given inode
1504 * @inode: inode to find an alias for
1505 *
1506 * If any aliases exist for the given inode, take and return a
1507 * reference for one of them.  If no aliases exist, return %NULL.
1508 */
1509struct dentry *d_find_any_alias(struct inode *inode)
1510{
1511        struct dentry *de;
1512
1513        spin_lock(&inode->i_lock);
1514        de = __d_find_any_alias(inode);
1515        spin_unlock(&inode->i_lock);
1516        return de;
1517}
1518EXPORT_SYMBOL(d_find_any_alias);
1519
1520/**
1521 * d_obtain_alias - find or allocate a dentry for a given inode
1522 * @inode: inode to allocate the dentry for
1523 *
1524 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1525 * similar open by handle operations.  The returned dentry may be anonymous,
1526 * or may have a full name (if the inode was already in the cache).
1527 *
1528 * When called on a directory inode, we must ensure that the inode only ever
1529 * has one dentry.  If a dentry is found, that is returned instead of
1530 * allocating a new one.
1531 *
1532 * On successful return, the reference to the inode has been transferred
1533 * to the dentry.  In case of an error the reference on the inode is released.
1534 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1535 * be passed in and will be the error will be propagate to the return value,
1536 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1537 */
1538struct dentry *d_obtain_alias(struct inode *inode)
1539{
1540        static const struct qstr anonstring = QSTR_INIT("/", 1);
1541        struct dentry *tmp;
1542        struct dentry *res;
1543
1544        if (!inode)
1545                return ERR_PTR(-ESTALE);
1546        if (IS_ERR(inode))
1547                return ERR_CAST(inode);
1548
1549        res = d_find_any_alias(inode);
1550        if (res)
1551                goto out_iput;
1552
1553        tmp = __d_alloc(inode->i_sb, &anonstring);
1554        if (!tmp) {
1555                res = ERR_PTR(-ENOMEM);
1556                goto out_iput;
1557        }
1558
1559        spin_lock(&inode->i_lock);
1560        res = __d_find_any_alias(inode);
1561        if (res) {
1562                spin_unlock(&inode->i_lock);
1563                dput(tmp);
1564                goto out_iput;
1565        }
1566
1567        /* attach a disconnected dentry */
1568        spin_lock(&tmp->d_lock);
1569        tmp->d_inode = inode;
1570        tmp->d_flags |= DCACHE_DISCONNECTED;
1571        hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1572        hlist_bl_lock(&tmp->d_sb->s_anon);
1573        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1574        hlist_bl_unlock(&tmp->d_sb->s_anon);
1575        spin_unlock(&tmp->d_lock);
1576        spin_unlock(&inode->i_lock);
1577        security_d_instantiate(tmp, inode);
1578
1579        return tmp;
1580
1581 out_iput:
1582        if (res && !IS_ERR(res))
1583                security_d_instantiate(res, inode);
1584        iput(inode);
1585        return res;
1586}
1587EXPORT_SYMBOL(d_obtain_alias);
1588
1589/**
1590 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1591 * @inode:  the inode which may have a disconnected dentry
1592 * @dentry: a negative dentry which we want to point to the inode.
1593 *
1594 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1595 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1596 * and return it, else simply d_add the inode to the dentry and return NULL.
1597 *
1598 * This is needed in the lookup routine of any filesystem that is exportable
1599 * (via knfsd) so that we can build dcache paths to directories effectively.
1600 *
1601 * If a dentry was found and moved, then it is returned.  Otherwise NULL
1602 * is returned.  This matches the expected return value of ->lookup.
1603 *
1604 * Cluster filesystems may call this function with a negative, hashed dentry.
1605 * In that case, we know that the inode will be a regular file, and also this
1606 * will only occur during atomic_open. So we need to check for the dentry
1607 * being already hashed only in the final case.
1608 */
1609struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1610{
1611        struct dentry *new = NULL;
1612
1613        if (IS_ERR(inode))
1614                return ERR_CAST(inode);
1615
1616        if (inode && S_ISDIR(inode->i_mode)) {
1617                spin_lock(&inode->i_lock);
1618                new = __d_find_alias(inode, 1);
1619                if (new) {
1620                        BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1621                        spin_unlock(&inode->i_lock);
1622                        security_d_instantiate(new, inode);
1623                        d_move(new, dentry);
1624                        iput(inode);
1625                } else {
1626                        /* already taking inode->i_lock, so d_add() by hand */
1627                        __d_instantiate(dentry, inode);
1628                        spin_unlock(&inode->i_lock);
1629                        security_d_instantiate(dentry, inode);
1630                        d_rehash(dentry);
1631                }
1632        } else {
1633                d_instantiate(dentry, inode);
1634                if (d_unhashed(dentry))
1635                        d_rehash(dentry);
1636        }
1637        return new;
1638}
1639EXPORT_SYMBOL(d_splice_alias);
1640
1641/**
1642 * d_add_ci - lookup or allocate new dentry with case-exact name
1643 * @inode:  the inode case-insensitive lookup has found
1644 * @dentry: the negative dentry that was passed to the parent's lookup func
1645 * @name:   the case-exact name to be associated with the returned dentry
1646 *
1647 * This is to avoid filling the dcache with case-insensitive names to the
1648 * same inode, only the actual correct case is stored in the dcache for
1649 * case-insensitive filesystems.
1650 *
1651 * For a case-insensitive lookup match and if the the case-exact dentry
1652 * already exists in in the dcache, use it and return it.
1653 *
1654 * If no entry exists with the exact case name, allocate new dentry with
1655 * the exact case, and return the spliced entry.
1656 */
1657struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1658                        struct qstr *name)
1659{
1660        struct dentry *found;
1661        struct dentry *new;
1662
1663        /*
1664         * First check if a dentry matching the name already exists,
1665         * if not go ahead and create it now.
1666         */
1667        found = d_hash_and_lookup(dentry->d_parent, name);
1668        if (unlikely(IS_ERR(found)))
1669                goto err_out;
1670        if (!found) {
1671                new = d_alloc(dentry->d_parent, name);
1672                if (!new) {
1673                        found = ERR_PTR(-ENOMEM);
1674                        goto err_out;
1675                }
1676
1677                found = d_splice_alias(inode, new);
1678                if (found) {
1679                        dput(new);
1680                        return found;
1681                }
1682                return new;
1683        }
1684
1685        /*
1686         * If a matching dentry exists, and it's not negative use it.
1687         *
1688         * Decrement the reference count to balance the iget() done
1689         * earlier on.
1690         */
1691        if (found->d_inode) {
1692                if (unlikely(found->d_inode != inode)) {
1693                        /* This can't happen because bad inodes are unhashed. */
1694                        BUG_ON(!is_bad_inode(inode));
1695                        BUG_ON(!is_bad_inode(found->d_inode));
1696                }
1697                iput(inode);
1698                return found;
1699        }
1700
1701        /*
1702         * Negative dentry: instantiate it unless the inode is a directory and
1703         * already has a dentry.
1704         */
1705        new = d_splice_alias(inode, found);
1706        if (new) {
1707                dput(found);
1708                found = new;
1709        }
1710        return found;
1711
1712err_out:
1713        iput(inode);
1714        return found;
1715}
1716EXPORT_SYMBOL(d_add_ci);
1717
1718/*
1719 * Do the slow-case of the dentry name compare.
1720 *
1721 * Unlike the dentry_cmp() function, we need to atomically
1722 * load the name and length information, so that the
1723 * filesystem can rely on them, and can use the 'name' and
1724 * 'len' information without worrying about walking off the
1725 * end of memory etc.
1726 *
1727 * Thus the read_seqcount_retry() and the "duplicate" info
1728 * in arguments (the low-level filesystem should not look
1729 * at the dentry inode or name contents directly, since
1730 * rename can change them while we're in RCU mode).
1731 */
1732enum slow_d_compare {
1733        D_COMP_OK,
1734        D_COMP_NOMATCH,
1735        D_COMP_SEQRETRY,
1736};
1737
1738static noinline enum slow_d_compare slow_dentry_cmp(
1739                const struct dentry *parent,
1740                struct dentry *dentry,
1741                unsigned int seq,
1742                const struct qstr *name)
1743{
1744        int tlen = dentry->d_name.len;
1745        const char *tname = dentry->d_name.name;
1746
1747        if (read_seqcount_retry(&dentry->d_seq, seq)) {
1748                cpu_relax();
1749                return D_COMP_SEQRETRY;
1750        }
1751        if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
1752                return D_COMP_NOMATCH;
1753        return D_COMP_OK;
1754}
1755
1756/**
1757 * __d_lookup_rcu - search for a dentry (racy, store-free)
1758 * @parent: parent dentry
1759 * @name: qstr of name we wish to find
1760 * @seqp: returns d_seq value at the point where the dentry was found
1761 * Returns: dentry, or NULL
1762 *
1763 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1764 * resolution (store-free path walking) design described in
1765 * Documentation/filesystems/path-lookup.txt.
1766 *
1767 * This is not to be used outside core vfs.
1768 *
1769 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1770 * held, and rcu_read_lock held. The returned dentry must not be stored into
1771 * without taking d_lock and checking d_seq sequence count against @seq
1772 * returned here.
1773 *
1774 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1775 * function.
1776 *
1777 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1778 * the returned dentry, so long as its parent's seqlock is checked after the
1779 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1780 * is formed, giving integrity down the path walk.
1781 *
1782 * NOTE! The caller *has* to check the resulting dentry against the sequence
1783 * number we've returned before using any of the resulting dentry state!
1784 */
1785struct dentry *__d_lookup_rcu(const struct dentry *parent,
1786                                const struct qstr *name,
1787                                unsigned *seqp)
1788{
1789        u64 hashlen = name->hash_len;
1790        const unsigned char *str = name->name;
1791        struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1792        struct hlist_bl_node *node;
1793        struct dentry *dentry;
1794
1795        /*
1796         * Note: There is significant duplication with __d_lookup_rcu which is
1797         * required to prevent single threaded performance regressions
1798         * especially on architectures where smp_rmb (in seqcounts) are costly.
1799         * Keep the two functions in sync.
1800         */
1801
1802        /*
1803         * The hash list is protected using RCU.
1804         *
1805         * Carefully use d_seq when comparing a candidate dentry, to avoid
1806         * races with d_move().
1807         *
1808         * It is possible that concurrent renames can mess up our list
1809         * walk here and result in missing our dentry, resulting in the
1810         * false-negative result. d_lookup() protects against concurrent
1811         * renames using rename_lock seqlock.
1812         *
1813         * See Documentation/filesystems/path-lookup.txt for more details.
1814         */
1815        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1816                unsigned seq;
1817
1818seqretry:
1819                /*
1820                 * The dentry sequence count protects us from concurrent
1821                 * renames, and thus protects parent and name fields.
1822                 *
1823                 * The caller must perform a seqcount check in order
1824                 * to do anything useful with the returned dentry.
1825                 *
1826                 * NOTE! We do a "raw" seqcount_begin here. That means that
1827                 * we don't wait for the sequence count to stabilize if it
1828                 * is in the middle of a sequence change. If we do the slow
1829                 * dentry compare, we will do seqretries until it is stable,
1830                 * and if we end up with a successful lookup, we actually
1831                 * want to exit RCU lookup anyway.
1832                 */
1833                seq = raw_seqcount_begin(&dentry->d_seq);
1834                if (dentry->d_parent != parent)
1835                        continue;
1836                if (d_unhashed(dentry))
1837                        continue;
1838
1839                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1840                        if (dentry->d_name.hash != hashlen_hash(hashlen))
1841                                continue;
1842                        *seqp = seq;
1843                        switch (slow_dentry_cmp(parent, dentry, seq, name)) {
1844                        case D_COMP_OK:
1845                                return dentry;
1846                        case D_COMP_NOMATCH:
1847                                continue;
1848                        default:
1849                                goto seqretry;
1850                        }
1851                }
1852
1853                if (dentry->d_name.hash_len != hashlen)
1854                        continue;
1855                *seqp = seq;
1856                if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1857                        return dentry;
1858        }
1859        return NULL;
1860}
1861
1862/**
1863 * d_lookup - search for a dentry
1864 * @parent: parent dentry
1865 * @name: qstr of name we wish to find
1866 * Returns: dentry, or NULL
1867 *
1868 * d_lookup searches the children of the parent dentry for the name in
1869 * question. If the dentry is found its reference count is incremented and the
1870 * dentry is returned. The caller must use dput to free the entry when it has
1871 * finished using it. %NULL is returned if the dentry does not exist.
1872 */
1873struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
1874{
1875        struct dentry *dentry;
1876        unsigned seq;
1877
1878        do {
1879                seq = read_seqbegin(&rename_lock);
1880                dentry = __d_lookup(parent, name);
1881                if (dentry)
1882                        break;
1883        } while (read_seqretry(&rename_lock, seq));
1884        return dentry;
1885}
1886EXPORT_SYMBOL(d_lookup);
1887
1888/**
1889 * __d_lookup - search for a dentry (racy)
1890 * @parent: parent dentry
1891 * @name: qstr of name we wish to find
1892 * Returns: dentry, or NULL
1893 *
1894 * __d_lookup is like d_lookup, however it may (rarely) return a
1895 * false-negative result due to unrelated rename activity.
1896 *
1897 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1898 * however it must be used carefully, eg. with a following d_lookup in
1899 * the case of failure.
1900 *
1901 * __d_lookup callers must be commented.
1902 */
1903struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
1904{
1905        unsigned int len = name->len;
1906        unsigned int hash = name->hash;
1907        const unsigned char *str = name->name;
1908        struct hlist_bl_head *b = d_hash(parent, hash);
1909        struct hlist_bl_node *node;
1910        struct dentry *found = NULL;
1911        struct dentry *dentry;
1912
1913        /*
1914         * Note: There is significant duplication with __d_lookup_rcu which is
1915         * required to prevent single threaded performance regressions
1916         * especially on architectures where smp_rmb (in seqcounts) are costly.
1917         * Keep the two functions in sync.
1918         */
1919
1920        /*
1921         * The hash list is protected using RCU.
1922         *
1923         * Take d_lock when comparing a candidate dentry, to avoid races
1924         * with d_move().
1925         *
1926         * It is possible that concurrent renames can mess up our list
1927         * walk here and result in missing our dentry, resulting in the
1928         * false-negative result. d_lookup() protects against concurrent
1929         * renames using rename_lock seqlock.
1930         *
1931         * See Documentation/filesystems/path-lookup.txt for more details.
1932         */
1933        rcu_read_lock();
1934        
1935        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1936
1937                if (dentry->d_name.hash != hash)
1938                        continue;
1939
1940                spin_lock(&dentry->d_lock);
1941                if (dentry->d_parent != parent)
1942                        goto next;
1943                if (d_unhashed(dentry))
1944                        goto next;
1945
1946                /*
1947                 * It is safe to compare names since d_move() cannot
1948                 * change the qstr (protected by d_lock).
1949                 */
1950                if (parent->d_flags & DCACHE_OP_COMPARE) {
1951                        int tlen = dentry->d_name.len;
1952                        const char *tname = dentry->d_name.name;
1953                        if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
1954                                goto next;
1955                } else {
1956                        if (dentry->d_name.len != len)
1957                                goto next;
1958                        if (dentry_cmp(dentry, str, len))
1959                                goto next;
1960                }
1961
1962                dentry->d_lockref.count++;
1963                found = dentry;
1964                spin_unlock(&dentry->d_lock);
1965                break;
1966next:
1967                spin_unlock(&dentry->d_lock);
1968        }
1969        rcu_read_unlock();
1970
1971        return found;
1972}
1973
1974/**
1975 * d_hash_and_lookup - hash the qstr then search for a dentry
1976 * @dir: Directory to search in
1977 * @name: qstr of name we wish to find
1978 *
1979 * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
1980 */
1981struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1982{
1983        /*
1984         * Check for a fs-specific hash function. Note that we must
1985         * calculate the standard hash first, as the d_op->d_hash()
1986         * routine may choose to leave the hash value unchanged.
1987         */
1988        name->hash = full_name_hash(name->name, name->len);
1989        if (dir->d_flags & DCACHE_OP_HASH) {
1990                int err = dir->d_op->d_hash(dir, name);
1991                if (unlikely(err < 0))
1992                        return ERR_PTR(err);
1993        }
1994        return d_lookup(dir, name);
1995}
1996EXPORT_SYMBOL(d_hash_and_lookup);
1997
1998/**
1999 * d_validate - verify dentry provided from insecure source (deprecated)
2000 * @dentry: The dentry alleged to be valid child of @dparent
2001 * @dparent: The parent dentry (known to be valid)
2002 *
2003 * An insecure source has sent us a dentry, here we verify it and dget() it.
2004 * This is used by ncpfs in its readdir implementation.
2005 * Zero is returned in the dentry is invalid.
2006 *
2007 * This function is slow for big directories, and deprecated, do not use it.
2008 */
2009int d_validate(struct dentry *dentry, struct dentry *dparent)
2010{
2011        struct dentry *child;
2012
2013        spin_lock(&dparent->d_lock);
2014        list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
2015                if (dentry == child) {
2016                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2017                        __dget_dlock(dentry);
2018                        spin_unlock(&dentry->d_lock);
2019                        spin_unlock(&dparent->d_lock);
2020                        return 1;
2021                }
2022        }
2023        spin_unlock(&dparent->d_lock);
2024
2025        return 0;
2026}
2027EXPORT_SYMBOL(d_validate);
2028
2029/*
2030 * When a file is deleted, we have two options:
2031 * - turn this dentry into a negative dentry
2032 * - unhash this dentry and free it.
2033 *
2034 * Usually, we want to just turn this into
2035 * a negative dentry, but if anybody else is
2036 * currently using the dentry or the inode
2037 * we can't do that and we fall back on removing
2038 * it from the hash queues and waiting for
2039 * it to be deleted later when it has no users
2040 */
2041 
2042/**
2043 * d_delete - delete a dentry
2044 * @dentry: The dentry to delete
2045 *
2046 * Turn the dentry into a negative dentry if possible, otherwise
2047 * remove it from the hash queues so it can be deleted later
2048 */
2049 
2050void d_delete(struct dentry * dentry)
2051{
2052        struct inode *inode;
2053        int isdir = 0;
2054        /*
2055         * Are we the only user?
2056         */
2057again:
2058        spin_lock(&dentry->d_lock);
2059        inode = dentry->d_inode;
2060        isdir = S_ISDIR(inode->i_mode);
2061        if (dentry->d_lockref.count == 1) {
2062                if (!spin_trylock(&inode->i_lock)) {
2063                        spin_unlock(&dentry->d_lock);
2064                        cpu_relax();
2065                        goto again;
2066                }
2067                dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2068                dentry_unlink_inode(dentry);
2069                fsnotify_nameremove(dentry, isdir);
2070                return;
2071        }
2072
2073        if (!d_unhashed(dentry))
2074                __d_drop(dentry);
2075
2076        spin_unlock(&dentry->d_lock);
2077
2078        fsnotify_nameremove(dentry, isdir);
2079}
2080EXPORT_SYMBOL(d_delete);
2081
2082static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2083{
2084        BUG_ON(!d_unhashed(entry));
2085        hlist_bl_lock(b);
2086        entry->d_flags |= DCACHE_RCUACCESS;
2087        hlist_bl_add_head_rcu(&entry->d_hash, b);
2088        hlist_bl_unlock(b);
2089}
2090
2091static void _d_rehash(struct dentry * entry)
2092{
2093        __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
2094}
2095
2096/**
2097 * d_rehash     - add an entry back to the hash
2098 * @entry: dentry to add to the hash
2099 *
2100 * Adds a dentry to the hash according to its name.
2101 */
2102 
2103void d_rehash(struct dentry * entry)
2104{
2105        spin_lock(&entry->d_lock);
2106        _d_rehash(entry);
2107        spin_unlock(&entry->d_lock);
2108}
2109EXPORT_SYMBOL(d_rehash);
2110
2111/**
2112 * dentry_update_name_case - update case insensitive dentry with a new name
2113 * @dentry: dentry to be updated
2114 * @name: new name
2115 *
2116 * Update a case insensitive dentry with new case of name.
2117 *
2118 * dentry must have been returned by d_lookup with name @name. Old and new
2119 * name lengths must match (ie. no d_compare which allows mismatched name
2120 * lengths).
2121 *
2122 * Parent inode i_mutex must be held over d_lookup and into this call (to
2123 * keep renames and concurrent inserts, and readdir(2) away).
2124 */
2125void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2126{
2127        BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2128        BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2129
2130        spin_lock(&dentry->d_lock);
2131        write_seqcount_begin(&dentry->d_seq);
2132        memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2133        write_seqcount_end(&dentry->d_seq);
2134        spin_unlock(&dentry->d_lock);
2135}
2136EXPORT_SYMBOL(dentry_update_name_case);
2137
2138static void switch_names(struct dentry *dentry, struct dentry *target)
2139{
2140        if (dname_external(target)) {
2141                if (dname_external(dentry)) {
2142                        /*
2143                         * Both external: swap the pointers
2144                         */
2145                        swap(target->d_name.name, dentry->d_name.name);
2146                } else {
2147                        /*
2148                         * dentry:internal, target:external.  Steal target's
2149                         * storage and make target internal.
2150                         */
2151                        memcpy(target->d_iname, dentry->d_name.name,
2152                                        dentry->d_name.len + 1);
2153                        dentry->d_name.name = target->d_name.name;
2154                        target->d_name.name = target->d_iname;
2155                }
2156        } else {
2157                if (dname_external(dentry)) {
2158                        /*
2159                         * dentry:external, target:internal.  Give dentry's
2160                         * storage to target and make dentry internal
2161                         */
2162                        memcpy(dentry->d_iname, target->d_name.name,
2163                                        target->d_name.len + 1);
2164                        target->d_name.name = dentry->d_name.name;
2165                        dentry->d_name.name = dentry->d_iname;
2166                } else {
2167                        /*
2168                         * Both are internal.  Just copy target to dentry
2169                         */
2170                        memcpy(dentry->d_iname, target->d_name.name,
2171                                        target->d_name.len + 1);
2172                        dentry->d_name.len = target->d_name.len;
2173                        return;
2174                }
2175        }
2176        swap(dentry->d_name.len, target->d_name.len);
2177}
2178
2179static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2180{
2181        /*
2182         * XXXX: do we really need to take target->d_lock?
2183         */
2184        if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2185                spin_lock(&target->d_parent->d_lock);
2186        else {
2187                if (d_ancestor(dentry->d_parent, target->d_parent)) {
2188                        spin_lock(&dentry->d_parent->d_lock);
2189                        spin_lock_nested(&target->d_parent->d_lock,
2190                                                DENTRY_D_LOCK_NESTED);
2191                } else {
2192                        spin_lock(&target->d_parent->d_lock);
2193                        spin_lock_nested(&dentry->d_parent->d_lock,
2194                                                DENTRY_D_LOCK_NESTED);
2195                }
2196        }
2197        if (target < dentry) {
2198                spin_lock_nested(&target->d_lock, 2);
2199                spin_lock_nested(&dentry->d_lock, 3);
2200        } else {
2201                spin_lock_nested(&dentry->d_lock, 2);
2202                spin_lock_nested(&target->d_lock, 3);
2203        }
2204}
2205
2206static void dentry_unlock_parents_for_move(struct dentry *dentry,
2207                                        struct dentry *target)
2208{
2209        if (target->d_parent != dentry->d_parent)
2210                spin_unlock(&dentry->d_parent->d_lock);
2211        if (target->d_parent != target)
2212                spin_unlock(&target->d_parent->d_lock);
2213}
2214
2215/*
2216 * When switching names, the actual string doesn't strictly have to
2217 * be preserved in the target - because we're dropping the target
2218 * anyway. As such, we can just do a simple memcpy() to copy over
2219 * the new name before we switch.
2220 *
2221 * Note that we have to be a lot more careful about getting the hash
2222 * switched - we have to switch the hash value properly even if it
2223 * then no longer matches the actual (corrupted) string of the target.
2224 * The hash value has to match the hash queue that the dentry is on..
2225 */
2226/*
2227 * __d_move - move a dentry
2228 * @dentry: entry to move
2229 * @target: new dentry
2230 *
2231 * Update the dcache to reflect the move of a file name. Negative
2232 * dcache entries should not be moved in this way. Caller must hold
2233 * rename_lock, the i_mutex of the source and target directories,
2234 * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2235 */
2236static void __d_move(struct dentry * dentry, struct dentry * target)
2237{
2238        if (!dentry->d_inode)
2239                printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2240
2241        BUG_ON(d_ancestor(dentry, target));
2242        BUG_ON(d_ancestor(target, dentry));
2243
2244        dentry_lock_for_move(dentry, target);
2245
2246        write_seqcount_begin(&dentry->d_seq);
2247        write_seqcount_begin(&target->d_seq);
2248
2249        /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2250
2251        /*
2252         * Move the dentry to the target hash queue. Don't bother checking
2253         * for the same hash queue because of how unlikely it is.
2254         */
2255        __d_drop(dentry);
2256        __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
2257
2258        /* Unhash the target: dput() will then get rid of it */
2259        __d_drop(target);
2260
2261        list_del(&dentry->d_u.d_child);
2262        list_del(&target->d_u.d_child);
2263
2264        /* Switch the names.. */
2265        switch_names(dentry, target);
2266        swap(dentry->d_name.hash, target->d_name.hash);
2267
2268        /* ... and switch the parents */
2269        if (IS_ROOT(dentry)) {
2270                dentry->d_parent = target->d_parent;
2271                target->d_parent = target;
2272                INIT_LIST_HEAD(&target->d_u.d_child);
2273        } else {
2274                swap(dentry->d_parent, target->d_parent);
2275
2276                /* And add them back to the (new) parent lists */
2277                list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
2278        }
2279
2280        list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2281
2282        write_seqcount_end(&target->d_seq);
2283        write_seqcount_end(&dentry->d_seq);
2284
2285        dentry_unlock_parents_for_move(dentry, target);
2286        spin_unlock(&target->d_lock);
2287        fsnotify_d_move(dentry);
2288        spin_unlock(&dentry->d_lock);
2289}
2290
2291/*
2292 * d_move - move a dentry
2293 * @dentry: entry to move
2294 * @target: new dentry
2295 *
2296 * Update the dcache to reflect the move of a file name. Negative
2297 * dcache entries should not be moved in this way. See the locking
2298 * requirements for __d_move.
2299 */
2300void d_move(struct dentry *dentry, struct dentry *target)
2301{
2302        write_seqlock(&rename_lock);
2303        __d_move(dentry, target);
2304        write_sequnlock(&rename_lock);
2305}
2306EXPORT_SYMBOL(d_move);
2307
2308/**
2309 * d_ancestor - search for an ancestor
2310 * @p1: ancestor dentry
2311 * @p2: child dentry
2312 *
2313 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2314 * an ancestor of p2, else NULL.
2315 */
2316struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2317{
2318        struct dentry *p;
2319
2320        for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2321                if (p->d_parent == p1)
2322                        return p;
2323        }
2324        return NULL;
2325}
2326
2327/*
2328 * This helper attempts to cope with remotely renamed directories
2329 *
2330 * It assumes that the caller is already holding
2331 * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
2332 *
2333 * Note: If ever the locking in lock_rename() changes, then please
2334 * remember to update this too...
2335 */
2336static struct dentry *__d_unalias(struct inode *inode,
2337                struct dentry *dentry, struct dentry *alias)
2338{
2339        struct mutex *m1 = NULL, *m2 = NULL;
2340        struct dentry *ret = ERR_PTR(-EBUSY);
2341
2342        /* If alias and dentry share a parent, then no extra locks required */
2343        if (alias->d_parent == dentry->d_parent)
2344                goto out_unalias;
2345
2346        /* See lock_rename() */
2347        if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2348                goto out_err;
2349        m1 = &dentry->d_sb->s_vfs_rename_mutex;
2350        if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2351                goto out_err;
2352        m2 = &alias->d_parent->d_inode->i_mutex;
2353out_unalias:
2354        if (likely(!d_mountpoint(alias))) {
2355                __d_move(alias, dentry);
2356                ret = alias;
2357        }
2358out_err:
2359        spin_unlock(&inode->i_lock);
2360        if (m2)
2361                mutex_unlock(m2);
2362        if (m1)
2363                mutex_unlock(m1);
2364        return ret;
2365}
2366
2367/*
2368 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2369 * named dentry in place of the dentry to be replaced.
2370 * returns with anon->d_lock held!
2371 */
2372static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2373{
2374        struct dentry *dparent;
2375
2376        dentry_lock_for_move(anon, dentry);
2377
2378        write_seqcount_begin(&dentry->d_seq);
2379        write_seqcount_begin(&anon->d_seq);
2380
2381        dparent = dentry->d_parent;
2382
2383        switch_names(dentry, anon);
2384        swap(dentry->d_name.hash, anon->d_name.hash);
2385
2386        dentry->d_parent = dentry;
2387        list_del_init(&dentry->d_u.d_child);
2388        anon->d_parent = dparent;
2389        list_move(&anon->d_u.d_child, &dparent->d_subdirs);
2390
2391        write_seqcount_end(&dentry->d_seq);
2392        write_seqcount_end(&anon->d_seq);
2393
2394        dentry_unlock_parents_for_move(anon, dentry);
2395        spin_unlock(&dentry->d_lock);
2396
2397        /* anon->d_lock still locked, returns locked */
2398        anon->d_flags &= ~DCACHE_DISCONNECTED;
2399}
2400
2401/**
2402 * d_materialise_unique - introduce an inode into the tree
2403 * @dentry: candidate dentry
2404 * @inode: inode to bind to the dentry, to which aliases may be attached
2405 *
2406 * Introduces an dentry into the tree, substituting an extant disconnected
2407 * root directory alias in its place if there is one. Caller must hold the
2408 * i_mutex of the parent directory.
2409 */
2410struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2411{
2412        struct dentry *actual;
2413
2414        BUG_ON(!d_unhashed(dentry));
2415
2416        if (!inode) {
2417                actual = dentry;
2418                __d_instantiate(dentry, NULL);
2419                d_rehash(actual);
2420                goto out_nolock;
2421        }
2422
2423        spin_lock(&inode->i_lock);
2424
2425        if (S_ISDIR(inode->i_mode)) {
2426                struct dentry *alias;
2427
2428                /* Does an aliased dentry already exist? */
2429                alias = __d_find_alias(inode, 0);
2430                if (alias) {
2431                        actual = alias;
2432                        write_seqlock(&rename_lock);
2433
2434                        if (d_ancestor(alias, dentry)) {
2435                                /* Check for loops */
2436                                actual = ERR_PTR(-ELOOP);
2437                                spin_unlock(&inode->i_lock);
2438                        } else if (IS_ROOT(alias)) {
2439                                /* Is this an anonymous mountpoint that we
2440                                 * could splice into our tree? */
2441                                __d_materialise_dentry(dentry, alias);
2442                                write_sequnlock(&rename_lock);
2443                                __d_drop(alias);
2444                                goto found;
2445                        } else {
2446                                /* Nope, but we must(!) avoid directory
2447                                 * aliasing. This drops inode->i_lock */
2448                                actual = __d_unalias(inode, dentry, alias);
2449                        }
2450                        write_sequnlock(&rename_lock);
2451                        if (IS_ERR(actual)) {
2452                                if (PTR_ERR(actual) == -ELOOP)
2453                                        pr_warn_ratelimited(
2454                                                "VFS: Lookup of '%s' in %s %s"
2455                                                " would have caused loop\n",
2456                                                dentry->d_name.name,
2457                                                inode->i_sb->s_type->name,
2458                                                inode->i_sb->s_id);
2459                                dput(alias);
2460                        }
2461                        goto out_nolock;
2462                }
2463        }
2464
2465        /* Add a unique reference */
2466        actual = __d_instantiate_unique(dentry, inode);
2467        if (!actual)
2468                actual = dentry;
2469        else
2470                BUG_ON(!d_unhashed(actual));
2471
2472        spin_lock(&actual->d_lock);
2473found:
2474        _d_rehash(actual);
2475        spin_unlock(&actual->d_lock);
2476        spin_unlock(&inode->i_lock);
2477out_nolock:
2478        if (actual == dentry) {
2479                security_d_instantiate(dentry, inode);
2480                return NULL;
2481        }
2482
2483        iput(inode);
2484        return actual;
2485}
2486EXPORT_SYMBOL_GPL(d_materialise_unique);
2487
2488static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2489{
2490        *buflen -= namelen;
2491        if (*buflen < 0)
2492                return -ENAMETOOLONG;
2493        *buffer -= namelen;
2494        memcpy(*buffer, str, namelen);
2495        return 0;
2496}
2497
2498static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2499{
2500        return prepend(buffer, buflen, name->name, name->len);
2501}
2502
2503/**
2504 * prepend_path - Prepend path string to a buffer
2505 * @path: the dentry/vfsmount to report
2506 * @root: root vfsmnt/dentry
2507 * @buffer: pointer to the end of the buffer
2508 * @buflen: pointer to buffer length
2509 *
2510 * Caller holds the rename_lock.
2511 */
2512static int prepend_path(const struct path *path,
2513                        const struct path *root,
2514                        char **buffer, int *buflen)
2515{
2516        struct dentry *dentry = path->dentry;
2517        struct vfsmount *vfsmnt = path->mnt;
2518        struct mount *mnt = real_mount(vfsmnt);
2519        bool slash = false;
2520        int error = 0;
2521
2522        while (dentry != root->dentry || vfsmnt != root->mnt) {
2523                struct dentry * parent;
2524
2525                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2526                        /* Global root? */
2527                        if (!mnt_has_parent(mnt))
2528                                goto global_root;
2529                        dentry = mnt->mnt_mountpoint;
2530                        mnt = mnt->mnt_parent;
2531                        vfsmnt = &mnt->mnt;
2532                        continue;
2533                }
2534                parent = dentry->d_parent;
2535                prefetch(parent);
2536                spin_lock(&dentry->d_lock);
2537                error = prepend_name(buffer, buflen, &dentry->d_name);
2538                spin_unlock(&dentry->d_lock);
2539                if (!error)
2540                        error = prepend(buffer, buflen, "/", 1);
2541                if (error)
2542                        break;
2543
2544                slash = true;
2545                dentry = parent;
2546        }
2547
2548        if (!error && !slash)
2549                error = prepend(buffer, buflen, "/", 1);
2550
2551        return error;
2552
2553global_root:
2554        /*
2555         * Filesystems needing to implement special "root names"
2556         * should do so with ->d_dname()
2557         */
2558        if (IS_ROOT(dentry) &&
2559            (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2560                WARN(1, "Root dentry has weird name <%.*s>\n",
2561                     (int) dentry->d_name.len, dentry->d_name.name);
2562        }
2563        if (!slash)
2564                error = prepend(buffer, buflen, "/", 1);
2565        if (!error)
2566                error = is_mounted(vfsmnt) ? 1 : 2;
2567        return error;
2568}
2569
2570/**
2571 * __d_path - return the path of a dentry
2572 * @path: the dentry/vfsmount to report
2573 * @root: root vfsmnt/dentry
2574 * @buf: buffer to return value in
2575 * @buflen: buffer length
2576 *
2577 * Convert a dentry into an ASCII path name.
2578 *
2579 * Returns a pointer into the buffer or an error code if the
2580 * path was too long.
2581 *
2582 * "buflen" should be positive.
2583 *
2584 * If the path is not reachable from the supplied root, return %NULL.
2585 */
2586char *__d_path(const struct path *path,
2587               const struct path *root,
2588               char *buf, int buflen)
2589{
2590        char *res = buf + buflen;
2591        int error;
2592
2593        prepend(&res, &buflen, "\0", 1);
2594        br_read_lock(&vfsmount_lock);
2595        write_seqlock(&rename_lock);
2596        error = prepend_path(path, root, &res, &buflen);
2597        write_sequnlock(&rename_lock);
2598        br_read_unlock(&vfsmount_lock);
2599
2600        if (error < 0)
2601                return ERR_PTR(error);
2602        if (error > 0)
2603                return NULL;
2604        return res;
2605}
2606
2607char *d_absolute_path(const struct path *path,
2608               char *buf, int buflen)
2609{
2610        struct path root = {};
2611        char *res = buf + buflen;
2612        int error;
2613
2614        prepend(&res, &buflen, "\0", 1);
2615        br_read_lock(&vfsmount_lock);
2616        write_seqlock(&rename_lock);
2617        error = prepend_path(path, &root, &res, &buflen);
2618        write_sequnlock(&rename_lock);
2619        br_read_unlock(&vfsmount_lock);
2620
2621        if (error > 1)
2622                error = -EINVAL;
2623        if (error < 0)
2624                return ERR_PTR(error);
2625        return res;
2626}
2627
2628/*
2629 * same as __d_path but appends "(deleted)" for unlinked files.
2630 */
2631static int path_with_deleted(const struct path *path,
2632                             const struct path *root,
2633                             char **buf, int *buflen)
2634{
2635        prepend(buf, buflen, "\0", 1);
2636        if (d_unlinked(path->dentry)) {
2637                int error = prepend(buf, buflen, " (deleted)", 10);
2638                if (error)
2639                        return error;
2640        }
2641
2642        return prepend_path(path, root, buf, buflen);
2643}
2644
2645static int prepend_unreachable(char **buffer, int *buflen)
2646{
2647        return prepend(buffer, buflen, "(unreachable)", 13);
2648}
2649
2650/**
2651 * d_path - return the path of a dentry
2652 * @path: path to report
2653 * @buf: buffer to return value in
2654 * @buflen: buffer length
2655 *
2656 * Convert a dentry into an ASCII path name. If the entry has been deleted
2657 * the string " (deleted)" is appended. Note that this is ambiguous.
2658 *
2659 * Returns a pointer into the buffer or an error code if the path was
2660 * too long. Note: Callers should use the returned pointer, not the passed
2661 * in buffer, to use the name! The implementation often starts at an offset
2662 * into the buffer, and may leave 0 bytes at the start.
2663 *
2664 * "buflen" should be positive.
2665 */
2666char *d_path(const struct path *path, char *buf, int buflen)
2667{
2668        char *res = buf + buflen;
2669        struct path root;
2670        int error;
2671
2672        /*
2673         * We have various synthetic filesystems that never get mounted.  On
2674         * these filesystems dentries are never used for lookup purposes, and
2675         * thus don't need to be hashed.  They also don't need a name until a
2676         * user wants to identify the object in /proc/pid/fd/.  The little hack
2677         * below allows us to generate a name for these objects on demand:
2678         */
2679        if (path->dentry->d_op && path->dentry->d_op->d_dname)
2680                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2681
2682        get_fs_root(current->fs, &root);
2683        br_read_lock(&vfsmount_lock);
2684        write_seqlock(&rename_lock);
2685        error = path_with_deleted(path, &root, &res, &buflen);
2686        write_sequnlock(&rename_lock);
2687        br_read_unlock(&vfsmount_lock);
2688        if (error < 0)
2689                res = ERR_PTR(error);
2690        path_put(&root);
2691        return res;
2692}
2693EXPORT_SYMBOL(d_path);
2694
2695/*
2696 * Helper function for dentry_operations.d_dname() members
2697 */
2698char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2699                        const char *fmt, ...)
2700{
2701        va_list args;
2702        char temp[64];
2703        int sz;
2704
2705        va_start(args, fmt);
2706        sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2707        va_end(args);
2708
2709        if (sz > sizeof(temp) || sz > buflen)
2710                return ERR_PTR(-ENAMETOOLONG);
2711
2712        buffer += buflen - sz;
2713        return memcpy(buffer, temp, sz);
2714}
2715
2716char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
2717{
2718        char *end = buffer + buflen;
2719        /* these dentries are never renamed, so d_lock is not needed */
2720        if (prepend(&end, &buflen, " (deleted)", 11) ||
2721            prepend_name(&end, &buflen, &dentry->d_name) ||
2722            prepend(&end, &buflen, "/", 1))  
2723                end = ERR_PTR(-ENAMETOOLONG);
2724        return end;  
2725}
2726
2727/*
2728 * Write full pathname from the root of the filesystem into the buffer.
2729 */
2730static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2731{
2732        char *end = buf + buflen;
2733        char *retval;
2734
2735        prepend(&end, &buflen, "\0", 1);
2736        if (buflen < 1)
2737                goto Elong;
2738        /* Get '/' right */
2739        retval = end-1;
2740        *retval = '/';
2741
2742        while (!IS_ROOT(dentry)) {
2743                struct dentry *parent = dentry->d_parent;
2744                int error;
2745
2746                prefetch(parent);
2747                spin_lock(&dentry->d_lock);
2748                error = prepend_name(&end, &buflen, &dentry->d_name);
2749                spin_unlock(&dentry->d_lock);
2750                if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2751                        goto Elong;
2752
2753                retval = end;
2754                dentry = parent;
2755        }
2756        return retval;
2757Elong:
2758        return ERR_PTR(-ENAMETOOLONG);
2759}
2760
2761char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2762{
2763        char *retval;
2764
2765        write_seqlock(&rename_lock);
2766        retval = __dentry_path(dentry, buf, buflen);
2767        write_sequnlock(&rename_lock);
2768
2769        return retval;
2770}
2771EXPORT_SYMBOL(dentry_path_raw);
2772
2773char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2774{
2775        char *p = NULL;
2776        char *retval;
2777
2778        write_seqlock(&rename_lock);
2779        if (d_unlinked(dentry)) {
2780                p = buf + buflen;
2781                if (prepend(&p, &buflen, "//deleted", 10) != 0)
2782                        goto Elong;
2783                buflen++;
2784        }
2785        retval = __dentry_path(dentry, buf, buflen);
2786        write_sequnlock(&rename_lock);
2787        if (!IS_ERR(retval) && p)
2788                *p = '/';       /* restore '/' overriden with '\0' */
2789        return retval;
2790Elong:
2791        return ERR_PTR(-ENAMETOOLONG);
2792}
2793
2794/*
2795 * NOTE! The user-level library version returns a
2796 * character pointer. The kernel system call just
2797 * returns the length of the buffer filled (which
2798 * includes the ending '\0' character), or a negative
2799 * error value. So libc would do something like
2800 *
2801 *      char *getcwd(char * buf, size_t size)
2802 *      {
2803 *              int retval;
2804 *
2805 *              retval = sys_getcwd(buf, size);
2806 *              if (retval >= 0)
2807 *                      return buf;
2808 *              errno = -retval;
2809 *              return NULL;
2810 *      }
2811 */
2812SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2813{
2814        int error;
2815        struct path pwd, root;
2816        char *page = (char *) __get_free_page(GFP_USER);
2817
2818        if (!page)
2819                return -ENOMEM;
2820
2821        get_fs_root_and_pwd(current->fs, &root, &pwd);
2822
2823        error = -ENOENT;
2824        br_read_lock(&vfsmount_lock);
2825        write_seqlock(&rename_lock);
2826        if (!d_unlinked(pwd.dentry)) {
2827                unsigned long len;
2828                char *cwd = page + PAGE_SIZE;
2829                int buflen = PAGE_SIZE;
2830
2831                prepend(&cwd, &buflen, "\0", 1);
2832                error = prepend_path(&pwd, &root, &cwd, &buflen);
2833                write_sequnlock(&rename_lock);
2834                br_read_unlock(&vfsmount_lock);
2835
2836                if (error < 0)
2837                        goto out;
2838
2839                /* Unreachable from current root */
2840                if (error > 0) {
2841                        error = prepend_unreachable(&cwd, &buflen);
2842                        if (error)
2843                                goto out;
2844                }
2845
2846                error = -ERANGE;
2847                len = PAGE_SIZE + page - cwd;
2848                if (len <= size) {
2849                        error = len;
2850                        if (copy_to_user(buf, cwd, len))
2851                                error = -EFAULT;
2852                }
2853        } else {
2854                write_sequnlock(&rename_lock);
2855                br_read_unlock(&vfsmount_lock);
2856        }
2857
2858out:
2859        path_put(&pwd);
2860        path_put(&root);
2861        free_page((unsigned long) page);
2862        return error;
2863}
2864
2865/*
2866 * Test whether new_dentry is a subdirectory of old_dentry.
2867 *
2868 * Trivially implemented using the dcache structure
2869 */
2870
2871/**
2872 * is_subdir - is new dentry a subdirectory of old_dentry
2873 * @new_dentry: new dentry
2874 * @old_dentry: old dentry
2875 *
2876 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2877 * Returns 0 otherwise.
2878 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2879 */
2880  
2881int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2882{
2883        int result;
2884        unsigned seq;
2885
2886        if (new_dentry == old_dentry)
2887                return 1;
2888
2889        do {
2890                /* for restarting inner loop in case of seq retry */
2891                seq = read_seqbegin(&rename_lock);
2892                /*
2893                 * Need rcu_readlock to protect against the d_parent trashing
2894                 * due to d_move
2895                 */
2896                rcu_read_lock();
2897                if (d_ancestor(old_dentry, new_dentry))
2898                        result = 1;
2899                else
2900                        result = 0;
2901                rcu_read_unlock();
2902        } while (read_seqretry(&rename_lock, seq));
2903
2904        return result;
2905}
2906
2907void d_genocide(struct dentry *root)
2908{
2909        struct dentry *this_parent;
2910        struct list_head *next;
2911        unsigned seq;
2912        int locked = 0;
2913
2914        seq = read_seqbegin(&rename_lock);
2915again:
2916        this_parent = root;
2917        spin_lock(&this_parent->d_lock);
2918repeat:
2919        next = this_parent->d_subdirs.next;
2920resume:
2921        while (next != &this_parent->d_subdirs) {
2922                struct list_head *tmp = next;
2923                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2924                next = tmp->next;
2925
2926                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2927                if (d_unhashed(dentry) || !dentry->d_inode) {
2928                        spin_unlock(&dentry->d_lock);
2929                        continue;
2930                }
2931                if (!list_empty(&dentry->d_subdirs)) {
2932                        spin_unlock(&this_parent->d_lock);
2933                        spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2934                        this_parent = dentry;
2935                        spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2936                        goto repeat;
2937                }
2938                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2939                        dentry->d_flags |= DCACHE_GENOCIDE;
2940                        dentry->d_lockref.count--;
2941                }
2942                spin_unlock(&dentry->d_lock);
2943        }
2944        if (this_parent != root) {
2945                struct dentry *child = this_parent;
2946                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2947                        this_parent->d_flags |= DCACHE_GENOCIDE;
2948                        this_parent->d_lockref.count--;
2949                }
2950                this_parent = try_to_ascend(this_parent, locked, seq);
2951                if (!this_parent)
2952                        goto rename_retry;
2953                next = child->d_u.d_child.next;
2954                goto resume;
2955        }
2956        spin_unlock(&this_parent->d_lock);
2957        if (!locked && read_seqretry(&rename_lock, seq))
2958                goto rename_retry;
2959        if (locked)
2960                write_sequnlock(&rename_lock);
2961        return;
2962
2963rename_retry:
2964        if (locked)
2965                goto again;
2966        locked = 1;
2967        write_seqlock(&rename_lock);
2968        goto again;
2969}
2970
2971void d_tmpfile(struct dentry *dentry, struct inode *inode)
2972{
2973        inode_dec_link_count(inode);
2974        BUG_ON(dentry->d_name.name != dentry->d_iname ||
2975                !hlist_unhashed(&dentry->d_alias) ||
2976                !d_unlinked(dentry));
2977        spin_lock(&dentry->d_parent->d_lock);
2978        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2979        dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
2980                                (unsigned long long)inode->i_ino);
2981        spin_unlock(&dentry->d_lock);
2982        spin_unlock(&dentry->d_parent->d_lock);
2983        d_instantiate(dentry, inode);
2984}
2985EXPORT_SYMBOL(d_tmpfile);
2986
2987static __initdata unsigned long dhash_entries;
2988static int __init set_dhash_entries(char *str)
2989{
2990        if (!str)
2991                return 0;
2992        dhash_entries = simple_strtoul(str, &str, 0);
2993        return 1;
2994}
2995__setup("dhash_entries=", set_dhash_entries);
2996
2997static void __init dcache_init_early(void)
2998{
2999        unsigned int loop;
3000
3001        /* If hashes are distributed across NUMA nodes, defer
3002         * hash allocation until vmalloc space is available.
3003         */
3004        if (hashdist)
3005                return;
3006
3007        dentry_hashtable =
3008                alloc_large_system_hash("Dentry cache",
3009                                        sizeof(struct hlist_bl_head),
3010                                        dhash_entries,
3011                                        13,
3012                                        HASH_EARLY,
3013                                        &d_hash_shift,
3014                                        &d_hash_mask,
3015                                        0,
3016                                        0);
3017
3018        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3019                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3020}
3021
3022static void __init dcache_init(void)
3023{
3024        unsigned int loop;
3025
3026        /* 
3027         * A constructor could be added for stable state like the lists,
3028         * but it is probably not worth it because of the cache nature
3029         * of the dcache. 
3030         */
3031        dentry_cache = KMEM_CACHE(dentry,
3032                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3033
3034        /* Hash may have been set up in dcache_init_early */
3035        if (!hashdist)
3036                return;
3037
3038        dentry_hashtable =
3039                alloc_large_system_hash("Dentry cache",
3040                                        sizeof(struct hlist_bl_head),
3041                                        dhash_entries,
3042                                        13,
3043                                        0,
3044                                        &d_hash_shift,
3045                                        &d_hash_mask,
3046                                        0,
3047                                        0);
3048
3049        for (loop = 0; loop < (1U << d_hash_shift); loop++)
3050                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3051}
3052
3053/* SLAB cache for __getname() consumers */
3054struct kmem_cache *names_cachep __read_mostly;
3055EXPORT_SYMBOL(names_cachep);
3056
3057EXPORT_SYMBOL(d_genocide);
3058
3059void __init vfs_caches_init_early(void)
3060{
3061        dcache_init_early();
3062        inode_init_early();
3063}
3064
3065void __init vfs_caches_init(unsigned long mempages)
3066{
3067        unsigned long reserve;
3068
3069        /* Base hash sizes on available memory, with a reserve equal to
3070           150% of current kernel size */
3071
3072        reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
3073        mempages -= reserve;
3074
3075        names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3076                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3077
3078        dcache_init();
3079        inode_init();
3080        files_init(mempages);
3081        mnt_init();
3082        bdev_cache_init();
3083        chrdev_init();
3084}
3085