linux/fs/kernfs/dir.c
<<
>>
Prefs
   1/*
   2 * fs/kernfs/dir.c - kernfs directory implementation
   3 *
   4 * Copyright (c) 2001-3 Patrick Mochel
   5 * Copyright (c) 2007 SUSE Linux Products GmbH
   6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
   7 *
   8 * This file is released under the GPLv2.
   9 */
  10
  11#include <linux/sched.h>
  12#include <linux/fs.h>
  13#include <linux/namei.h>
  14#include <linux/idr.h>
  15#include <linux/slab.h>
  16#include <linux/security.h>
  17#include <linux/hash.h>
  18
  19#include "kernfs-internal.h"
  20
  21DECLARE_RWSEM(kernfs_rwsem);
  22static DEFINE_SPINLOCK(kernfs_rename_lock);     /* kn->parent and ->name */
  23static char kernfs_pr_cont_buf[PATH_MAX];       /* protected by rename_lock */
  24static DEFINE_SPINLOCK(kernfs_idr_lock);        /* root->ino_idr */
  25
  26#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
  27
  28static bool kernfs_active(struct kernfs_node *kn)
  29{
  30        lockdep_assert_held(&kernfs_rwsem);
  31        return atomic_read(&kn->active) >= 0;
  32}
  33
  34static bool kernfs_lockdep(struct kernfs_node *kn)
  35{
  36#ifdef CONFIG_DEBUG_LOCK_ALLOC
  37        return kn->flags & KERNFS_LOCKDEP;
  38#else
  39        return false;
  40#endif
  41}
  42
  43static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
  44{
  45        if (!kn)
  46                return strlcpy(buf, "(null)", buflen);
  47
  48        return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
  49}
  50
  51/* kernfs_node_depth - compute depth from @from to @to */
  52static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
  53{
  54        size_t depth = 0;
  55
  56        while (to->parent && to != from) {
  57                depth++;
  58                to = to->parent;
  59        }
  60        return depth;
  61}
  62
  63static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
  64                                                  struct kernfs_node *b)
  65{
  66        size_t da, db;
  67        struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
  68
  69        if (ra != rb)
  70                return NULL;
  71
  72        da = kernfs_depth(ra->kn, a);
  73        db = kernfs_depth(rb->kn, b);
  74
  75        while (da > db) {
  76                a = a->parent;
  77                da--;
  78        }
  79        while (db > da) {
  80                b = b->parent;
  81                db--;
  82        }
  83
  84        /* worst case b and a will be the same at root */
  85        while (b != a) {
  86                b = b->parent;
  87                a = a->parent;
  88        }
  89
  90        return a;
  91}
  92
  93/**
  94 * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
  95 * where kn_from is treated as root of the path.
  96 * @kn_from: kernfs node which should be treated as root for the path
  97 * @kn_to: kernfs node to which path is needed
  98 * @buf: buffer to copy the path into
  99 * @buflen: size of @buf
 100 *
 101 * We need to handle couple of scenarios here:
 102 * [1] when @kn_from is an ancestor of @kn_to at some level
 103 * kn_from: /n1/n2/n3
 104 * kn_to:   /n1/n2/n3/n4/n5
 105 * result:  /n4/n5
 106 *
 107 * [2] when @kn_from is on a different hierarchy and we need to find common
 108 * ancestor between @kn_from and @kn_to.
 109 * kn_from: /n1/n2/n3/n4
 110 * kn_to:   /n1/n2/n5
 111 * result:  /../../n5
 112 * OR
 113 * kn_from: /n1/n2/n3/n4/n5   [depth=5]
 114 * kn_to:   /n1/n2/n3         [depth=3]
 115 * result:  /../..
 116 *
 117 * [3] when @kn_to is NULL result will be "(null)"
 118 *
 119 * Returns the length of the full path.  If the full length is equal to or
 120 * greater than @buflen, @buf contains the truncated path with the trailing
 121 * '\0'.  On error, -errno is returned.
 122 */
 123static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 124                                        struct kernfs_node *kn_from,
 125                                        char *buf, size_t buflen)
 126{
 127        struct kernfs_node *kn, *common;
 128        const char parent_str[] = "/..";
 129        size_t depth_from, depth_to, len = 0;
 130        int i, j;
 131
 132        if (!kn_to)
 133                return strlcpy(buf, "(null)", buflen);
 134
 135        if (!kn_from)
 136                kn_from = kernfs_root(kn_to)->kn;
 137
 138        if (kn_from == kn_to)
 139                return strlcpy(buf, "/", buflen);
 140
 141        common = kernfs_common_ancestor(kn_from, kn_to);
 142        if (WARN_ON(!common))
 143                return -EINVAL;
 144
 145        depth_to = kernfs_depth(common, kn_to);
 146        depth_from = kernfs_depth(common, kn_from);
 147
 148        if (buf)
 149                buf[0] = '\0';
 150
 151        for (i = 0; i < depth_from; i++)
 152                len += strlcpy(buf + len, parent_str,
 153                               len < buflen ? buflen - len : 0);
 154
 155        /* Calculate how many bytes we need for the rest */
 156        for (i = depth_to - 1; i >= 0; i--) {
 157                for (kn = kn_to, j = 0; j < i; j++)
 158                        kn = kn->parent;
 159                len += strlcpy(buf + len, "/",
 160                               len < buflen ? buflen - len : 0);
 161                len += strlcpy(buf + len, kn->name,
 162                               len < buflen ? buflen - len : 0);
 163        }
 164
 165        return len;
 166}
 167
 168/**
 169 * kernfs_name - obtain the name of a given node
 170 * @kn: kernfs_node of interest
 171 * @buf: buffer to copy @kn's name into
 172 * @buflen: size of @buf
 173 *
 174 * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
 175 * similar to strlcpy().  It returns the length of @kn's name and if @buf
 176 * isn't long enough, it's filled upto @buflen-1 and nul terminated.
 177 *
 178 * Fills buffer with "(null)" if @kn is NULL.
 179 *
 180 * This function can be called from any context.
 181 */
 182int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 183{
 184        unsigned long flags;
 185        int ret;
 186
 187        spin_lock_irqsave(&kernfs_rename_lock, flags);
 188        ret = kernfs_name_locked(kn, buf, buflen);
 189        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 190        return ret;
 191}
 192
 193/**
 194 * kernfs_path_from_node - build path of node @to relative to @from.
 195 * @from: parent kernfs_node relative to which we need to build the path
 196 * @to: kernfs_node of interest
 197 * @buf: buffer to copy @to's path into
 198 * @buflen: size of @buf
 199 *
 200 * Builds @to's path relative to @from in @buf. @from and @to must
 201 * be on the same kernfs-root. If @from is not parent of @to, then a relative
 202 * path (which includes '..'s) as needed to reach from @from to @to is
 203 * returned.
 204 *
 205 * Returns the length of the full path.  If the full length is equal to or
 206 * greater than @buflen, @buf contains the truncated path with the trailing
 207 * '\0'.  On error, -errno is returned.
 208 */
 209int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
 210                          char *buf, size_t buflen)
 211{
 212        unsigned long flags;
 213        int ret;
 214
 215        spin_lock_irqsave(&kernfs_rename_lock, flags);
 216        ret = kernfs_path_from_node_locked(to, from, buf, buflen);
 217        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 218        return ret;
 219}
 220EXPORT_SYMBOL_GPL(kernfs_path_from_node);
 221
 222/**
 223 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
 224 * @kn: kernfs_node of interest
 225 *
 226 * This function can be called from any context.
 227 */
 228void pr_cont_kernfs_name(struct kernfs_node *kn)
 229{
 230        unsigned long flags;
 231
 232        spin_lock_irqsave(&kernfs_rename_lock, flags);
 233
 234        kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
 235        pr_cont("%s", kernfs_pr_cont_buf);
 236
 237        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 238}
 239
 240/**
 241 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 242 * @kn: kernfs_node of interest
 243 *
 244 * This function can be called from any context.
 245 */
 246void pr_cont_kernfs_path(struct kernfs_node *kn)
 247{
 248        unsigned long flags;
 249        int sz;
 250
 251        spin_lock_irqsave(&kernfs_rename_lock, flags);
 252
 253        sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
 254                                          sizeof(kernfs_pr_cont_buf));
 255        if (sz < 0) {
 256                pr_cont("(error)");
 257                goto out;
 258        }
 259
 260        if (sz >= sizeof(kernfs_pr_cont_buf)) {
 261                pr_cont("(name too long)");
 262                goto out;
 263        }
 264
 265        pr_cont("%s", kernfs_pr_cont_buf);
 266
 267out:
 268        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 269}
 270
 271/**
 272 * kernfs_get_parent - determine the parent node and pin it
 273 * @kn: kernfs_node of interest
 274 *
 275 * Determines @kn's parent, pins and returns it.  This function can be
 276 * called from any context.
 277 */
 278struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 279{
 280        struct kernfs_node *parent;
 281        unsigned long flags;
 282
 283        spin_lock_irqsave(&kernfs_rename_lock, flags);
 284        parent = kn->parent;
 285        kernfs_get(parent);
 286        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 287
 288        return parent;
 289}
 290
 291/**
 292 *      kernfs_name_hash
 293 *      @name: Null terminated string to hash
 294 *      @ns:   Namespace tag to hash
 295 *
 296 *      Returns 31 bit hash of ns + name (so it fits in an off_t )
 297 */
 298static unsigned int kernfs_name_hash(const char *name, const void *ns)
 299{
 300        unsigned long hash = init_name_hash(ns);
 301        unsigned int len = strlen(name);
 302        while (len--)
 303                hash = partial_name_hash(*name++, hash);
 304        hash = end_name_hash(hash);
 305        hash &= 0x7fffffffU;
 306        /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
 307        if (hash < 2)
 308                hash += 2;
 309        if (hash >= INT_MAX)
 310                hash = INT_MAX - 1;
 311        return hash;
 312}
 313
 314static int kernfs_name_compare(unsigned int hash, const char *name,
 315                               const void *ns, const struct kernfs_node *kn)
 316{
 317        if (hash < kn->hash)
 318                return -1;
 319        if (hash > kn->hash)
 320                return 1;
 321        if (ns < kn->ns)
 322                return -1;
 323        if (ns > kn->ns)
 324                return 1;
 325        return strcmp(name, kn->name);
 326}
 327
 328static int kernfs_sd_compare(const struct kernfs_node *left,
 329                             const struct kernfs_node *right)
 330{
 331        return kernfs_name_compare(left->hash, left->name, left->ns, right);
 332}
 333
 334/**
 335 *      kernfs_link_sibling - link kernfs_node into sibling rbtree
 336 *      @kn: kernfs_node of interest
 337 *
 338 *      Link @kn into its sibling rbtree which starts from
 339 *      @kn->parent->dir.children.
 340 *
 341 *      Locking:
 342 *      kernfs_rwsem held exclusive
 343 *
 344 *      RETURNS:
 345 *      0 on susccess -EEXIST on failure.
 346 */
 347static int kernfs_link_sibling(struct kernfs_node *kn)
 348{
 349        struct rb_node **node = &kn->parent->dir.children.rb_node;
 350        struct rb_node *parent = NULL;
 351
 352        while (*node) {
 353                struct kernfs_node *pos;
 354                int result;
 355
 356                pos = rb_to_kn(*node);
 357                parent = *node;
 358                result = kernfs_sd_compare(kn, pos);
 359                if (result < 0)
 360                        node = &pos->rb.rb_left;
 361                else if (result > 0)
 362                        node = &pos->rb.rb_right;
 363                else
 364                        return -EEXIST;
 365        }
 366
 367        /* add new node and rebalance the tree */
 368        rb_link_node(&kn->rb, parent, node);
 369        rb_insert_color(&kn->rb, &kn->parent->dir.children);
 370
 371        /* successfully added, account subdir number */
 372        if (kernfs_type(kn) == KERNFS_DIR)
 373                kn->parent->dir.subdirs++;
 374        kernfs_inc_rev(kn->parent);
 375
 376        return 0;
 377}
 378
 379/**
 380 *      kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 381 *      @kn: kernfs_node of interest
 382 *
 383 *      Try to unlink @kn from its sibling rbtree which starts from
 384 *      kn->parent->dir.children.  Returns %true if @kn was actually
 385 *      removed, %false if @kn wasn't on the rbtree.
 386 *
 387 *      Locking:
 388 *      kernfs_rwsem held exclusive
 389 */
 390static bool kernfs_unlink_sibling(struct kernfs_node *kn)
 391{
 392        if (RB_EMPTY_NODE(&kn->rb))
 393                return false;
 394
 395        if (kernfs_type(kn) == KERNFS_DIR)
 396                kn->parent->dir.subdirs--;
 397        kernfs_inc_rev(kn->parent);
 398
 399        rb_erase(&kn->rb, &kn->parent->dir.children);
 400        RB_CLEAR_NODE(&kn->rb);
 401        return true;
 402}
 403
 404/**
 405 *      kernfs_get_active - get an active reference to kernfs_node
 406 *      @kn: kernfs_node to get an active reference to
 407 *
 408 *      Get an active reference of @kn.  This function is noop if @kn
 409 *      is NULL.
 410 *
 411 *      RETURNS:
 412 *      Pointer to @kn on success, NULL on failure.
 413 */
 414struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
 415{
 416        if (unlikely(!kn))
 417                return NULL;
 418
 419        if (!atomic_inc_unless_negative(&kn->active))
 420                return NULL;
 421
 422        if (kernfs_lockdep(kn))
 423                rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
 424        return kn;
 425}
 426
 427/**
 428 *      kernfs_put_active - put an active reference to kernfs_node
 429 *      @kn: kernfs_node to put an active reference to
 430 *
 431 *      Put an active reference to @kn.  This function is noop if @kn
 432 *      is NULL.
 433 */
 434void kernfs_put_active(struct kernfs_node *kn)
 435{
 436        int v;
 437
 438        if (unlikely(!kn))
 439                return;
 440
 441        if (kernfs_lockdep(kn))
 442                rwsem_release(&kn->dep_map, _RET_IP_);
 443        v = atomic_dec_return(&kn->active);
 444        if (likely(v != KN_DEACTIVATED_BIAS))
 445                return;
 446
 447        wake_up_all(&kernfs_root(kn)->deactivate_waitq);
 448}
 449
 450/**
 451 * kernfs_drain - drain kernfs_node
 452 * @kn: kernfs_node to drain
 453 *
 454 * Drain existing usages and nuke all existing mmaps of @kn.  Mutiple
 455 * removers may invoke this function concurrently on @kn and all will
 456 * return after draining is complete.
 457 */
 458static void kernfs_drain(struct kernfs_node *kn)
 459        __releases(&kernfs_rwsem) __acquires(&kernfs_rwsem)
 460{
 461        struct kernfs_root *root = kernfs_root(kn);
 462
 463        lockdep_assert_held_write(&kernfs_rwsem);
 464        WARN_ON_ONCE(kernfs_active(kn));
 465
 466        up_write(&kernfs_rwsem);
 467
 468        if (kernfs_lockdep(kn)) {
 469                rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
 470                if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
 471                        lock_contended(&kn->dep_map, _RET_IP_);
 472        }
 473
 474        /* but everyone should wait for draining */
 475        wait_event(root->deactivate_waitq,
 476                   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
 477
 478        if (kernfs_lockdep(kn)) {
 479                lock_acquired(&kn->dep_map, _RET_IP_);
 480                rwsem_release(&kn->dep_map, _RET_IP_);
 481        }
 482
 483        kernfs_drain_open_files(kn);
 484
 485        down_write(&kernfs_rwsem);
 486}
 487
 488/**
 489 * kernfs_get - get a reference count on a kernfs_node
 490 * @kn: the target kernfs_node
 491 */
 492void kernfs_get(struct kernfs_node *kn)
 493{
 494        if (kn) {
 495                WARN_ON(!atomic_read(&kn->count));
 496                atomic_inc(&kn->count);
 497        }
 498}
 499EXPORT_SYMBOL_GPL(kernfs_get);
 500
 501/**
 502 * kernfs_put - put a reference count on a kernfs_node
 503 * @kn: the target kernfs_node
 504 *
 505 * Put a reference count of @kn and destroy it if it reached zero.
 506 */
 507void kernfs_put(struct kernfs_node *kn)
 508{
 509        struct kernfs_node *parent;
 510        struct kernfs_root *root;
 511
 512        /*
 513         * kernfs_node is freed with ->count 0, kernfs_find_and_get_node_by_ino
 514         * depends on this to filter reused stale node
 515         */
 516        if (!kn || !atomic_dec_and_test(&kn->count))
 517                return;
 518        root = kernfs_root(kn);
 519 repeat:
 520        /*
 521         * Moving/renaming is always done while holding reference.
 522         * kn->parent won't change beneath us.
 523         */
 524        parent = kn->parent;
 525
 526        WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
 527                  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
 528                  parent ? parent->name : "", kn->name, atomic_read(&kn->active));
 529
 530        if (kernfs_type(kn) == KERNFS_LINK)
 531                kernfs_put(kn->symlink.target_kn);
 532
 533        kfree_const(kn->name);
 534
 535        if (kn->iattr) {
 536                simple_xattrs_free(&kn->iattr->xattrs);
 537                kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
 538        }
 539        spin_lock(&kernfs_idr_lock);
 540        idr_remove(&root->ino_idr, kernfs_ino(kn));
 541        spin_unlock(&kernfs_idr_lock);
 542        kmem_cache_free(kernfs_node_cache, kn);
 543
 544        kn = parent;
 545        if (kn) {
 546                if (atomic_dec_and_test(&kn->count))
 547                        goto repeat;
 548        } else {
 549                /* just released the root kn, free @root too */
 550                idr_destroy(&root->ino_idr);
 551                kfree(root);
 552        }
 553}
 554EXPORT_SYMBOL_GPL(kernfs_put);
 555
 556/**
 557 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
 558 * @dentry: the dentry in question
 559 *
 560 * Return the kernfs_node associated with @dentry.  If @dentry is not a
 561 * kernfs one, %NULL is returned.
 562 *
 563 * While the returned kernfs_node will stay accessible as long as @dentry
 564 * is accessible, the returned node can be in any state and the caller is
 565 * fully responsible for determining what's accessible.
 566 */
 567struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 568{
 569        if (dentry->d_sb->s_op == &kernfs_sops &&
 570            !d_really_is_negative(dentry))
 571                return kernfs_dentry_node(dentry);
 572        return NULL;
 573}
 574
 575static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 576                                             struct kernfs_node *parent,
 577                                             const char *name, umode_t mode,
 578                                             kuid_t uid, kgid_t gid,
 579                                             unsigned flags)
 580{
 581        struct kernfs_node *kn;
 582        u32 gen;
 583        int cursor;
 584        int ret;
 585
 586        name = kstrdup_const(name, GFP_KERNEL);
 587        if (!name)
 588                return NULL;
 589
 590        kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
 591        if (!kn)
 592                goto err_out1;
 593
 594        idr_preload(GFP_KERNEL);
 595        spin_lock(&kernfs_idr_lock);
 596        cursor = idr_get_cursor(&root->ino_idr);
 597        ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC);
 598        if (ret >= 0 && ret < cursor)
 599                root->next_generation++;
 600        gen = root->next_generation;
 601        spin_unlock(&kernfs_idr_lock);
 602        idr_preload_end();
 603        if (ret < 0)
 604                goto err_out2;
 605
 606        kn->id = (u64)gen << 32 | ret;
 607
 608        /*
 609         * set ino first. This RELEASE is paired with atomic_inc_not_zero in
 610         * kernfs_find_and_get_node_by_ino
 611         */
 612        atomic_set_release(&kn->count, 1);
 613        atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
 614        RB_CLEAR_NODE(&kn->rb);
 615
 616        kn->name = name;
 617        kn->mode = mode;
 618        kn->flags = flags;
 619
 620        if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
 621                struct iattr iattr = {
 622                        .ia_valid = ATTR_UID | ATTR_GID,
 623                        .ia_uid = uid,
 624                        .ia_gid = gid,
 625                };
 626
 627                ret = __kernfs_setattr(kn, &iattr);
 628                if (ret < 0)
 629                        goto err_out3;
 630        }
 631
 632        if (parent) {
 633                ret = security_kernfs_init_security(parent, kn);
 634                if (ret)
 635                        goto err_out3;
 636        }
 637
 638        return kn;
 639
 640 err_out3:
 641        idr_remove(&root->ino_idr, kernfs_ino(kn));
 642 err_out2:
 643        kmem_cache_free(kernfs_node_cache, kn);
 644 err_out1:
 645        kfree_const(name);
 646        return NULL;
 647}
 648
 649struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 650                                    const char *name, umode_t mode,
 651                                    kuid_t uid, kgid_t gid,
 652                                    unsigned flags)
 653{
 654        struct kernfs_node *kn;
 655
 656        kn = __kernfs_new_node(kernfs_root(parent), parent,
 657                               name, mode, uid, gid, flags);
 658        if (kn) {
 659                kernfs_get(parent);
 660                kn->parent = parent;
 661        }
 662        return kn;
 663}
 664
 665/*
 666 * kernfs_find_and_get_node_by_ino - get kernfs_node from inode number
 667 * @root: the kernfs root
 668 * @ino: inode number
 669 *
 670 * RETURNS:
 671 * NULL on failure. Return a kernfs node with reference counter incremented
 672 */
 673struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
 674                                                    unsigned int ino)
 675{
 676        struct kernfs_node *kn;
 677
 678        rcu_read_lock();
 679        kn = idr_find(&root->ino_idr, ino);
 680        if (!kn)
 681                goto out;
 682
 683        /*
 684         * Since kernfs_node is freed in RCU, it's possible an old node for ino
 685         * is freed, but reused before RCU grace period. But a freed node (see
 686         * kernfs_put) or an incompletedly initialized node (see
 687         * __kernfs_new_node) should have 'count' 0. We can use this fact to
 688         * filter out such node.
 689         */
 690        if (!atomic_inc_not_zero(&kn->count)) {
 691                kn = NULL;
 692                goto out;
 693        }
 694
 695        /*
 696         * The node could be a new node or a reused node. If it's a new node,
 697         * we are ok. If it's reused because of RCU (because of
 698         * SLAB_TYPESAFE_BY_RCU), the __kernfs_new_node always sets its 'ino'
 699         * before 'count'. So if 'count' is uptodate, 'ino' should be uptodate,
 700         * hence we can use 'ino' to filter stale node.
 701         */
 702        if (kernfs_ino(kn) != ino)
 703                goto out;
 704        rcu_read_unlock();
 705
 706        return kn;
 707out:
 708        rcu_read_unlock();
 709        kernfs_put(kn);
 710        return NULL;
 711}
 712
 713/**
 714 *      kernfs_add_one - add kernfs_node to parent without warning
 715 *      @kn: kernfs_node to be added
 716 *
 717 *      The caller must already have initialized @kn->parent.  This
 718 *      function increments nlink of the parent's inode if @kn is a
 719 *      directory and link into the children list of the parent.
 720 *
 721 *      RETURNS:
 722 *      0 on success, -EEXIST if entry with the given name already
 723 *      exists.
 724 */
 725int kernfs_add_one(struct kernfs_node *kn)
 726{
 727        struct kernfs_node *parent = kn->parent;
 728        struct kernfs_iattrs *ps_iattr;
 729        bool has_ns;
 730        int ret;
 731
 732        down_write(&kernfs_rwsem);
 733
 734        ret = -EINVAL;
 735        has_ns = kernfs_ns_enabled(parent);
 736        if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 737                 has_ns ? "required" : "invalid", parent->name, kn->name))
 738                goto out_unlock;
 739
 740        if (kernfs_type(parent) != KERNFS_DIR)
 741                goto out_unlock;
 742
 743        ret = -ENOENT;
 744        if (parent->flags & KERNFS_EMPTY_DIR)
 745                goto out_unlock;
 746
 747        if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
 748                goto out_unlock;
 749
 750        kn->hash = kernfs_name_hash(kn->name, kn->ns);
 751
 752        ret = kernfs_link_sibling(kn);
 753        if (ret)
 754                goto out_unlock;
 755
 756        /* Update timestamps on the parent */
 757        ps_iattr = parent->iattr;
 758        if (ps_iattr) {
 759                ktime_get_real_ts64(&ps_iattr->ia_ctime);
 760                ps_iattr->ia_mtime = ps_iattr->ia_ctime;
 761        }
 762
 763        up_write(&kernfs_rwsem);
 764
 765        /*
 766         * Activate the new node unless CREATE_DEACTIVATED is requested.
 767         * If not activated here, the kernfs user is responsible for
 768         * activating the node with kernfs_activate().  A node which hasn't
 769         * been activated is not visible to userland and its removal won't
 770         * trigger deactivation.
 771         */
 772        if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 773                kernfs_activate(kn);
 774        return 0;
 775
 776out_unlock:
 777        up_write(&kernfs_rwsem);
 778        return ret;
 779}
 780
 781/**
 782 * kernfs_find_ns - find kernfs_node with the given name
 783 * @parent: kernfs_node to search under
 784 * @name: name to look for
 785 * @ns: the namespace tag to use
 786 *
 787 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 788 * the found kernfs_node on success, %NULL on failure.
 789 */
 790static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 791                                          const unsigned char *name,
 792                                          const void *ns)
 793{
 794        struct rb_node *node = parent->dir.children.rb_node;
 795        bool has_ns = kernfs_ns_enabled(parent);
 796        unsigned int hash;
 797
 798        lockdep_assert_held(&kernfs_rwsem);
 799
 800        if (has_ns != (bool)ns) {
 801                WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 802                     has_ns ? "required" : "invalid", parent->name, name);
 803                return NULL;
 804        }
 805
 806        hash = kernfs_name_hash(name, ns);
 807        while (node) {
 808                struct kernfs_node *kn;
 809                int result;
 810
 811                kn = rb_to_kn(node);
 812                result = kernfs_name_compare(hash, name, ns, kn);
 813                if (result < 0)
 814                        node = node->rb_left;
 815                else if (result > 0)
 816                        node = node->rb_right;
 817                else
 818                        return kn;
 819        }
 820        return NULL;
 821}
 822
 823static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
 824                                          const unsigned char *path,
 825                                          const void *ns)
 826{
 827        size_t len;
 828        char *p, *name;
 829
 830        lockdep_assert_held_read(&kernfs_rwsem);
 831
 832        /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
 833        spin_lock_irq(&kernfs_rename_lock);
 834
 835        len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
 836
 837        if (len >= sizeof(kernfs_pr_cont_buf)) {
 838                spin_unlock_irq(&kernfs_rename_lock);
 839                return NULL;
 840        }
 841
 842        p = kernfs_pr_cont_buf;
 843
 844        while ((name = strsep(&p, "/")) && parent) {
 845                if (*name == '\0')
 846                        continue;
 847                parent = kernfs_find_ns(parent, name, ns);
 848        }
 849
 850        spin_unlock_irq(&kernfs_rename_lock);
 851
 852        return parent;
 853}
 854
 855/**
 856 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 857 * @parent: kernfs_node to search under
 858 * @name: name to look for
 859 * @ns: the namespace tag to use
 860 *
 861 * Look for kernfs_node with name @name under @parent and get a reference
 862 * if found.  This function may sleep and returns pointer to the found
 863 * kernfs_node on success, %NULL on failure.
 864 */
 865struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
 866                                           const char *name, const void *ns)
 867{
 868        struct kernfs_node *kn;
 869
 870        down_read(&kernfs_rwsem);
 871        kn = kernfs_find_ns(parent, name, ns);
 872        kernfs_get(kn);
 873        up_read(&kernfs_rwsem);
 874
 875        return kn;
 876}
 877EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
 878
 879/**
 880 * kernfs_walk_and_get_ns - find and get kernfs_node with the given path
 881 * @parent: kernfs_node to search under
 882 * @path: path to look for
 883 * @ns: the namespace tag to use
 884 *
 885 * Look for kernfs_node with path @path under @parent and get a reference
 886 * if found.  This function may sleep and returns pointer to the found
 887 * kernfs_node on success, %NULL on failure.
 888 */
 889struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
 890                                           const char *path, const void *ns)
 891{
 892        struct kernfs_node *kn;
 893
 894        down_read(&kernfs_rwsem);
 895        kn = kernfs_walk_ns(parent, path, ns);
 896        kernfs_get(kn);
 897        up_read(&kernfs_rwsem);
 898
 899        return kn;
 900}
 901
 902/**
 903 * kernfs_create_root - create a new kernfs hierarchy
 904 * @scops: optional syscall operations for the hierarchy
 905 * @flags: KERNFS_ROOT_* flags
 906 * @priv: opaque data associated with the new directory
 907 *
 908 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 909 * failure.
 910 */
 911struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 912                                       unsigned int flags, void *priv)
 913{
 914        struct kernfs_root *root;
 915        struct kernfs_node *kn;
 916
 917        root = kzalloc(sizeof(*root), GFP_KERNEL);
 918        if (!root)
 919                return ERR_PTR(-ENOMEM);
 920
 921        idr_init(&root->ino_idr);
 922        INIT_LIST_HEAD(&root->supers);
 923        root->next_generation = 1;
 924
 925        kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
 926                               GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 927                               KERNFS_DIR);
 928        if (!kn) {
 929                idr_destroy(&root->ino_idr);
 930                kfree(root);
 931                return ERR_PTR(-ENOMEM);
 932        }
 933
 934        kn->priv = priv;
 935        kn->dir.root = root;
 936
 937        root->syscall_ops = scops;
 938        root->flags = flags;
 939        root->kn = kn;
 940        init_waitqueue_head(&root->deactivate_waitq);
 941
 942        if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 943                kernfs_activate(kn);
 944
 945        return root;
 946}
 947
 948/**
 949 * kernfs_destroy_root - destroy a kernfs hierarchy
 950 * @root: root of the hierarchy to destroy
 951 *
 952 * Destroy the hierarchy anchored at @root by removing all existing
 953 * directories and destroying @root.
 954 */
 955void kernfs_destroy_root(struct kernfs_root *root)
 956{
 957        kernfs_remove(root->kn);        /* will also free @root */
 958}
 959
 960/**
 961 * kernfs_create_dir_ns - create a directory
 962 * @parent: parent in which to create a new directory
 963 * @name: name of the new directory
 964 * @mode: mode of the new directory
 965 * @uid: uid of the new directory
 966 * @gid: gid of the new directory
 967 * @priv: opaque data associated with the new directory
 968 * @ns: optional namespace tag of the directory
 969 *
 970 * Returns the created node on success, ERR_PTR() value on failure.
 971 */
 972struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 973                                         const char *name, umode_t mode,
 974                                         kuid_t uid, kgid_t gid,
 975                                         void *priv, const void *ns)
 976{
 977        struct kernfs_node *kn;
 978        int rc;
 979
 980        /* allocate */
 981        kn = kernfs_new_node(parent, name, mode | S_IFDIR,
 982                             uid, gid, KERNFS_DIR);
 983        if (!kn)
 984                return ERR_PTR(-ENOMEM);
 985
 986        kn->dir.root = parent->dir.root;
 987        kn->ns = ns;
 988        kn->priv = priv;
 989
 990        /* link in */
 991        rc = kernfs_add_one(kn);
 992        if (!rc)
 993                return kn;
 994
 995        kernfs_put(kn);
 996        return ERR_PTR(rc);
 997}
 998
 999/**
1000 * kernfs_create_empty_dir - create an always empty directory
1001 * @parent: parent in which to create a new directory
1002 * @name: name of the new directory
1003 *
1004 * Returns the created node on success, ERR_PTR() value on failure.
1005 */
1006struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
1007                                            const char *name)
1008{
1009        struct kernfs_node *kn;
1010        int rc;
1011
1012        /* allocate */
1013        kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
1014                             GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
1015        if (!kn)
1016                return ERR_PTR(-ENOMEM);
1017
1018        kn->flags |= KERNFS_EMPTY_DIR;
1019        kn->dir.root = parent->dir.root;
1020        kn->ns = NULL;
1021        kn->priv = NULL;
1022
1023        /* link in */
1024        rc = kernfs_add_one(kn);
1025        if (!rc)
1026                return kn;
1027
1028        kernfs_put(kn);
1029        return ERR_PTR(rc);
1030}
1031
1032static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
1033{
1034        struct kernfs_node *kn;
1035
1036        if (flags & LOOKUP_RCU)
1037                return -ECHILD;
1038
1039        /* Negative hashed dentry? */
1040        if (d_really_is_negative(dentry)) {
1041                struct kernfs_node *parent;
1042
1043                /* If the kernfs parent node has changed discard and
1044                 * proceed to ->lookup.
1045                 */
1046                down_read(&kernfs_rwsem);
1047                spin_lock(&dentry->d_lock);
1048                parent = kernfs_dentry_node(dentry->d_parent);
1049                if (parent) {
1050                        if (kernfs_dir_changed(parent, dentry)) {
1051                                spin_unlock(&dentry->d_lock);
1052                                up_read(&kernfs_rwsem);
1053                                return 0;
1054                        }
1055                }
1056                spin_unlock(&dentry->d_lock);
1057                up_read(&kernfs_rwsem);
1058
1059                /* The kernfs parent node hasn't changed, leave the
1060                 * dentry negative and return success.
1061                 */
1062                return 1;
1063        }
1064
1065        kn = kernfs_dentry_node(dentry);
1066        down_read(&kernfs_rwsem);
1067
1068        /* The kernfs node has been deactivated */
1069        if (!kernfs_active(kn))
1070                goto out_bad;
1071
1072        /* The kernfs node has been moved? */
1073        if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
1074                goto out_bad;
1075
1076        /* The kernfs node has been renamed */
1077        if (strcmp(dentry->d_name.name, kn->name) != 0)
1078                goto out_bad;
1079
1080        /* The kernfs node has been moved to a different namespace */
1081        if (kn->parent && kernfs_ns_enabled(kn->parent) &&
1082            kernfs_info(dentry->d_sb)->ns != kn->ns)
1083                goto out_bad;
1084
1085        up_read(&kernfs_rwsem);
1086        return 1;
1087out_bad:
1088        up_read(&kernfs_rwsem);
1089        return 0;
1090}
1091
1092const struct dentry_operations kernfs_dops = {
1093        .d_revalidate   = kernfs_dop_revalidate,
1094};
1095
1096static struct dentry *kernfs_iop_lookup(struct inode *dir,
1097                                        struct dentry *dentry,
1098                                        unsigned int flags)
1099{
1100        struct kernfs_node *parent = dir->i_private;
1101        struct kernfs_node *kn;
1102        struct inode *inode = NULL;
1103        const void *ns = NULL;
1104
1105        down_read(&kernfs_rwsem);
1106        if (kernfs_ns_enabled(parent))
1107                ns = kernfs_info(dir->i_sb)->ns;
1108
1109        kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
1110        /* attach dentry and inode */
1111        if (kn && kernfs_active(kn)) {
1112                inode = kernfs_get_inode(dir->i_sb, kn);
1113                if (!inode)
1114                        inode = ERR_PTR(-ENOMEM);
1115        }
1116        /* Needed only for negative dentry validation */
1117        if (!inode)
1118                kernfs_set_rev(parent, dentry);
1119        up_read(&kernfs_rwsem);
1120
1121        /* instantiate and hash (possibly negative) dentry */
1122        return d_splice_alias(inode, dentry);
1123}
1124
1125static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
1126                            umode_t mode)
1127{
1128        struct kernfs_node *parent = dir->i_private;
1129        struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
1130        int ret;
1131
1132        if (!scops || !scops->mkdir)
1133                return -EPERM;
1134
1135        if (!kernfs_get_active(parent))
1136                return -ENODEV;
1137
1138        ret = scops->mkdir(parent, dentry->d_name.name, mode);
1139
1140        kernfs_put_active(parent);
1141        return ret;
1142}
1143
1144static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
1145{
1146        struct kernfs_node *kn  = kernfs_dentry_node(dentry);
1147        struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1148        int ret;
1149
1150        if (!scops || !scops->rmdir)
1151                return -EPERM;
1152
1153        if (!kernfs_get_active(kn))
1154                return -ENODEV;
1155
1156        ret = scops->rmdir(kn);
1157
1158        kernfs_put_active(kn);
1159        return ret;
1160}
1161
1162static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
1163                             struct inode *new_dir, struct dentry *new_dentry,
1164                             unsigned int flags)
1165{
1166        struct kernfs_node *kn = kernfs_dentry_node(old_dentry);
1167        struct kernfs_node *new_parent = new_dir->i_private;
1168        struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
1169        int ret;
1170
1171        if (flags)
1172                return -EINVAL;
1173
1174        if (!scops || !scops->rename)
1175                return -EPERM;
1176
1177        if (!kernfs_get_active(kn))
1178                return -ENODEV;
1179
1180        if (!kernfs_get_active(new_parent)) {
1181                kernfs_put_active(kn);
1182                return -ENODEV;
1183        }
1184
1185        ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
1186
1187        kernfs_put_active(new_parent);
1188        kernfs_put_active(kn);
1189        return ret;
1190}
1191
1192const struct inode_operations kernfs_dir_iops = {
1193        .lookup         = kernfs_iop_lookup,
1194        .permission     = kernfs_iop_permission,
1195        .setattr        = kernfs_iop_setattr,
1196        .getattr        = kernfs_iop_getattr,
1197        .listxattr      = kernfs_iop_listxattr,
1198
1199        .mkdir          = kernfs_iop_mkdir,
1200        .rmdir          = kernfs_iop_rmdir,
1201        .rename         = kernfs_iop_rename,
1202};
1203
1204static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
1205{
1206        struct kernfs_node *last;
1207
1208        while (true) {
1209                struct rb_node *rbn;
1210
1211                last = pos;
1212
1213                if (kernfs_type(pos) != KERNFS_DIR)
1214                        break;
1215
1216                rbn = rb_first(&pos->dir.children);
1217                if (!rbn)
1218                        break;
1219
1220                pos = rb_to_kn(rbn);
1221        }
1222
1223        return last;
1224}
1225
1226/**
1227 * kernfs_next_descendant_post - find the next descendant for post-order walk
1228 * @pos: the current position (%NULL to initiate traversal)
1229 * @root: kernfs_node whose descendants to walk
1230 *
1231 * Find the next descendant to visit for post-order traversal of @root's
1232 * descendants.  @root is included in the iteration and the last node to be
1233 * visited.
1234 */
1235static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
1236                                                       struct kernfs_node *root)
1237{
1238        struct rb_node *rbn;
1239
1240        lockdep_assert_held_write(&kernfs_rwsem);
1241
1242        /* if first iteration, visit leftmost descendant which may be root */
1243        if (!pos)
1244                return kernfs_leftmost_descendant(root);
1245
1246        /* if we visited @root, we're done */
1247        if (pos == root)
1248                return NULL;
1249
1250        /* if there's an unvisited sibling, visit its leftmost descendant */
1251        rbn = rb_next(&pos->rb);
1252        if (rbn)
1253                return kernfs_leftmost_descendant(rb_to_kn(rbn));
1254
1255        /* no sibling left, visit parent */
1256        return pos->parent;
1257}
1258
1259/**
1260 * kernfs_activate - activate a node which started deactivated
1261 * @kn: kernfs_node whose subtree is to be activated
1262 *
1263 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
1264 * needs to be explicitly activated.  A node which hasn't been activated
1265 * isn't visible to userland and deactivation is skipped during its
1266 * removal.  This is useful to construct atomic init sequences where
1267 * creation of multiple nodes should either succeed or fail atomically.
1268 *
1269 * The caller is responsible for ensuring that this function is not called
1270 * after kernfs_remove*() is invoked on @kn.
1271 */
1272void kernfs_activate(struct kernfs_node *kn)
1273{
1274        struct kernfs_node *pos;
1275
1276        down_write(&kernfs_rwsem);
1277
1278        pos = NULL;
1279        while ((pos = kernfs_next_descendant_post(pos, kn))) {
1280                if (!pos || (pos->flags & KERNFS_ACTIVATED))
1281                        continue;
1282
1283                WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1284                WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1285
1286                atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1287                pos->flags |= KERNFS_ACTIVATED;
1288        }
1289
1290        up_write(&kernfs_rwsem);
1291}
1292
1293static void __kernfs_remove(struct kernfs_node *kn)
1294{
1295        struct kernfs_node *pos;
1296
1297        lockdep_assert_held_write(&kernfs_rwsem);
1298
1299        /*
1300         * Short-circuit if non-root @kn has already finished removal.
1301         * This is for kernfs_remove_self() which plays with active ref
1302         * after removal.
1303         */
1304        if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1305                return;
1306
1307        pr_debug("kernfs %s: removing\n", kn->name);
1308
1309        /* prevent any new usage under @kn by deactivating all nodes */
1310        pos = NULL;
1311        while ((pos = kernfs_next_descendant_post(pos, kn)))
1312                if (kernfs_active(pos))
1313                        atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1314
1315        /* deactivate and unlink the subtree node-by-node */
1316        do {
1317                pos = kernfs_leftmost_descendant(kn);
1318
1319                /*
1320                 * kernfs_drain() drops kernfs_rwsem temporarily and @pos's
1321                 * base ref could have been put by someone else by the time
1322                 * the function returns.  Make sure it doesn't go away
1323                 * underneath us.
1324                 */
1325                kernfs_get(pos);
1326
1327                /*
1328                 * Drain iff @kn was activated.  This avoids draining and
1329                 * its lockdep annotations for nodes which have never been
1330                 * activated and allows embedding kernfs_remove() in create
1331                 * error paths without worrying about draining.
1332                 */
1333                if (kn->flags & KERNFS_ACTIVATED)
1334                        kernfs_drain(pos);
1335                else
1336                        WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1337
1338                /*
1339                 * kernfs_unlink_sibling() succeeds once per node.  Use it
1340                 * to decide who's responsible for cleanups.
1341                 */
1342                if (!pos->parent || kernfs_unlink_sibling(pos)) {
1343                        struct kernfs_iattrs *ps_iattr =
1344                                pos->parent ? pos->parent->iattr : NULL;
1345
1346                        /* update timestamps on the parent */
1347                        if (ps_iattr) {
1348                                ktime_get_real_ts64(&ps_iattr->ia_ctime);
1349                                ps_iattr->ia_mtime = ps_iattr->ia_ctime;
1350                        }
1351
1352                        kernfs_put(pos);
1353                }
1354
1355                kernfs_put(pos);
1356        } while (pos != kn);
1357}
1358
1359/**
1360 * kernfs_remove - remove a kernfs_node recursively
1361 * @kn: the kernfs_node to remove
1362 *
1363 * Remove @kn along with all its subdirectories and files.
1364 */
1365void kernfs_remove(struct kernfs_node *kn)
1366{
1367        down_write(&kernfs_rwsem);
1368        __kernfs_remove(kn);
1369        up_write(&kernfs_rwsem);
1370}
1371
/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops.  Every call must be
 * paired with a call to kernfs_unbreak_active_protection().
 *
 * The active reference the caller holds on @kn is released.  From then on
 * @kn may be removed at any point and the caller alone is responsible for
 * making sure that whatever it dereferences is still accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
        /*
         * Drop out of the active ref dependency chain.  Lockdep will
         * complain if we are called without holding an active ref.
         */
        kernfs_put_active(kn);
}
1394
1395/**
1396 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1397 * @kn: the self kernfs_node
1398 *
1399 * If kernfs_break_active_protection() was called, this function must be
1400 * invoked before finishing the kernfs operation.  Note that while this
1401 * function restores the active reference, it doesn't and can't actually
1402 * restore the active protection - @kn may already or be in the process of
1403 * being removed.  Once kernfs_break_active_protection() is invoked, that
1404 * protection is irreversibly gone for the kernfs operation instance.
1405 *
1406 * While this function may be called at any point after
1407 * kernfs_break_active_protection() is invoked, its most useful location
1408 * would be right before the enclosing kernfs operation returns.
1409 */
1410void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1411{
1412        /*
1413         * @kn->active could be in any state; however, the increment we do
1414         * here will be undone as soon as the enclosing kernfs operation
1415         * finishes and this temporary bump can't break anything.  If @kn
1416         * is alive, nothing changes.  If @kn is being deactivated, the
1417         * soon-to-follow put will either finish deactivation or restore
1418         * deactivated state.  If @kn is already removed, the temporary
1419         * bump is guaranteed to be gone before @kn is released.
1420         */
1421        atomic_inc(&kn->active);
1422        if (kernfs_lockdep(kn))
1423                rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1424}
1425
1426/**
1427 * kernfs_remove_self - remove a kernfs_node from its own method
1428 * @kn: the self kernfs_node to remove
1429 *
1430 * The caller must be running off of a kernfs operation which is invoked
1431 * with an active reference - e.g. one of kernfs_ops.  This can be used to
1432 * implement a file operation which deletes itself.
1433 *
1434 * For example, the "delete" file for a sysfs device directory can be
1435 * implemented by invoking kernfs_remove_self() on the "delete" file
1436 * itself.  This function breaks the circular dependency of trying to
1437 * deactivate self while holding an active ref itself.  It isn't necessary
1438 * to modify the usual removal path to use kernfs_remove_self().  The
1439 * "delete" implementation can simply invoke kernfs_remove_self() on self
1440 * before proceeding with the usual removal path.  kernfs will ignore later
1441 * kernfs_remove() on self.
1442 *
1443 * kernfs_remove_self() can be called multiple times concurrently on the
1444 * same kernfs_node.  Only the first one actually performs removal and
1445 * returns %true.  All others will wait until the kernfs operation which
1446 * won self-removal finishes and return %false.  Note that the losers wait
1447 * for the completion of not only the winning kernfs_remove_self() but also
1448 * the whole kernfs_ops which won the arbitration.  This can be used to
1449 * guarantee, for example, all concurrent writes to a "delete" file to
1450 * finish only after the whole operation is complete.
1451 */
1452bool kernfs_remove_self(struct kernfs_node *kn)
1453{
1454        bool ret;
1455
1456        down_write(&kernfs_rwsem);
1457        kernfs_break_active_protection(kn);
1458
1459        /*
1460         * SUICIDAL is used to arbitrate among competing invocations.  Only
1461         * the first one will actually perform removal.  When the removal
1462         * is complete, SUICIDED is set and the active ref is restored
1463         * while kernfs_rwsem for held exclusive.  The ones which lost
1464         * arbitration waits for SUICIDED && drained which can happen only
1465         * after the enclosing kernfs operation which executed the winning
1466         * instance of kernfs_remove_self() finished.
1467         */
1468        if (!(kn->flags & KERNFS_SUICIDAL)) {
1469                kn->flags |= KERNFS_SUICIDAL;
1470                __kernfs_remove(kn);
1471                kn->flags |= KERNFS_SUICIDED;
1472                ret = true;
1473        } else {
1474                wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1475                DEFINE_WAIT(wait);
1476
1477                while (true) {
1478                        prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1479
1480                        if ((kn->flags & KERNFS_SUICIDED) &&
1481                            atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1482                                break;
1483
1484                        up_write(&kernfs_rwsem);
1485                        schedule();
1486                        down_write(&kernfs_rwsem);
1487                }
1488                finish_wait(waitq, &wait);
1489                WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1490                ret = false;
1491        }
1492
1493        /*
1494         * This must be done while kernfs_rwsem held exclusive; otherwise,
1495         * waiting for SUICIDED && deactivated could finish prematurely.
1496         */
1497        kernfs_unbreak_active_protection(kn);
1498
1499        up_write(&kernfs_rwsem);
1500        return ret;
1501}
1502
1503/**
1504 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1505 * @parent: parent of the target
1506 * @name: name of the kernfs_node to remove
1507 * @ns: namespace tag of the kernfs_node to remove
1508 *
1509 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1510 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1511 */
1512int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1513                             const void *ns)
1514{
1515        struct kernfs_node *kn;
1516
1517        if (!parent) {
1518                WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1519                        name);
1520                return -ENOENT;
1521        }
1522
1523        down_write(&kernfs_rwsem);
1524
1525        kn = kernfs_find_ns(parent, name, ns);
1526        if (kn)
1527                __kernfs_remove(kn);
1528
1529        up_write(&kernfs_rwsem);
1530
1531        if (kn)
1532                return 0;
1533        else
1534                return -ENOENT;
1535}
1536
1537/**
1538 * kernfs_rename_ns - move and rename a kernfs_node
1539 * @kn: target node
1540 * @new_parent: new parent to put @sd under
1541 * @new_name: new name
1542 * @new_ns: new namespace tag
1543 */
1544int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1545                     const char *new_name, const void *new_ns)
1546{
1547        struct kernfs_node *old_parent;
1548        const char *old_name = NULL;
1549        int error;
1550
1551        /* can't move or rename root */
1552        if (!kn->parent)
1553                return -EINVAL;
1554
1555        down_write(&kernfs_rwsem);
1556
1557        error = -ENOENT;
1558        if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1559            (new_parent->flags & KERNFS_EMPTY_DIR))
1560                goto out;
1561
1562        error = 0;
1563        if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1564            (strcmp(kn->name, new_name) == 0))
1565                goto out;       /* nothing to rename */
1566
1567        error = -EEXIST;
1568        if (kernfs_find_ns(new_parent, new_name, new_ns))
1569                goto out;
1570
1571        /* rename kernfs_node */
1572        if (strcmp(kn->name, new_name) != 0) {
1573                error = -ENOMEM;
1574                new_name = kstrdup_const(new_name, GFP_KERNEL);
1575                if (!new_name)
1576                        goto out;
1577        } else {
1578                new_name = NULL;
1579        }
1580
1581        /*
1582         * Move to the appropriate place in the appropriate directories rbtree.
1583         */
1584        kernfs_unlink_sibling(kn);
1585        kernfs_get(new_parent);
1586
1587        /* rename_lock protects ->parent and ->name accessors */
1588        spin_lock_irq(&kernfs_rename_lock);
1589
1590        old_parent = kn->parent;
1591        kn->parent = new_parent;
1592
1593        kn->ns = new_ns;
1594        if (new_name) {
1595                old_name = kn->name;
1596                kn->name = new_name;
1597        }
1598
1599        spin_unlock_irq(&kernfs_rename_lock);
1600
1601        kn->hash = kernfs_name_hash(kn->name, kn->ns);
1602        kernfs_link_sibling(kn);
1603
1604        kernfs_put(old_parent);
1605        kfree_const(old_name);
1606
1607        error = 0;
1608 out:
1609        up_write(&kernfs_rwsem);
1610        return error;
1611}
1612
1613/* Relationship between s_mode and the DT_xxx types */
1614static inline unsigned char dt_type(struct kernfs_node *kn)
1615{
1616        return (kn->mode >> 12) & 15;
1617}
1618
1619static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1620{
1621        kernfs_put(filp->private_data);
1622        return 0;
1623}
1624
1625static struct kernfs_node *kernfs_dir_pos(const void *ns,
1626        struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1627{
1628        if (pos) {
1629                int valid = kernfs_active(pos) &&
1630                        pos->parent == parent && hash == pos->hash;
1631                kernfs_put(pos);
1632                if (!valid)
1633                        pos = NULL;
1634        }
1635        if (!pos && (hash > 1) && (hash < INT_MAX)) {
1636                struct rb_node *node = parent->dir.children.rb_node;
1637                while (node) {
1638                        pos = rb_to_kn(node);
1639
1640                        if (hash < pos->hash)
1641                                node = node->rb_left;
1642                        else if (hash > pos->hash)
1643                                node = node->rb_right;
1644                        else
1645                                break;
1646                }
1647        }
1648        /* Skip over entries which are dying/dead or in the wrong namespace */
1649        while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1650                struct rb_node *node = rb_next(&pos->rb);
1651                if (!node)
1652                        pos = NULL;
1653                else
1654                        pos = rb_to_kn(node);
1655        }
1656        return pos;
1657}
1658
1659static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1660        struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1661{
1662        pos = kernfs_dir_pos(ns, parent, ino, pos);
1663        if (pos) {
1664                do {
1665                        struct rb_node *node = rb_next(&pos->rb);
1666                        if (!node)
1667                                pos = NULL;
1668                        else
1669                                pos = rb_to_kn(node);
1670                } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1671        }
1672        return pos;
1673}
1674
1675static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1676{
1677        struct dentry *dentry = file->f_path.dentry;
1678        struct kernfs_node *parent = kernfs_dentry_node(dentry);
1679        struct kernfs_node *pos = file->private_data;
1680        const void *ns = NULL;
1681
1682        if (!dir_emit_dots(file, ctx))
1683                return 0;
1684        down_read(&kernfs_rwsem);
1685
1686        if (kernfs_ns_enabled(parent))
1687                ns = kernfs_info(dentry->d_sb)->ns;
1688
1689        for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1690             pos;
1691             pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1692                const char *name = pos->name;
1693                unsigned int type = dt_type(pos);
1694                int len = strlen(name);
1695                ino_t ino = kernfs_ino(pos);
1696
1697                ctx->pos = pos->hash;
1698                file->private_data = pos;
1699                kernfs_get(pos);
1700
1701                up_read(&kernfs_rwsem);
1702                if (!dir_emit(ctx, name, len, ino, type))
1703                        return 0;
1704                down_read(&kernfs_rwsem);
1705        }
1706        up_read(&kernfs_rwsem);
1707        file->private_data = NULL;
1708        ctx->pos = INT_MAX;
1709        return 0;
1710}
1711
1712const struct file_operations kernfs_dir_fops = {
1713        .read           = generic_read_dir,
1714        .iterate_shared = kernfs_fop_readdir,
1715        .release        = kernfs_dir_fop_release,
1716        .llseek         = generic_file_llseek,
1717};
1718