linux/fs/kernfs/dir.c
<<
>>
Prefs
   1/*
   2 * fs/kernfs/dir.c - kernfs directory implementation
   3 *
   4 * Copyright (c) 2001-3 Patrick Mochel
   5 * Copyright (c) 2007 SUSE Linux Products GmbH
   6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
   7 *
   8 * This file is released under the GPLv2.
   9 */
  10
  11#include <linux/sched.h>
  12#include <linux/fs.h>
  13#include <linux/namei.h>
  14#include <linux/idr.h>
  15#include <linux/slab.h>
  16#include <linux/security.h>
  17#include <linux/hash.h>
  18
  19#include "kernfs-internal.h"
  20
/* Held around sibling rbtree updates and walks (see the lockdep assertions in this file). */
DEFINE_MUTEX(kernfs_mutex);
static DEFINE_SPINLOCK(kernfs_rename_lock);     /* kn->parent and ->name */
static char kernfs_pr_cont_buf[PATH_MAX];       /* protected by rename_lock */

/* Map an rb_node embedded as kernfs_node::rb back to its kernfs_node. */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
  26
  27static bool kernfs_active(struct kernfs_node *kn)
  28{
  29        lockdep_assert_held(&kernfs_mutex);
  30        return atomic_read(&kn->active) >= 0;
  31}
  32
  33static bool kernfs_lockdep(struct kernfs_node *kn)
  34{
  35#ifdef CONFIG_DEBUG_LOCK_ALLOC
  36        return kn->flags & KERNFS_LOCKDEP;
  37#else
  38        return false;
  39#endif
  40}
  41
  42static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
  43{
  44        return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
  45}
  46
  47static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
  48                                              size_t buflen)
  49{
  50        char *p = buf + buflen;
  51        int len;
  52
  53        *--p = '\0';
  54
  55        do {
  56                len = strlen(kn->name);
  57                if (p - buf < len + 1) {
  58                        buf[0] = '\0';
  59                        p = NULL;
  60                        break;
  61                }
  62                p -= len;
  63                memcpy(p, kn->name, len);
  64                *--p = '/';
  65                kn = kn->parent;
  66        } while (kn && kn->parent);
  67
  68        return p;
  69}
  70
  71/**
  72 * kernfs_name - obtain the name of a given node
  73 * @kn: kernfs_node of interest
  74 * @buf: buffer to copy @kn's name into
  75 * @buflen: size of @buf
  76 *
  77 * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
  78 * similar to strlcpy().  It returns the length of @kn's name and if @buf
  79 * isn't long enough, it's filled upto @buflen-1 and nul terminated.
  80 *
  81 * This function can be called from any context.
  82 */
  83int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
  84{
  85        unsigned long flags;
  86        int ret;
  87
  88        spin_lock_irqsave(&kernfs_rename_lock, flags);
  89        ret = kernfs_name_locked(kn, buf, buflen);
  90        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
  91        return ret;
  92}
  93
  94/**
  95 * kernfs_path - build full path of a given node
  96 * @kn: kernfs_node of interest
  97 * @buf: buffer to copy @kn's name into
  98 * @buflen: size of @buf
  99 *
 100 * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
 101 * path is built from the end of @buf so the returned pointer usually
 102 * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
 103 * and %NULL is returned.
 104 */
 105char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
 106{
 107        unsigned long flags;
 108        char *p;
 109
 110        spin_lock_irqsave(&kernfs_rename_lock, flags);
 111        p = kernfs_path_locked(kn, buf, buflen);
 112        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 113        return p;
 114}
 115
 116/**
 117 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
 118 * @kn: kernfs_node of interest
 119 *
 120 * This function can be called from any context.
 121 */
 122void pr_cont_kernfs_name(struct kernfs_node *kn)
 123{
 124        unsigned long flags;
 125
 126        spin_lock_irqsave(&kernfs_rename_lock, flags);
 127
 128        kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
 129        pr_cont("%s", kernfs_pr_cont_buf);
 130
 131        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 132}
 133
 134/**
 135 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 136 * @kn: kernfs_node of interest
 137 *
 138 * This function can be called from any context.
 139 */
 140void pr_cont_kernfs_path(struct kernfs_node *kn)
 141{
 142        unsigned long flags;
 143        char *p;
 144
 145        spin_lock_irqsave(&kernfs_rename_lock, flags);
 146
 147        p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
 148                               sizeof(kernfs_pr_cont_buf));
 149        if (p)
 150                pr_cont("%s", p);
 151        else
 152                pr_cont("<name too long>");
 153
 154        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 155}
 156
 157/**
 158 * kernfs_get_parent - determine the parent node and pin it
 159 * @kn: kernfs_node of interest
 160 *
 161 * Determines @kn's parent, pins and returns it.  This function can be
 162 * called from any context.
 163 */
 164struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 165{
 166        struct kernfs_node *parent;
 167        unsigned long flags;
 168
 169        spin_lock_irqsave(&kernfs_rename_lock, flags);
 170        parent = kn->parent;
 171        kernfs_get(parent);
 172        spin_unlock_irqrestore(&kernfs_rename_lock, flags);
 173
 174        return parent;
 175}
 176
 177/**
 178 *      kernfs_name_hash
 179 *      @name: Null terminated string to hash
 180 *      @ns:   Namespace tag to hash
 181 *
 182 *      Returns 31 bit hash of ns + name (so it fits in an off_t )
 183 */
 184static unsigned int kernfs_name_hash(const char *name, const void *ns)
 185{
 186        unsigned long hash = init_name_hash();
 187        unsigned int len = strlen(name);
 188        while (len--)
 189                hash = partial_name_hash(*name++, hash);
 190        hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
 191        hash &= 0x7fffffffU;
 192        /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
 193        if (hash < 2)
 194                hash += 2;
 195        if (hash >= INT_MAX)
 196                hash = INT_MAX - 1;
 197        return hash;
 198}
 199
 200static int kernfs_name_compare(unsigned int hash, const char *name,
 201                               const void *ns, const struct kernfs_node *kn)
 202{
 203        if (hash != kn->hash)
 204                return hash - kn->hash;
 205        if (ns != kn->ns)
 206                return ns - kn->ns;
 207        return strcmp(name, kn->name);
 208}
 209
 210static int kernfs_sd_compare(const struct kernfs_node *left,
 211                             const struct kernfs_node *right)
 212{
 213        return kernfs_name_compare(left->hash, left->name, left->ns, right);
 214}
 215
 216/**
 217 *      kernfs_link_sibling - link kernfs_node into sibling rbtree
 218 *      @kn: kernfs_node of interest
 219 *
 220 *      Link @kn into its sibling rbtree which starts from
 221 *      @kn->parent->dir.children.
 222 *
 223 *      Locking:
 224 *      mutex_lock(kernfs_mutex)
 225 *
 226 *      RETURNS:
 227 *      0 on susccess -EEXIST on failure.
 228 */
 229static int kernfs_link_sibling(struct kernfs_node *kn)
 230{
 231        struct rb_node **node = &kn->parent->dir.children.rb_node;
 232        struct rb_node *parent = NULL;
 233
 234        while (*node) {
 235                struct kernfs_node *pos;
 236                int result;
 237
 238                pos = rb_to_kn(*node);
 239                parent = *node;
 240                result = kernfs_sd_compare(kn, pos);
 241                if (result < 0)
 242                        node = &pos->rb.rb_left;
 243                else if (result > 0)
 244                        node = &pos->rb.rb_right;
 245                else
 246                        return -EEXIST;
 247        }
 248
 249        /* add new node and rebalance the tree */
 250        rb_link_node(&kn->rb, parent, node);
 251        rb_insert_color(&kn->rb, &kn->parent->dir.children);
 252
 253        /* successfully added, account subdir number */
 254        if (kernfs_type(kn) == KERNFS_DIR)
 255                kn->parent->dir.subdirs++;
 256
 257        return 0;
 258}
 259
 260/**
 261 *      kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 262 *      @kn: kernfs_node of interest
 263 *
 264 *      Try to unlink @kn from its sibling rbtree which starts from
 265 *      kn->parent->dir.children.  Returns %true if @kn was actually
 266 *      removed, %false if @kn wasn't on the rbtree.
 267 *
 268 *      Locking:
 269 *      mutex_lock(kernfs_mutex)
 270 */
 271static bool kernfs_unlink_sibling(struct kernfs_node *kn)
 272{
 273        if (RB_EMPTY_NODE(&kn->rb))
 274                return false;
 275
 276        if (kernfs_type(kn) == KERNFS_DIR)
 277                kn->parent->dir.subdirs--;
 278
 279        rb_erase(&kn->rb, &kn->parent->dir.children);
 280        RB_CLEAR_NODE(&kn->rb);
 281        return true;
 282}
 283
 284/**
 285 *      kernfs_get_active - get an active reference to kernfs_node
 286 *      @kn: kernfs_node to get an active reference to
 287 *
 288 *      Get an active reference of @kn.  This function is noop if @kn
 289 *      is NULL.
 290 *
 291 *      RETURNS:
 292 *      Pointer to @kn on success, NULL on failure.
 293 */
 294struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
 295{
 296        if (unlikely(!kn))
 297                return NULL;
 298
 299        if (!atomic_inc_unless_negative(&kn->active))
 300                return NULL;
 301
 302        if (kernfs_lockdep(kn))
 303                rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
 304        return kn;
 305}
 306
 307/**
 308 *      kernfs_put_active - put an active reference to kernfs_node
 309 *      @kn: kernfs_node to put an active reference to
 310 *
 311 *      Put an active reference to @kn.  This function is noop if @kn
 312 *      is NULL.
 313 */
 314void kernfs_put_active(struct kernfs_node *kn)
 315{
 316        struct kernfs_root *root = kernfs_root(kn);
 317        int v;
 318
 319        if (unlikely(!kn))
 320                return;
 321
 322        if (kernfs_lockdep(kn))
 323                rwsem_release(&kn->dep_map, 1, _RET_IP_);
 324        v = atomic_dec_return(&kn->active);
 325        if (likely(v != KN_DEACTIVATED_BIAS))
 326                return;
 327
 328        wake_up_all(&root->deactivate_waitq);
 329}
 330
/**
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
 *
 * Drain existing usages and nuke all existing mmaps of @kn.  Multiple
 * removers may invoke this function concurrently on @kn and all will
 * return after draining is complete.
 *
 * Must be entered with kernfs_mutex held.  The mutex is dropped while
 * waiting and re-acquired before returning.
 */
static void kernfs_drain(struct kernfs_node *kn)
        __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
{
        struct kernfs_root *root = kernfs_root(kn);

        lockdep_assert_held(&kernfs_mutex);
        /* draining a node that is still active is unexpected */
        WARN_ON_ONCE(kernfs_active(kn));

        mutex_unlock(&kernfs_mutex);

        /* lockdep only: annotate the wait below as an acquisition of @kn */
        if (kernfs_lockdep(kn)) {
                rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
                if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
                        lock_contended(&kn->dep_map, _RET_IP_);
        }

        /* every caller waits until ->active is back at exactly the bias */
        wait_event(root->deactivate_waitq,
                   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);

        if (kernfs_lockdep(kn)) {
                lock_acquired(&kn->dep_map, _RET_IP_);
                rwsem_release(&kn->dep_map, 1, _RET_IP_);
        }

        kernfs_unmap_bin_file(kn);

        mutex_lock(&kernfs_mutex);
}
 368
 369/**
 370 * kernfs_get - get a reference count on a kernfs_node
 371 * @kn: the target kernfs_node
 372 */
 373void kernfs_get(struct kernfs_node *kn)
 374{
 375        if (kn) {
 376                WARN_ON(!atomic_read(&kn->count));
 377                atomic_inc(&kn->count);
 378        }
 379}
 380EXPORT_SYMBOL_GPL(kernfs_get);
 381
 382/**
 383 * kernfs_put - put a reference count on a kernfs_node
 384 * @kn: the target kernfs_node
 385 *
 386 * Put a reference count of @kn and destroy it if it reached zero.
 387 */
 388void kernfs_put(struct kernfs_node *kn)
 389{
 390        struct kernfs_node *parent;
 391        struct kernfs_root *root;
 392
 393        if (!kn || !atomic_dec_and_test(&kn->count))
 394                return;
 395        root = kernfs_root(kn);
 396 repeat:
 397        /*
 398         * Moving/renaming is always done while holding reference.
 399         * kn->parent won't change beneath us.
 400         */
 401        parent = kn->parent;
 402
 403        WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
 404                  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
 405                  parent ? parent->name : "", kn->name, atomic_read(&kn->active));
 406
 407        if (kernfs_type(kn) == KERNFS_LINK)
 408                kernfs_put(kn->symlink.target_kn);
 409        if (!(kn->flags & KERNFS_STATIC_NAME))
 410                kfree(kn->name);
 411        if (kn->iattr) {
 412                if (kn->iattr->ia_secdata)
 413                        security_release_secctx(kn->iattr->ia_secdata,
 414                                                kn->iattr->ia_secdata_len);
 415                simple_xattrs_free(&kn->iattr->xattrs);
 416        }
 417        kfree(kn->iattr);
 418        ida_simple_remove(&root->ino_ida, kn->ino);
 419        kmem_cache_free(kernfs_node_cache, kn);
 420
 421        kn = parent;
 422        if (kn) {
 423                if (atomic_dec_and_test(&kn->count))
 424                        goto repeat;
 425        } else {
 426                /* just released the root kn, free @root too */
 427                ida_destroy(&root->ino_ida);
 428                kfree(root);
 429        }
 430}
 431EXPORT_SYMBOL_GPL(kernfs_put);
 432
 433static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 434{
 435        struct kernfs_node *kn;
 436
 437        if (flags & LOOKUP_RCU)
 438                return -ECHILD;
 439
 440        /* Always perform fresh lookup for negatives */
 441        if (!dentry->d_inode)
 442                goto out_bad_unlocked;
 443
 444        kn = dentry->d_fsdata;
 445        mutex_lock(&kernfs_mutex);
 446
 447        /* The kernfs node has been deactivated */
 448        if (!kernfs_active(kn))
 449                goto out_bad;
 450
 451        /* The kernfs node has been moved? */
 452        if (dentry->d_parent->d_fsdata != kn->parent)
 453                goto out_bad;
 454
 455        /* The kernfs node has been renamed */
 456        if (strcmp(dentry->d_name.name, kn->name) != 0)
 457                goto out_bad;
 458
 459        /* The kernfs node has been moved to a different namespace */
 460        if (kn->parent && kernfs_ns_enabled(kn->parent) &&
 461            kernfs_info(dentry->d_sb)->ns != kn->ns)
 462                goto out_bad;
 463
 464        mutex_unlock(&kernfs_mutex);
 465        return 1;
 466out_bad:
 467        mutex_unlock(&kernfs_mutex);
 468out_bad_unlocked:
 469        return 0;
 470}
 471
 472static void kernfs_dop_release(struct dentry *dentry)
 473{
 474        kernfs_put(dentry->d_fsdata);
 475}
 476
/* dentry operations for kernfs: revalidation plus node release. */
const struct dentry_operations kernfs_dops = {
        .d_revalidate   = kernfs_dop_revalidate,
        .d_release      = kernfs_dop_release,
};
 481
 482/**
 483 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
 484 * @dentry: the dentry in question
 485 *
 486 * Return the kernfs_node associated with @dentry.  If @dentry is not a
 487 * kernfs one, %NULL is returned.
 488 *
 489 * While the returned kernfs_node will stay accessible as long as @dentry
 490 * is accessible, the returned node can be in any state and the caller is
 491 * fully responsible for determining what's accessible.
 492 */
 493struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 494{
 495        if (dentry->d_sb->s_op == &kernfs_sops)
 496                return dentry->d_fsdata;
 497        return NULL;
 498}
 499
 500static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 501                                             const char *name, umode_t mode,
 502                                             unsigned flags)
 503{
 504        char *dup_name = NULL;
 505        struct kernfs_node *kn;
 506        int ret;
 507
 508        if (!(flags & KERNFS_STATIC_NAME)) {
 509                name = dup_name = kstrdup(name, GFP_KERNEL);
 510                if (!name)
 511                        return NULL;
 512        }
 513
 514        kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
 515        if (!kn)
 516                goto err_out1;
 517
 518        ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
 519        if (ret < 0)
 520                goto err_out2;
 521        kn->ino = ret;
 522
 523        atomic_set(&kn->count, 1);
 524        atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
 525        RB_CLEAR_NODE(&kn->rb);
 526
 527        kn->name = name;
 528        kn->mode = mode;
 529        kn->flags = flags;
 530
 531        return kn;
 532
 533 err_out2:
 534        kmem_cache_free(kernfs_node_cache, kn);
 535 err_out1:
 536        kfree(dup_name);
 537        return NULL;
 538}
 539
 540struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 541                                    const char *name, umode_t mode,
 542                                    unsigned flags)
 543{
 544        struct kernfs_node *kn;
 545
 546        kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
 547        if (kn) {
 548                kernfs_get(parent);
 549                kn->parent = parent;
 550        }
 551        return kn;
 552}
 553
 554/**
 555 *      kernfs_add_one - add kernfs_node to parent without warning
 556 *      @kn: kernfs_node to be added
 557 *
 558 *      The caller must already have initialized @kn->parent.  This
 559 *      function increments nlink of the parent's inode if @kn is a
 560 *      directory and link into the children list of the parent.
 561 *
 562 *      RETURNS:
 563 *      0 on success, -EEXIST if entry with the given name already
 564 *      exists.
 565 */
 566int kernfs_add_one(struct kernfs_node *kn)
 567{
 568        struct kernfs_node *parent = kn->parent;
 569        struct kernfs_iattrs *ps_iattr;
 570        bool has_ns;
 571        int ret;
 572
 573        mutex_lock(&kernfs_mutex);
 574
 575        ret = -EINVAL;
 576        has_ns = kernfs_ns_enabled(parent);
 577        if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 578                 has_ns ? "required" : "invalid", parent->name, kn->name))
 579                goto out_unlock;
 580
 581        if (kernfs_type(parent) != KERNFS_DIR)
 582                goto out_unlock;
 583
 584        ret = -ENOENT;
 585        if (parent->flags & KERNFS_EMPTY_DIR)
 586                goto out_unlock;
 587
 588        if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
 589                goto out_unlock;
 590
 591        kn->hash = kernfs_name_hash(kn->name, kn->ns);
 592
 593        ret = kernfs_link_sibling(kn);
 594        if (ret)
 595                goto out_unlock;
 596
 597        /* Update timestamps on the parent */
 598        ps_iattr = parent->iattr;
 599        if (ps_iattr) {
 600                struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
 601                ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
 602        }
 603
 604        mutex_unlock(&kernfs_mutex);
 605
 606        /*
 607         * Activate the new node unless CREATE_DEACTIVATED is requested.
 608         * If not activated here, the kernfs user is responsible for
 609         * activating the node with kernfs_activate().  A node which hasn't
 610         * been activated is not visible to userland and its removal won't
 611         * trigger deactivation.
 612         */
 613        if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 614                kernfs_activate(kn);
 615        return 0;
 616
 617out_unlock:
 618        mutex_unlock(&kernfs_mutex);
 619        return ret;
 620}
 621
 622/**
 623 * kernfs_find_ns - find kernfs_node with the given name
 624 * @parent: kernfs_node to search under
 625 * @name: name to look for
 626 * @ns: the namespace tag to use
 627 *
 628 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 629 * the found kernfs_node on success, %NULL on failure.
 630 */
 631static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 632                                          const unsigned char *name,
 633                                          const void *ns)
 634{
 635        struct rb_node *node = parent->dir.children.rb_node;
 636        bool has_ns = kernfs_ns_enabled(parent);
 637        unsigned int hash;
 638
 639        lockdep_assert_held(&kernfs_mutex);
 640
 641        if (has_ns != (bool)ns) {
 642                WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
 643                     has_ns ? "required" : "invalid", parent->name, name);
 644                return NULL;
 645        }
 646
 647        hash = kernfs_name_hash(name, ns);
 648        while (node) {
 649                struct kernfs_node *kn;
 650                int result;
 651
 652                kn = rb_to_kn(node);
 653                result = kernfs_name_compare(hash, name, ns, kn);
 654                if (result < 0)
 655                        node = node->rb_left;
 656                else if (result > 0)
 657                        node = node->rb_right;
 658                else
 659                        return kn;
 660        }
 661        return NULL;
 662}
 663
 664/**
 665 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 666 * @parent: kernfs_node to search under
 667 * @name: name to look for
 668 * @ns: the namespace tag to use
 669 *
 670 * Look for kernfs_node with name @name under @parent and get a reference
 671 * if found.  This function may sleep and returns pointer to the found
 672 * kernfs_node on success, %NULL on failure.
 673 */
 674struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
 675                                           const char *name, const void *ns)
 676{
 677        struct kernfs_node *kn;
 678
 679        mutex_lock(&kernfs_mutex);
 680        kn = kernfs_find_ns(parent, name, ns);
 681        kernfs_get(kn);
 682        mutex_unlock(&kernfs_mutex);
 683
 684        return kn;
 685}
 686EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
 687
 688/**
 689 * kernfs_create_root - create a new kernfs hierarchy
 690 * @scops: optional syscall operations for the hierarchy
 691 * @flags: KERNFS_ROOT_* flags
 692 * @priv: opaque data associated with the new directory
 693 *
 694 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 695 * failure.
 696 */
 697struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 698                                       unsigned int flags, void *priv)
 699{
 700        struct kernfs_root *root;
 701        struct kernfs_node *kn;
 702
 703        root = kzalloc(sizeof(*root), GFP_KERNEL);
 704        if (!root)
 705                return ERR_PTR(-ENOMEM);
 706
 707        ida_init(&root->ino_ida);
 708        INIT_LIST_HEAD(&root->supers);
 709
 710        kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
 711                               KERNFS_DIR);
 712        if (!kn) {
 713                ida_destroy(&root->ino_ida);
 714                kfree(root);
 715                return ERR_PTR(-ENOMEM);
 716        }
 717
 718        kn->priv = priv;
 719        kn->dir.root = root;
 720
 721        root->syscall_ops = scops;
 722        root->flags = flags;
 723        root->kn = kn;
 724        init_waitqueue_head(&root->deactivate_waitq);
 725
 726        if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
 727                kernfs_activate(kn);
 728
 729        return root;
 730}
 731
 732/**
 733 * kernfs_destroy_root - destroy a kernfs hierarchy
 734 * @root: root of the hierarchy to destroy
 735 *
 736 * Destroy the hierarchy anchored at @root by removing all existing
 737 * directories and destroying @root.
 738 */
 739void kernfs_destroy_root(struct kernfs_root *root)
 740{
 741        kernfs_remove(root->kn);        /* will also free @root */
 742}
 743
 744/**
 745 * kernfs_create_dir_ns - create a directory
 746 * @parent: parent in which to create a new directory
 747 * @name: name of the new directory
 748 * @mode: mode of the new directory
 749 * @priv: opaque data associated with the new directory
 750 * @ns: optional namespace tag of the directory
 751 *
 752 * Returns the created node on success, ERR_PTR() value on failure.
 753 */
 754struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 755                                         const char *name, umode_t mode,
 756                                         void *priv, const void *ns)
 757{
 758        struct kernfs_node *kn;
 759        int rc;
 760
 761        /* allocate */
 762        kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
 763        if (!kn)
 764                return ERR_PTR(-ENOMEM);
 765
 766        kn->dir.root = parent->dir.root;
 767        kn->ns = ns;
 768        kn->priv = priv;
 769
 770        /* link in */
 771        rc = kernfs_add_one(kn);
 772        if (!rc)
 773                return kn;
 774
 775        kernfs_put(kn);
 776        return ERR_PTR(rc);
 777}
 778
 779/**
 780 * kernfs_create_empty_dir - create an always empty directory
 781 * @parent: parent in which to create a new directory
 782 * @name: name of the new directory
 783 *
 784 * Returns the created node on success, ERR_PTR() value on failure.
 785 */
 786struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 787                                            const char *name)
 788{
 789        struct kernfs_node *kn;
 790        int rc;
 791
 792        /* allocate */
 793        kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
 794        if (!kn)
 795                return ERR_PTR(-ENOMEM);
 796
 797        kn->flags |= KERNFS_EMPTY_DIR;
 798        kn->dir.root = parent->dir.root;
 799        kn->ns = NULL;
 800        kn->priv = NULL;
 801
 802        /* link in */
 803        rc = kernfs_add_one(kn);
 804        if (!rc)
 805                return kn;
 806
 807        kernfs_put(kn);
 808        return ERR_PTR(rc);
 809}
 810
 811static struct dentry *kernfs_iop_lookup(struct inode *dir,
 812                                        struct dentry *dentry,
 813                                        unsigned int flags)
 814{
 815        struct dentry *ret;
 816        struct kernfs_node *parent = dentry->d_parent->d_fsdata;
 817        struct kernfs_node *kn;
 818        struct inode *inode;
 819        const void *ns = NULL;
 820
 821        mutex_lock(&kernfs_mutex);
 822
 823        if (kernfs_ns_enabled(parent))
 824                ns = kernfs_info(dir->i_sb)->ns;
 825
 826        kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
 827
 828        /* no such entry */
 829        if (!kn || !kernfs_active(kn)) {
 830                ret = NULL;
 831                goto out_unlock;
 832        }
 833        kernfs_get(kn);
 834        dentry->d_fsdata = kn;
 835
 836        /* attach dentry and inode */
 837        inode = kernfs_get_inode(dir->i_sb, kn);
 838        if (!inode) {
 839                ret = ERR_PTR(-ENOMEM);
 840                goto out_unlock;
 841        }
 842
 843        /* instantiate and hash dentry */
 844        ret = d_materialise_unique(dentry, inode);
 845 out_unlock:
 846        mutex_unlock(&kernfs_mutex);
 847        return ret;
 848}
 849
 850static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
 851                            umode_t mode)
 852{
 853        struct kernfs_node *parent = dir->i_private;
 854        struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
 855        int ret;
 856
 857        if (!scops || !scops->mkdir)
 858                return -EPERM;
 859
 860        if (!kernfs_get_active(parent))
 861                return -ENODEV;
 862
 863        ret = scops->mkdir(parent, dentry->d_name.name, mode);
 864
 865        kernfs_put_active(parent);
 866        return ret;
 867}
 868
 869static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
 870{
 871        struct kernfs_node *kn  = dentry->d_fsdata;
 872        struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 873        int ret;
 874
 875        if (!scops || !scops->rmdir)
 876                return -EPERM;
 877
 878        if (!kernfs_get_active(kn))
 879                return -ENODEV;
 880
 881        ret = scops->rmdir(kn);
 882
 883        kernfs_put_active(kn);
 884        return ret;
 885}
 886
 887static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
 888                             struct inode *new_dir, struct dentry *new_dentry)
 889{
 890        struct kernfs_node *kn  = old_dentry->d_fsdata;
 891        struct kernfs_node *new_parent = new_dir->i_private;
 892        struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
 893        int ret;
 894
 895        if (!scops || !scops->rename)
 896                return -EPERM;
 897
 898        if (!kernfs_get_active(kn))
 899                return -ENODEV;
 900
 901        if (!kernfs_get_active(new_parent)) {
 902                kernfs_put_active(kn);
 903                return -ENODEV;
 904        }
 905
 906        ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
 907
 908        kernfs_put_active(new_parent);
 909        kernfs_put_active(kn);
 910        return ret;
 911}
 912
/*
 * Inode operations for kernfs directories.  lookup/mkdir/rmdir/rename are
 * defined in this file; the permission and (x)attr handlers are defined
 * elsewhere.
 */
const struct inode_operations kernfs_dir_iops = {
        .lookup         = kernfs_iop_lookup,
        .permission     = kernfs_iop_permission,
        .setattr        = kernfs_iop_setattr,
        .getattr        = kernfs_iop_getattr,
        .setxattr       = kernfs_iop_setxattr,
        .removexattr    = kernfs_iop_removexattr,
        .getxattr       = kernfs_iop_getxattr,
        .listxattr      = kernfs_iop_listxattr,

        .mkdir          = kernfs_iop_mkdir,
        .rmdir          = kernfs_iop_rmdir,
        .rename         = kernfs_iop_rename,
};
 927
 928static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
 929{
 930        struct kernfs_node *last;
 931
 932        while (true) {
 933                struct rb_node *rbn;
 934
 935                last = pos;
 936
 937                if (kernfs_type(pos) != KERNFS_DIR)
 938                        break;
 939
 940                rbn = rb_first(&pos->dir.children);
 941                if (!rbn)
 942                        break;
 943
 944                pos = rb_to_kn(rbn);
 945        }
 946
 947        return last;
 948}
 949
 950/**
 951 * kernfs_next_descendant_post - find the next descendant for post-order walk
 952 * @pos: the current position (%NULL to initiate traversal)
 953 * @root: kernfs_node whose descendants to walk
 954 *
 955 * Find the next descendant to visit for post-order traversal of @root's
 956 * descendants.  @root is included in the iteration and the last node to be
 957 * visited.
 958 */
 959static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 960                                                       struct kernfs_node *root)
 961{
 962        struct rb_node *rbn;
 963
 964        lockdep_assert_held(&kernfs_mutex);
 965
 966        /* if first iteration, visit leftmost descendant which may be root */
 967        if (!pos)
 968                return kernfs_leftmost_descendant(root);
 969
 970        /* if we visited @root, we're done */
 971        if (pos == root)
 972                return NULL;
 973
 974        /* if there's an unvisited sibling, visit its leftmost descendant */
 975        rbn = rb_next(&pos->rb);
 976        if (rbn)
 977                return kernfs_leftmost_descendant(rb_to_kn(rbn));
 978
 979        /* no sibling left, visit parent */
 980        return pos->parent;
 981}
 982
 983/**
 984 * kernfs_activate - activate a node which started deactivated
 985 * @kn: kernfs_node whose subtree is to be activated
 986 *
 987 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 988 * needs to be explicitly activated.  A node which hasn't been activated
 989 * isn't visible to userland and deactivation is skipped during its
 990 * removal.  This is useful to construct atomic init sequences where
 991 * creation of multiple nodes should either succeed or fail atomically.
 992 *
 993 * The caller is responsible for ensuring that this function is not called
 994 * after kernfs_remove*() is invoked on @kn.
 995 */
 996void kernfs_activate(struct kernfs_node *kn)
 997{
 998        struct kernfs_node *pos;
 999
1000        mutex_lock(&kernfs_mutex);
1001
1002        pos = NULL;
1003        while ((pos = kernfs_next_descendant_post(pos, kn))) {
1004                if (!pos || (pos->flags & KERNFS_ACTIVATED))
1005                        continue;
1006
1007                WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
1008                WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
1009
1010                atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
1011                pos->flags |= KERNFS_ACTIVATED;
1012        }
1013
1014        mutex_unlock(&kernfs_mutex);
1015}
1016
1017static void __kernfs_remove(struct kernfs_node *kn)
1018{
1019        struct kernfs_node *pos;
1020
1021        lockdep_assert_held(&kernfs_mutex);
1022
1023        /*
1024         * Short-circuit if non-root @kn has already finished removal.
1025         * This is for kernfs_remove_self() which plays with active ref
1026         * after removal.
1027         */
1028        if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
1029                return;
1030
1031        pr_debug("kernfs %s: removing\n", kn->name);
1032
1033        /* prevent any new usage under @kn by deactivating all nodes */
1034        pos = NULL;
1035        while ((pos = kernfs_next_descendant_post(pos, kn)))
1036                if (kernfs_active(pos))
1037                        atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1038
1039        /* deactivate and unlink the subtree node-by-node */
1040        do {
1041                pos = kernfs_leftmost_descendant(kn);
1042
1043                /*
1044                 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
1045                 * base ref could have been put by someone else by the time
1046                 * the function returns.  Make sure it doesn't go away
1047                 * underneath us.
1048                 */
1049                kernfs_get(pos);
1050
1051                /*
1052                 * Drain iff @kn was activated.  This avoids draining and
1053                 * its lockdep annotations for nodes which have never been
1054                 * activated and allows embedding kernfs_remove() in create
1055                 * error paths without worrying about draining.
1056                 */
1057                if (kn->flags & KERNFS_ACTIVATED)
1058                        kernfs_drain(pos);
1059                else
1060                        WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1061
1062                /*
1063                 * kernfs_unlink_sibling() succeeds once per node.  Use it
1064                 * to decide who's responsible for cleanups.
1065                 */
1066                if (!pos->parent || kernfs_unlink_sibling(pos)) {
1067                        struct kernfs_iattrs *ps_iattr =
1068                                pos->parent ? pos->parent->iattr : NULL;
1069
1070                        /* update timestamps on the parent */
1071                        if (ps_iattr) {
1072                                ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
1073                                ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
1074                        }
1075
1076                        kernfs_put(pos);
1077                }
1078
1079                kernfs_put(pos);
1080        } while (pos != kn);
1081}
1082
1083/**
1084 * kernfs_remove - remove a kernfs_node recursively
1085 * @kn: the kernfs_node to remove
1086 *
1087 * Remove @kn along with all its subdirectories and files.
1088 */
1089void kernfs_remove(struct kernfs_node *kn)
1090{
1091        mutex_lock(&kernfs_mutex);
1092        __kernfs_remove(kn);
1093        mutex_unlock(&kernfs_mutex);
1094}
1095
/**
 * kernfs_break_active_protection - step out of active protection
 * @kn: the self kernfs_node
 *
 * Only valid from within a kernfs operation that was entered with an
 * active reference on @kn - e.g. one of the kernfs_ops callbacks.  Every
 * call must be paired with a later kernfs_unbreak_active_protection().
 *
 * The active reference the caller holds on @kn is dropped.  From that
 * point on @kn may be removed at any time, and it is entirely up to the
 * caller to make sure whatever it dereferences remains accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Remove ourselves from the active ref dependency chain.  Lockdep
	 * will complain if the caller doesn't actually hold an active ref.
	 */
	kernfs_put_active(kn);
}
1118
1119/**
1120 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1121 * @kn: the self kernfs_node
1122 *
1123 * If kernfs_break_active_protection() was called, this function must be
1124 * invoked before finishing the kernfs operation.  Note that while this
1125 * function restores the active reference, it doesn't and can't actually
1126 * restore the active protection - @kn may already or be in the process of
1127 * being removed.  Once kernfs_break_active_protection() is invoked, that
1128 * protection is irreversibly gone for the kernfs operation instance.
1129 *
1130 * While this function may be called at any point after
1131 * kernfs_break_active_protection() is invoked, its most useful location
1132 * would be right before the enclosing kernfs operation returns.
1133 */
1134void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1135{
1136        /*
1137         * @kn->active could be in any state; however, the increment we do
1138         * here will be undone as soon as the enclosing kernfs operation
1139         * finishes and this temporary bump can't break anything.  If @kn
1140         * is alive, nothing changes.  If @kn is being deactivated, the
1141         * soon-to-follow put will either finish deactivation or restore
1142         * deactivated state.  If @kn is already removed, the temporary
1143         * bump is guaranteed to be gone before @kn is released.
1144         */
1145        atomic_inc(&kn->active);
1146        if (kernfs_lockdep(kn))
1147                rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1148}
1149
1150/**
1151 * kernfs_remove_self - remove a kernfs_node from its own method
1152 * @kn: the self kernfs_node to remove
1153 *
1154 * The caller must be running off of a kernfs operation which is invoked
1155 * with an active reference - e.g. one of kernfs_ops.  This can be used to
1156 * implement a file operation which deletes itself.
1157 *
1158 * For example, the "delete" file for a sysfs device directory can be
1159 * implemented by invoking kernfs_remove_self() on the "delete" file
1160 * itself.  This function breaks the circular dependency of trying to
1161 * deactivate self while holding an active ref itself.  It isn't necessary
1162 * to modify the usual removal path to use kernfs_remove_self().  The
1163 * "delete" implementation can simply invoke kernfs_remove_self() on self
1164 * before proceeding with the usual removal path.  kernfs will ignore later
1165 * kernfs_remove() on self.
1166 *
1167 * kernfs_remove_self() can be called multiple times concurrently on the
1168 * same kernfs_node.  Only the first one actually performs removal and
1169 * returns %true.  All others will wait until the kernfs operation which
1170 * won self-removal finishes and return %false.  Note that the losers wait
1171 * for the completion of not only the winning kernfs_remove_self() but also
1172 * the whole kernfs_ops which won the arbitration.  This can be used to
1173 * guarantee, for example, all concurrent writes to a "delete" file to
1174 * finish only after the whole operation is complete.
1175 */
1176bool kernfs_remove_self(struct kernfs_node *kn)
1177{
1178        bool ret;
1179
1180        mutex_lock(&kernfs_mutex);
1181        kernfs_break_active_protection(kn);
1182
1183        /*
1184         * SUICIDAL is used to arbitrate among competing invocations.  Only
1185         * the first one will actually perform removal.  When the removal
1186         * is complete, SUICIDED is set and the active ref is restored
1187         * while holding kernfs_mutex.  The ones which lost arbitration
1188         * waits for SUICDED && drained which can happen only after the
1189         * enclosing kernfs operation which executed the winning instance
1190         * of kernfs_remove_self() finished.
1191         */
1192        if (!(kn->flags & KERNFS_SUICIDAL)) {
1193                kn->flags |= KERNFS_SUICIDAL;
1194                __kernfs_remove(kn);
1195                kn->flags |= KERNFS_SUICIDED;
1196                ret = true;
1197        } else {
1198                wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1199                DEFINE_WAIT(wait);
1200
1201                while (true) {
1202                        prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1203
1204                        if ((kn->flags & KERNFS_SUICIDED) &&
1205                            atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1206                                break;
1207
1208                        mutex_unlock(&kernfs_mutex);
1209                        schedule();
1210                        mutex_lock(&kernfs_mutex);
1211                }
1212                finish_wait(waitq, &wait);
1213                WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1214                ret = false;
1215        }
1216
1217        /*
1218         * This must be done while holding kernfs_mutex; otherwise, waiting
1219         * for SUICIDED && deactivated could finish prematurely.
1220         */
1221        kernfs_unbreak_active_protection(kn);
1222
1223        mutex_unlock(&kernfs_mutex);
1224        return ret;
1225}
1226
1227/**
1228 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
1229 * @parent: parent of the target
1230 * @name: name of the kernfs_node to remove
1231 * @ns: namespace tag of the kernfs_node to remove
1232 *
1233 * Look for the kernfs_node with @name and @ns under @parent and remove it.
1234 * Returns 0 on success, -ENOENT if such entry doesn't exist.
1235 */
1236int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
1237                             const void *ns)
1238{
1239        struct kernfs_node *kn;
1240
1241        if (!parent) {
1242                WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
1243                        name);
1244                return -ENOENT;
1245        }
1246
1247        mutex_lock(&kernfs_mutex);
1248
1249        kn = kernfs_find_ns(parent, name, ns);
1250        if (kn)
1251                __kernfs_remove(kn);
1252
1253        mutex_unlock(&kernfs_mutex);
1254
1255        if (kn)
1256                return 0;
1257        else
1258                return -ENOENT;
1259}
1260
1261/**
1262 * kernfs_rename_ns - move and rename a kernfs_node
1263 * @kn: target node
1264 * @new_parent: new parent to put @sd under
1265 * @new_name: new name
1266 * @new_ns: new namespace tag
1267 */
1268int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
1269                     const char *new_name, const void *new_ns)
1270{
1271        struct kernfs_node *old_parent;
1272        const char *old_name = NULL;
1273        int error;
1274
1275        /* can't move or rename root */
1276        if (!kn->parent)
1277                return -EINVAL;
1278
1279        mutex_lock(&kernfs_mutex);
1280
1281        error = -ENOENT;
1282        if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
1283            (new_parent->flags & KERNFS_EMPTY_DIR))
1284                goto out;
1285
1286        error = 0;
1287        if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
1288            (strcmp(kn->name, new_name) == 0))
1289                goto out;       /* nothing to rename */
1290
1291        error = -EEXIST;
1292        if (kernfs_find_ns(new_parent, new_name, new_ns))
1293                goto out;
1294
1295        /* rename kernfs_node */
1296        if (strcmp(kn->name, new_name) != 0) {
1297                error = -ENOMEM;
1298                new_name = kstrdup(new_name, GFP_KERNEL);
1299                if (!new_name)
1300                        goto out;
1301        } else {
1302                new_name = NULL;
1303        }
1304
1305        /*
1306         * Move to the appropriate place in the appropriate directories rbtree.
1307         */
1308        kernfs_unlink_sibling(kn);
1309        kernfs_get(new_parent);
1310
1311        /* rename_lock protects ->parent and ->name accessors */
1312        spin_lock_irq(&kernfs_rename_lock);
1313
1314        old_parent = kn->parent;
1315        kn->parent = new_parent;
1316
1317        kn->ns = new_ns;
1318        if (new_name) {
1319                if (!(kn->flags & KERNFS_STATIC_NAME))
1320                        old_name = kn->name;
1321                kn->flags &= ~KERNFS_STATIC_NAME;
1322                kn->name = new_name;
1323        }
1324
1325        spin_unlock_irq(&kernfs_rename_lock);
1326
1327        kn->hash = kernfs_name_hash(kn->name, kn->ns);
1328        kernfs_link_sibling(kn);
1329
1330        kernfs_put(old_parent);
1331        kfree(old_name);
1332
1333        error = 0;
1334 out:
1335        mutex_unlock(&kernfs_mutex);
1336        return error;
1337}
1338
1339/* Relationship between s_mode and the DT_xxx types */
1340static inline unsigned char dt_type(struct kernfs_node *kn)
1341{
1342        return (kn->mode >> 12) & 15;
1343}
1344
1345static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
1346{
1347        kernfs_put(filp->private_data);
1348        return 0;
1349}
1350
1351static struct kernfs_node *kernfs_dir_pos(const void *ns,
1352        struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
1353{
1354        if (pos) {
1355                int valid = kernfs_active(pos) &&
1356                        pos->parent == parent && hash == pos->hash;
1357                kernfs_put(pos);
1358                if (!valid)
1359                        pos = NULL;
1360        }
1361        if (!pos && (hash > 1) && (hash < INT_MAX)) {
1362                struct rb_node *node = parent->dir.children.rb_node;
1363                while (node) {
1364                        pos = rb_to_kn(node);
1365
1366                        if (hash < pos->hash)
1367                                node = node->rb_left;
1368                        else if (hash > pos->hash)
1369                                node = node->rb_right;
1370                        else
1371                                break;
1372                }
1373        }
1374        /* Skip over entries which are dying/dead or in the wrong namespace */
1375        while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
1376                struct rb_node *node = rb_next(&pos->rb);
1377                if (!node)
1378                        pos = NULL;
1379                else
1380                        pos = rb_to_kn(node);
1381        }
1382        return pos;
1383}
1384
1385static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1386        struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1387{
1388        pos = kernfs_dir_pos(ns, parent, ino, pos);
1389        if (pos) {
1390                do {
1391                        struct rb_node *node = rb_next(&pos->rb);
1392                        if (!node)
1393                                pos = NULL;
1394                        else
1395                                pos = rb_to_kn(node);
1396                } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1397        }
1398        return pos;
1399}
1400
1401static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1402{
1403        struct dentry *dentry = file->f_path.dentry;
1404        struct kernfs_node *parent = dentry->d_fsdata;
1405        struct kernfs_node *pos = file->private_data;
1406        const void *ns = NULL;
1407
1408        if (!dir_emit_dots(file, ctx))
1409                return 0;
1410        mutex_lock(&kernfs_mutex);
1411
1412        if (kernfs_ns_enabled(parent))
1413                ns = kernfs_info(dentry->d_sb)->ns;
1414
1415        for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1416             pos;
1417             pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1418                const char *name = pos->name;
1419                unsigned int type = dt_type(pos);
1420                int len = strlen(name);
1421                ino_t ino = pos->ino;
1422
1423                ctx->pos = pos->hash;
1424                file->private_data = pos;
1425                kernfs_get(pos);
1426
1427                mutex_unlock(&kernfs_mutex);
1428                if (!dir_emit(ctx, name, len, ino, type))
1429                        return 0;
1430                mutex_lock(&kernfs_mutex);
1431        }
1432        mutex_unlock(&kernfs_mutex);
1433        file->private_data = NULL;
1434        ctx->pos = INT_MAX;
1435        return 0;
1436}
1437
1438static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
1439                                    int whence)
1440{
1441        struct inode *inode = file_inode(file);
1442        loff_t ret;
1443
1444        mutex_lock(&inode->i_mutex);
1445        ret = generic_file_llseek(file, offset, whence);
1446        mutex_unlock(&inode->i_mutex);
1447
1448        return ret;
1449}
1450
1451static int kernfs_dir_open(struct inode *inode, struct file *file)
1452{
1453        /* Let the kernel safely know that iterate is present */
1454        file->f_mode |= FMODE_KABI_ITERATE;
1455        return 0;
1456}
1457
1458const struct file_operations kernfs_dir_fops = {
1459        .open           = kernfs_dir_open,
1460        .read           = generic_read_dir,
1461        .iterate        = kernfs_fop_readdir,
1462        .release        = kernfs_dir_fop_release,
1463        .llseek         = kernfs_dir_fop_llseek,
1464};
1465