linux/fs/proc/proc_sysctl.c
<<
>>
Prefs
   1/*
   2 * /proc/sys support
   3 */
   4#include <linux/init.h>
   5#include <linux/sysctl.h>
   6#include <linux/poll.h>
   7#include <linux/proc_fs.h>
   8#include <linux/printk.h>
   9#include <linux/security.h>
  10#include <linux/sched.h>
  11#include <linux/cred.h>
  12#include <linux/namei.h>
  13#include <linux/mm.h>
  14#include <linux/module.h>
  15#include "internal.h"
  16
  17static const struct dentry_operations proc_sys_dentry_operations;
  18static const struct file_operations proc_sys_file_operations;
  19static const struct inode_operations proc_sys_inode_operations;
  20static const struct file_operations proc_sys_dir_file_operations;
  21static const struct inode_operations proc_sys_dir_operations;
  22
  23/* Support for permanently empty directories */
  24
  25struct ctl_table sysctl_mount_point[] = {
  26        { }
  27};
  28
  29static bool is_empty_dir(struct ctl_table_header *head)
  30{
  31        return head->ctl_table[0].child == sysctl_mount_point;
  32}
  33
  34static void set_empty_dir(struct ctl_dir *dir)
  35{
  36        dir->header.ctl_table[0].child = sysctl_mount_point;
  37}
  38
  39static void clear_empty_dir(struct ctl_dir *dir)
  40
  41{
  42        dir->header.ctl_table[0].child = NULL;
  43}
  44
  45void proc_sys_poll_notify(struct ctl_table_poll *poll)
  46{
  47        if (!poll)
  48                return;
  49
  50        atomic_inc(&poll->event);
  51        wake_up_interruptible(&poll->wait);
  52}
  53
  54static struct ctl_table root_table[] = {
  55        {
  56                .procname = "",
  57                .mode = S_IFDIR|S_IRUGO|S_IXUGO,
  58        },
  59        { }
  60};
  61static struct ctl_table_root sysctl_table_root = {
  62        .default_set.dir.header = {
  63                {{.count = 1,
  64                  .nreg = 1,
  65                  .ctl_table = root_table }},
  66                .ctl_table_arg = root_table,
  67                .root = &sysctl_table_root,
  68                .set = &sysctl_table_root.default_set,
  69        },
  70};
  71
  72static DEFINE_SPINLOCK(sysctl_lock);
  73
  74static void drop_sysctl_table(struct ctl_table_header *header);
  75static int sysctl_follow_link(struct ctl_table_header **phead,
  76        struct ctl_table **pentry);
  77static int insert_links(struct ctl_table_header *head);
  78static void put_links(struct ctl_table_header *header);
  79
  80static void sysctl_print_dir(struct ctl_dir *dir)
  81{
  82        if (dir->header.parent)
  83                sysctl_print_dir(dir->header.parent);
  84        pr_cont("%s/", dir->header.ctl_table[0].procname);
  85}
  86
  87static int namecmp(const char *name1, int len1, const char *name2, int len2)
  88{
  89        int minlen;
  90        int cmp;
  91
  92        minlen = len1;
  93        if (minlen > len2)
  94                minlen = len2;
  95
  96        cmp = memcmp(name1, name2, minlen);
  97        if (cmp == 0)
  98                cmp = len1 - len2;
  99        return cmp;
 100}
 101
 102/* Called under sysctl_lock */
 103static struct ctl_table *find_entry(struct ctl_table_header **phead,
 104        struct ctl_dir *dir, const char *name, int namelen)
 105{
 106        struct ctl_table_header *head;
 107        struct ctl_table *entry;
 108        struct rb_node *node = dir->root.rb_node;
 109
 110        while (node)
 111        {
 112                struct ctl_node *ctl_node;
 113                const char *procname;
 114                int cmp;
 115
 116                ctl_node = rb_entry(node, struct ctl_node, node);
 117                head = ctl_node->header;
 118                entry = &head->ctl_table[ctl_node - head->node];
 119                procname = entry->procname;
 120
 121                cmp = namecmp(name, namelen, procname, strlen(procname));
 122                if (cmp < 0)
 123                        node = node->rb_left;
 124                else if (cmp > 0)
 125                        node = node->rb_right;
 126                else {
 127                        *phead = head;
 128                        return entry;
 129                }
 130        }
 131        return NULL;
 132}
 133
 134static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
 135{
 136        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 137        struct rb_node **p = &head->parent->root.rb_node;
 138        struct rb_node *parent = NULL;
 139        const char *name = entry->procname;
 140        int namelen = strlen(name);
 141
 142        while (*p) {
 143                struct ctl_table_header *parent_head;
 144                struct ctl_table *parent_entry;
 145                struct ctl_node *parent_node;
 146                const char *parent_name;
 147                int cmp;
 148
 149                parent = *p;
 150                parent_node = rb_entry(parent, struct ctl_node, node);
 151                parent_head = parent_node->header;
 152                parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
 153                parent_name = parent_entry->procname;
 154
 155                cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
 156                if (cmp < 0)
 157                        p = &(*p)->rb_left;
 158                else if (cmp > 0)
 159                        p = &(*p)->rb_right;
 160                else {
 161                        pr_err("sysctl duplicate entry: ");
 162                        sysctl_print_dir(head->parent);
 163                        pr_cont("/%s\n", entry->procname);
 164                        return -EEXIST;
 165                }
 166        }
 167
 168        rb_link_node(node, parent, p);
 169        rb_insert_color(node, &head->parent->root);
 170        return 0;
 171}
 172
 173static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
 174{
 175        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 176
 177        rb_erase(node, &head->parent->root);
 178}
 179
 180static void init_header(struct ctl_table_header *head,
 181        struct ctl_table_root *root, struct ctl_table_set *set,
 182        struct ctl_node *node, struct ctl_table *table)
 183{
 184        head->ctl_table = table;
 185        head->ctl_table_arg = table;
 186        head->used = 0;
 187        head->count = 1;
 188        head->nreg = 1;
 189        head->unregistering = NULL;
 190        head->root = root;
 191        head->set = set;
 192        head->parent = NULL;
 193        head->node = node;
 194        INIT_LIST_HEAD(&head->inodes);
 195        if (node) {
 196                struct ctl_table *entry;
 197                for (entry = table; entry->procname; entry++, node++)
 198                        node->header = head;
 199        }
 200}
 201
 202static void erase_header(struct ctl_table_header *head)
 203{
 204        struct ctl_table *entry;
 205        for (entry = head->ctl_table; entry->procname; entry++)
 206                erase_entry(head, entry);
 207}
 208
 209static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
 210{
 211        struct ctl_table *entry;
 212        int err;
 213
 214        /* Is this a permanently empty directory? */
 215        if (is_empty_dir(&dir->header))
 216                return -EROFS;
 217
 218        /* Am I creating a permanently empty directory? */
 219        if (header->ctl_table == sysctl_mount_point) {
 220                if (!RB_EMPTY_ROOT(&dir->root))
 221                        return -EINVAL;
 222                set_empty_dir(dir);
 223        }
 224
 225        dir->header.nreg++;
 226        header->parent = dir;
 227        err = insert_links(header);
 228        if (err)
 229                goto fail_links;
 230        for (entry = header->ctl_table; entry->procname; entry++) {
 231                err = insert_entry(header, entry);
 232                if (err)
 233                        goto fail;
 234        }
 235        return 0;
 236fail:
 237        erase_header(header);
 238        put_links(header);
 239fail_links:
 240        if (header->ctl_table == sysctl_mount_point)
 241                clear_empty_dir(dir);
 242        header->parent = NULL;
 243        drop_sysctl_table(&dir->header);
 244        return err;
 245}
 246
 247/* called under sysctl_lock */
 248static int use_table(struct ctl_table_header *p)
 249{
 250        if (unlikely(p->unregistering))
 251                return 0;
 252        p->used++;
 253        return 1;
 254}
 255
 256/* called under sysctl_lock */
 257static void unuse_table(struct ctl_table_header *p)
 258{
 259        if (!--p->used)
 260                if (unlikely(p->unregistering))
 261                        complete(p->unregistering);
 262}
 263
/*
 * Prune the dcache aliases of every inode on @head's inode list.
 *
 * NOTE(review): this is not called with sysctl_lock held.  The only caller
 * visible in this file, start_unregistering(), calls it after
 * spin_unlock(&sysctl_lock) and retakes the lock afterwards.
 */
static void proc_sys_prune_dcache(struct ctl_table_header *head)
{
	struct inode *inode, *prev = NULL;
	struct proc_inode *ei;

	rcu_read_lock();
	list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
		/* Skip inodes that igrab() could not hand back. */
		inode = igrab(&ei->vfs_inode);
		if (inode) {
			/* Step out of the RCU read section for iput()/pruning. */
			rcu_read_unlock();
			/*
			 * The previously grabbed inode is released only now,
			 * after the next one has been grabbed; presumably this
			 * keeps the list position valid - confirm.
			 */
			iput(prev);
			prev = inode;
			d_prune_aliases(inode);
			rcu_read_lock();
		}
	}
	rcu_read_unlock();
	/* Release the last inode grabbed in the loop (NULL if there was none). */
	iput(prev);
}
 284
/*
 * called under sysctl_lock; the lock is dropped and retaken inside, and
 * is held again on return.
 *
 * Marks @p as unregistering, waits until all current users are gone,
 * prunes the dcache entries that refer to it and finally removes its
 * entries from the parent directory's rb-tree.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		/* On-stack completion; unuse_table() completes it at used == 0. */
		struct completion wait;
		init_completion(&wait);
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
	} else {
		/* anything non-NULL; we'll never dereference it */
		p->unregistering = ERR_PTR(-EINVAL);
		spin_unlock(&sysctl_lock);
	}
	/*
	 * Prune dentries for unregistered sysctls: namespaced sysctls
	 * can have duplicate names and contaminate dcache very badly.
	 */
	proc_sys_prune_dcache(p);
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	spin_lock(&sysctl_lock);
	erase_header(p);
}
 315
 316static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
 317{
 318        BUG_ON(!head);
 319        spin_lock(&sysctl_lock);
 320        if (!use_table(head))
 321                head = ERR_PTR(-ENOENT);
 322        spin_unlock(&sysctl_lock);
 323        return head;
 324}
 325
 326static void sysctl_head_finish(struct ctl_table_header *head)
 327{
 328        if (!head)
 329                return;
 330        spin_lock(&sysctl_lock);
 331        unuse_table(head);
 332        spin_unlock(&sysctl_lock);
 333}
 334
 335static struct ctl_table_set *
 336lookup_header_set(struct ctl_table_root *root)
 337{
 338        struct ctl_table_set *set = &root->default_set;
 339        if (root->lookup)
 340                set = root->lookup(root);
 341        return set;
 342}
 343
 344static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
 345                                      struct ctl_dir *dir,
 346                                      const char *name, int namelen)
 347{
 348        struct ctl_table_header *head;
 349        struct ctl_table *entry;
 350
 351        spin_lock(&sysctl_lock);
 352        entry = find_entry(&head, dir, name, namelen);
 353        if (entry && use_table(head))
 354                *phead = head;
 355        else
 356                entry = NULL;
 357        spin_unlock(&sysctl_lock);
 358        return entry;
 359}
 360
 361static struct ctl_node *first_usable_entry(struct rb_node *node)
 362{
 363        struct ctl_node *ctl_node;
 364
 365        for (;node; node = rb_next(node)) {
 366                ctl_node = rb_entry(node, struct ctl_node, node);
 367                if (use_table(ctl_node->header))
 368                        return ctl_node;
 369        }
 370        return NULL;
 371}
 372
 373static void first_entry(struct ctl_dir *dir,
 374        struct ctl_table_header **phead, struct ctl_table **pentry)
 375{
 376        struct ctl_table_header *head = NULL;
 377        struct ctl_table *entry = NULL;
 378        struct ctl_node *ctl_node;
 379
 380        spin_lock(&sysctl_lock);
 381        ctl_node = first_usable_entry(rb_first(&dir->root));
 382        spin_unlock(&sysctl_lock);
 383        if (ctl_node) {
 384                head = ctl_node->header;
 385                entry = &head->ctl_table[ctl_node - head->node];
 386        }
 387        *phead = head;
 388        *pentry = entry;
 389}
 390
 391static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
 392{
 393        struct ctl_table_header *head = *phead;
 394        struct ctl_table *entry = *pentry;
 395        struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
 396
 397        spin_lock(&sysctl_lock);
 398        unuse_table(head);
 399
 400        ctl_node = first_usable_entry(rb_next(&ctl_node->node));
 401        spin_unlock(&sysctl_lock);
 402        head = NULL;
 403        if (ctl_node) {
 404                head = ctl_node->header;
 405                entry = &head->ctl_table[ctl_node - head->node];
 406        }
 407        *phead = head;
 408        *pentry = entry;
 409}
 410
 411void register_sysctl_root(struct ctl_table_root *root)
 412{
 413}
 414
 415/*
 416 * sysctl_perm does NOT grant the superuser all rights automatically, because
 417 * some sysctl variables are readonly even to root.
 418 */
 419
 420static int test_perm(int mode, int op)
 421{
 422        if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
 423                mode >>= 6;
 424        else if (in_egroup_p(GLOBAL_ROOT_GID))
 425                mode >>= 3;
 426        if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
 427                return 0;
 428        return -EACCES;
 429}
 430
 431static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
 432{
 433        struct ctl_table_root *root = head->root;
 434        int mode;
 435
 436        if (root->permissions)
 437                mode = root->permissions(head, table);
 438        else
 439                mode = table->mode;
 440
 441        return test_perm(mode, op);
 442}
 443
 444static struct inode *proc_sys_make_inode(struct super_block *sb,
 445                struct ctl_table_header *head, struct ctl_table *table)
 446{
 447        struct ctl_table_root *root = head->root;
 448        struct inode *inode;
 449        struct proc_inode *ei;
 450
 451        inode = new_inode(sb);
 452        if (!inode)
 453                goto out;
 454
 455        inode->i_ino = get_next_ino();
 456
 457        ei = PROC_I(inode);
 458
 459        spin_lock(&sysctl_lock);
 460        if (unlikely(head->unregistering)) {
 461                spin_unlock(&sysctl_lock);
 462                iput(inode);
 463                inode = NULL;
 464                goto out;
 465        }
 466        ei->sysctl = head;
 467        ei->sysctl_entry = table;
 468        list_add_rcu(&ei->sysctl_inodes, &head->inodes);
 469        head->count++;
 470        spin_unlock(&sysctl_lock);
 471
 472        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 473        inode->i_mode = table->mode;
 474        if (!S_ISDIR(table->mode)) {
 475                inode->i_mode |= S_IFREG;
 476                inode->i_op = &proc_sys_inode_operations;
 477                inode->i_fop = &proc_sys_file_operations;
 478        } else {
 479                inode->i_mode |= S_IFDIR;
 480                inode->i_op = &proc_sys_dir_operations;
 481                inode->i_fop = &proc_sys_dir_file_operations;
 482                if (is_empty_dir(head))
 483                        make_empty_dir_inode(inode);
 484        }
 485
 486        if (root->set_ownership)
 487                root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
 488
 489out:
 490        return inode;
 491}
 492
 493void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 494{
 495        spin_lock(&sysctl_lock);
 496        list_del_rcu(&PROC_I(inode)->sysctl_inodes);
 497        if (!--head->count)
 498                kfree_rcu(head, rcu);
 499        spin_unlock(&sysctl_lock);
 500}
 501
 502static struct ctl_table_header *grab_header(struct inode *inode)
 503{
 504        struct ctl_table_header *head = PROC_I(inode)->sysctl;
 505        if (!head)
 506                head = &sysctl_table_root.default_set.dir.header;
 507        return sysctl_head_grab(head);
 508}
 509
 510static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 511                                        unsigned int flags)
 512{
 513        struct ctl_table_header *head = grab_header(dir);
 514        struct ctl_table_header *h = NULL;
 515        const struct qstr *name = &dentry->d_name;
 516        struct ctl_table *p;
 517        struct inode *inode;
 518        struct dentry *err = ERR_PTR(-ENOENT);
 519        struct ctl_dir *ctl_dir;
 520        int ret;
 521
 522        if (IS_ERR(head))
 523                return ERR_CAST(head);
 524
 525        ctl_dir = container_of(head, struct ctl_dir, header);
 526
 527        p = lookup_entry(&h, ctl_dir, name->name, name->len);
 528        if (!p)
 529                goto out;
 530
 531        if (S_ISLNK(p->mode)) {
 532                ret = sysctl_follow_link(&h, &p);
 533                err = ERR_PTR(ret);
 534                if (ret)
 535                        goto out;
 536        }
 537
 538        err = ERR_PTR(-ENOMEM);
 539        inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
 540        if (!inode)
 541                goto out;
 542
 543        err = NULL;
 544        d_set_d_op(dentry, &proc_sys_dentry_operations);
 545        d_add(dentry, inode);
 546
 547out:
 548        if (h)
 549                sysctl_head_finish(h);
 550        sysctl_head_finish(head);
 551        return err;
 552}
 553
 554static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 555                size_t count, loff_t *ppos, int write)
 556{
 557        struct inode *inode = file_inode(filp);
 558        struct ctl_table_header *head = grab_header(inode);
 559        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 560        ssize_t error;
 561        size_t res;
 562
 563        if (IS_ERR(head))
 564                return PTR_ERR(head);
 565
 566        /*
 567         * At this point we know that the sysctl was not unregistered
 568         * and won't be until we finish.
 569         */
 570        error = -EPERM;
 571        if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 572                goto out;
 573
 574        /* if that can happen at all, it should be -EINVAL, not -EISDIR */
 575        error = -EINVAL;
 576        if (!table->proc_handler)
 577                goto out;
 578
 579        /* careful: calling conventions are nasty here */
 580        res = count;
 581        error = table->proc_handler(table, write, buf, &res, ppos);
 582        if (!error)
 583                error = res;
 584out:
 585        sysctl_head_finish(head);
 586
 587        return error;
 588}
 589
 590static ssize_t proc_sys_read(struct file *filp, char __user *buf,
 591                                size_t count, loff_t *ppos)
 592{
 593        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
 594}
 595
 596static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 597                                size_t count, loff_t *ppos)
 598{
 599        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 600}
 601
 602static int proc_sys_open(struct inode *inode, struct file *filp)
 603{
 604        struct ctl_table_header *head = grab_header(inode);
 605        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 606
 607        /* sysctl was unregistered */
 608        if (IS_ERR(head))
 609                return PTR_ERR(head);
 610
 611        if (table->poll)
 612                filp->private_data = proc_sys_poll_event(table->poll);
 613
 614        sysctl_head_finish(head);
 615
 616        return 0;
 617}
 618
 619static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 620{
 621        struct inode *inode = file_inode(filp);
 622        struct ctl_table_header *head = grab_header(inode);
 623        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 624        unsigned int ret = DEFAULT_POLLMASK;
 625        unsigned long event;
 626
 627        /* sysctl was unregistered */
 628        if (IS_ERR(head))
 629                return POLLERR | POLLHUP;
 630
 631        if (!table->proc_handler)
 632                goto out;
 633
 634        if (!table->poll)
 635                goto out;
 636
 637        event = (unsigned long)filp->private_data;
 638        poll_wait(filp, &table->poll->wait, wait);
 639
 640        if (event != atomic_read(&table->poll->event)) {
 641                filp->private_data = proc_sys_poll_event(table->poll);
 642                ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
 643        }
 644
 645out:
 646        sysctl_head_finish(head);
 647
 648        return ret;
 649}
 650
/*
 * Emit one readdir entry for @table, first instantiating a dentry and
 * inode for it if its name is not yet in the dcache under the directory
 * being read.
 *
 * Returns false if the dentry cannot be allocated or the inode cannot be
 * made; otherwise returns whatever dir_emit() returns.
 */
static bool proc_sys_fill_cache(struct file *file,
				struct dir_context *ctx,
				struct ctl_table_header *head,
				struct ctl_table *table)
{
	struct dentry *child, *dir = file->f_path.dentry;
	struct inode *inode;
	struct qstr qname;
	ino_t ino = 0;
	unsigned type = DT_UNKNOWN;

	qname.name = table->procname;
	qname.len  = strlen(table->procname);
	qname.hash = full_name_hash(dir, qname.name, qname.len);

	child = d_lookup(dir, &qname);
	if (!child) {
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
		child = d_alloc_parallel(dir, &qname, &wq);
		if (IS_ERR(child))
			return false;
		/*
		 * NOTE(review): presumably d_in_lookup() is true only when
		 * this caller owns the new in-lookup dentry - confirm
		 * against the dcache API.
		 */
		if (d_in_lookup(child)) {
			inode = proc_sys_make_inode(dir->d_sb, head, table);
			if (!inode) {
				d_lookup_done(child);
				dput(child);
				return false;
			}
			d_set_d_op(child, &proc_sys_dentry_operations);
			d_add(child, inode);
		}
	}
	inode = d_inode(child);
	ino  = inode->i_ino;
	/* The file-type bits of i_mode, shifted down, form the d_type value. */
	type = inode->i_mode >> 12;
	dput(child);
	return dir_emit(ctx, qname.name, qname.len, ino, type);
}
 689
 690static bool proc_sys_link_fill_cache(struct file *file,
 691                                    struct dir_context *ctx,
 692                                    struct ctl_table_header *head,
 693                                    struct ctl_table *table)
 694{
 695        bool ret = true;
 696        head = sysctl_head_grab(head);
 697
 698        if (S_ISLNK(table->mode)) {
 699                /* It is not an error if we can not follow the link ignore it */
 700                int err = sysctl_follow_link(&head, &table);
 701                if (err)
 702                        goto out;
 703        }
 704
 705        ret = proc_sys_fill_cache(file, ctx, head, table);
 706out:
 707        sysctl_head_finish(head);
 708        return ret;
 709}
 710
 711static int scan(struct ctl_table_header *head, struct ctl_table *table,
 712                unsigned long *pos, struct file *file,
 713                struct dir_context *ctx)
 714{
 715        bool res;
 716
 717        if ((*pos)++ < ctx->pos)
 718                return true;
 719
 720        if (unlikely(S_ISLNK(table->mode)))
 721                res = proc_sys_link_fill_cache(file, ctx, head, table);
 722        else
 723                res = proc_sys_fill_cache(file, ctx, head, table);
 724
 725        if (res)
 726                ctx->pos = *pos;
 727
 728        return res;
 729}
 730
 731static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 732{
 733        struct ctl_table_header *head = grab_header(file_inode(file));
 734        struct ctl_table_header *h = NULL;
 735        struct ctl_table *entry;
 736        struct ctl_dir *ctl_dir;
 737        unsigned long pos;
 738
 739        if (IS_ERR(head))
 740                return PTR_ERR(head);
 741
 742        ctl_dir = container_of(head, struct ctl_dir, header);
 743
 744        if (!dir_emit_dots(file, ctx))
 745                goto out;
 746
 747        pos = 2;
 748
 749        for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
 750                if (!scan(h, entry, &pos, file, ctx)) {
 751                        sysctl_head_finish(h);
 752                        break;
 753                }
 754        }
 755out:
 756        sysctl_head_finish(head);
 757        return 0;
 758}
 759
 760static int proc_sys_permission(struct inode *inode, int mask)
 761{
 762        /*
 763         * sysctl entries that are not writeable,
 764         * are _NOT_ writeable, capabilities or not.
 765         */
 766        struct ctl_table_header *head;
 767        struct ctl_table *table;
 768        int error;
 769
 770        /* Executable files are not allowed under /proc/sys/ */
 771        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
 772                return -EACCES;
 773
 774        head = grab_header(inode);
 775        if (IS_ERR(head))
 776                return PTR_ERR(head);
 777
 778        table = PROC_I(inode)->sysctl_entry;
 779        if (!table) /* global root - r-xr-xr-x */
 780                error = mask & MAY_WRITE ? -EACCES : 0;
 781        else /* Use the permissions on the sysctl table entry */
 782                error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
 783
 784        sysctl_head_finish(head);
 785        return error;
 786}
 787
 788static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 789{
 790        struct inode *inode = d_inode(dentry);
 791        int error;
 792
 793        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 794                return -EPERM;
 795
 796        error = setattr_prepare(dentry, attr);
 797        if (error)
 798                return error;
 799
 800        setattr_copy(inode, attr);
 801        mark_inode_dirty(inode);
 802        return 0;
 803}
 804
 805static int proc_sys_getattr(const struct path *path, struct kstat *stat,
 806                            u32 request_mask, unsigned int query_flags)
 807{
 808        struct inode *inode = d_inode(path->dentry);
 809        struct ctl_table_header *head = grab_header(inode);
 810        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 811
 812        if (IS_ERR(head))
 813                return PTR_ERR(head);
 814
 815        generic_fillattr(inode, stat);
 816        if (table)
 817                stat->mode = (stat->mode & S_IFMT) | table->mode;
 818
 819        sysctl_head_finish(head);
 820        return 0;
 821}
 822
 823static const struct file_operations proc_sys_file_operations = {
 824        .open           = proc_sys_open,
 825        .poll           = proc_sys_poll,
 826        .read           = proc_sys_read,
 827        .write          = proc_sys_write,
 828        .llseek         = default_llseek,
 829};
 830
 831static const struct file_operations proc_sys_dir_file_operations = {
 832        .read           = generic_read_dir,
 833        .iterate_shared = proc_sys_readdir,
 834        .llseek         = generic_file_llseek,
 835};
 836
 837static const struct inode_operations proc_sys_inode_operations = {
 838        .permission     = proc_sys_permission,
 839        .setattr        = proc_sys_setattr,
 840        .getattr        = proc_sys_getattr,
 841};
 842
 843static const struct inode_operations proc_sys_dir_operations = {
 844        .lookup         = proc_sys_lookup,
 845        .permission     = proc_sys_permission,
 846        .setattr        = proc_sys_setattr,
 847        .getattr        = proc_sys_getattr,
 848};
 849
 850static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 851{
 852        if (flags & LOOKUP_RCU)
 853                return -ECHILD;
 854        return !PROC_I(d_inode(dentry))->sysctl->unregistering;
 855}
 856
 857static int proc_sys_delete(const struct dentry *dentry)
 858{
 859        return !!PROC_I(d_inode(dentry))->sysctl->unregistering;
 860}
 861
 862static int sysctl_is_seen(struct ctl_table_header *p)
 863{
 864        struct ctl_table_set *set = p->set;
 865        int res;
 866        spin_lock(&sysctl_lock);
 867        if (p->unregistering)
 868                res = 0;
 869        else if (!set->is_seen)
 870                res = 1;
 871        else
 872                res = set->is_seen(set);
 873        spin_unlock(&sysctl_lock);
 874        return res;
 875}
 876
 877static int proc_sys_compare(const struct dentry *dentry,
 878                unsigned int len, const char *str, const struct qstr *name)
 879{
 880        struct ctl_table_header *head;
 881        struct inode *inode;
 882
 883        /* Although proc doesn't have negative dentries, rcu-walk means
 884         * that inode here can be NULL */
 885        /* AV: can it, indeed? */
 886        inode = d_inode_rcu(dentry);
 887        if (!inode)
 888                return 1;
 889        if (name->len != len)
 890                return 1;
 891        if (memcmp(name->name, str, len))
 892                return 1;
 893        head = rcu_dereference(PROC_I(inode)->sysctl);
 894        return !head || !sysctl_is_seen(head);
 895}
 896
 897static const struct dentry_operations proc_sys_dentry_operations = {
 898        .d_revalidate   = proc_sys_revalidate,
 899        .d_delete       = proc_sys_delete,
 900        .d_compare      = proc_sys_compare,
 901};
 902
 903static struct ctl_dir *find_subdir(struct ctl_dir *dir,
 904                                   const char *name, int namelen)
 905{
 906        struct ctl_table_header *head;
 907        struct ctl_table *entry;
 908
 909        entry = find_entry(&head, dir, name, namelen);
 910        if (!entry)
 911                return ERR_PTR(-ENOENT);
 912        if (!S_ISDIR(entry->mode))
 913                return ERR_PTR(-ENOTDIR);
 914        return container_of(head, struct ctl_dir, header);
 915}
 916
 917static struct ctl_dir *new_dir(struct ctl_table_set *set,
 918                               const char *name, int namelen)
 919{
 920        struct ctl_table *table;
 921        struct ctl_dir *new;
 922        struct ctl_node *node;
 923        char *new_name;
 924
 925        new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
 926                      sizeof(struct ctl_table)*2 +  namelen + 1,
 927                      GFP_KERNEL);
 928        if (!new)
 929                return NULL;
 930
 931        node = (struct ctl_node *)(new + 1);
 932        table = (struct ctl_table *)(node + 1);
 933        new_name = (char *)(table + 2);
 934        memcpy(new_name, name, namelen);
 935        new_name[namelen] = '\0';
 936        table[0].procname = new_name;
 937        table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
 938        init_header(&new->header, set->dir.header.root, set, node, table);
 939
 940        return new;
 941}
 942
 943/**
 944 * get_subdir - find or create a subdir with the specified name.
 945 * @dir:  Directory to create the subdirectory in
 946 * @name: The name of the subdirectory to find or create
 947 * @namelen: The length of name
 948 *
 949 * Takes a directory with an elevated reference count so we know that
 950 * if we drop the lock the directory will not go away.  Upon success
 951 * the reference is moved from @dir to the returned subdirectory.
 952 * Upon error an error code is returned and the reference on @dir is
 953 * simply dropped.
 954 */
 955static struct ctl_dir *get_subdir(struct ctl_dir *dir,
 956                                  const char *name, int namelen)
 957{
 958        struct ctl_table_set *set = dir->header.set;
 959        struct ctl_dir *subdir, *new = NULL;
 960        int err;
 961
 962        spin_lock(&sysctl_lock);
 963        subdir = find_subdir(dir, name, namelen);
 964        if (!IS_ERR(subdir))
 965                goto found;
 966        if (PTR_ERR(subdir) != -ENOENT)
 967                goto failed;
 968
 969        spin_unlock(&sysctl_lock);
 970        new = new_dir(set, name, namelen);
 971        spin_lock(&sysctl_lock);
 972        subdir = ERR_PTR(-ENOMEM);
 973        if (!new)
 974                goto failed;
 975
 976        /* Was the subdir added while we dropped the lock? */
 977        subdir = find_subdir(dir, name, namelen);
 978        if (!IS_ERR(subdir))
 979                goto found;
 980        if (PTR_ERR(subdir) != -ENOENT)
 981                goto failed;
 982
 983        /* Nope.  Use the our freshly made directory entry. */
 984        err = insert_header(dir, &new->header);
 985        subdir = ERR_PTR(err);
 986        if (err)
 987                goto failed;
 988        subdir = new;
 989found:
 990        subdir->header.nreg++;
 991failed:
 992        if (IS_ERR(subdir)) {
 993                pr_err("sysctl could not get directory: ");
 994                sysctl_print_dir(dir);
 995                pr_cont("/%*.*s %ld\n",
 996                        namelen, namelen, name, PTR_ERR(subdir));
 997        }
 998        drop_sysctl_table(&dir->header);
 999        if (new)
1000                drop_sysctl_table(&new->header);
1001        spin_unlock(&sysctl_lock);
1002        return subdir;
1003}
1004
1005static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
1006{
1007        struct ctl_dir *parent;
1008        const char *procname;
1009        if (!dir->header.parent)
1010                return &set->dir;
1011        parent = xlate_dir(set, dir->header.parent);
1012        if (IS_ERR(parent))
1013                return parent;
1014        procname = dir->header.ctl_table[0].procname;
1015        return find_subdir(parent, procname, strlen(procname));
1016}
1017
1018static int sysctl_follow_link(struct ctl_table_header **phead,
1019        struct ctl_table **pentry)
1020{
1021        struct ctl_table_header *head;
1022        struct ctl_table_root *root;
1023        struct ctl_table_set *set;
1024        struct ctl_table *entry;
1025        struct ctl_dir *dir;
1026        int ret;
1027
1028        ret = 0;
1029        spin_lock(&sysctl_lock);
1030        root = (*pentry)->data;
1031        set = lookup_header_set(root);
1032        dir = xlate_dir(set, (*phead)->parent);
1033        if (IS_ERR(dir))
1034                ret = PTR_ERR(dir);
1035        else {
1036                const char *procname = (*pentry)->procname;
1037                head = NULL;
1038                entry = find_entry(&head, dir, procname, strlen(procname));
1039                ret = -ENOENT;
1040                if (entry && use_table(head)) {
1041                        unuse_table(*phead);
1042                        *phead = head;
1043                        *pentry = entry;
1044                        ret = 0;
1045                }
1046        }
1047
1048        spin_unlock(&sysctl_lock);
1049        return ret;
1050}
1051
1052static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
1053{
1054        struct va_format vaf;
1055        va_list args;
1056
1057        va_start(args, fmt);
1058        vaf.fmt = fmt;
1059        vaf.va = &args;
1060
1061        pr_err("sysctl table check failed: %s/%s %pV\n",
1062               path, table->procname, &vaf);
1063
1064        va_end(args);
1065        return -EINVAL;
1066}
1067
1068static int sysctl_check_table(const char *path, struct ctl_table *table)
1069{
1070        int err = 0;
1071        for (; table->procname; table++) {
1072                if (table->child)
1073                        err = sysctl_err(path, table, "Not a file");
1074
1075                if ((table->proc_handler == proc_dostring) ||
1076                    (table->proc_handler == proc_dointvec) ||
1077                    (table->proc_handler == proc_douintvec) ||
1078                    (table->proc_handler == proc_dointvec_minmax) ||
1079                    (table->proc_handler == proc_dointvec_jiffies) ||
1080                    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1081                    (table->proc_handler == proc_dointvec_ms_jiffies) ||
1082                    (table->proc_handler == proc_doulongvec_minmax) ||
1083                    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1084                        if (!table->data)
1085                                err = sysctl_err(path, table, "No data");
1086                        if (!table->maxlen)
1087                                err = sysctl_err(path, table, "No maxlen");
1088                }
1089                if (!table->proc_handler)
1090                        err = sysctl_err(path, table, "No proc_handler");
1091
1092                if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1093                        err = sysctl_err(path, table, "bogus .mode 0%o",
1094                                table->mode);
1095        }
1096        return err;
1097}
1098
1099static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1100        struct ctl_table_root *link_root)
1101{
1102        struct ctl_table *link_table, *entry, *link;
1103        struct ctl_table_header *links;
1104        struct ctl_node *node;
1105        char *link_name;
1106        int nr_entries, name_bytes;
1107
1108        name_bytes = 0;
1109        nr_entries = 0;
1110        for (entry = table; entry->procname; entry++) {
1111                nr_entries++;
1112                name_bytes += strlen(entry->procname) + 1;
1113        }
1114
1115        links = kzalloc(sizeof(struct ctl_table_header) +
1116                        sizeof(struct ctl_node)*nr_entries +
1117                        sizeof(struct ctl_table)*(nr_entries + 1) +
1118                        name_bytes,
1119                        GFP_KERNEL);
1120
1121        if (!links)
1122                return NULL;
1123
1124        node = (struct ctl_node *)(links + 1);
1125        link_table = (struct ctl_table *)(node + nr_entries);
1126        link_name = (char *)&link_table[nr_entries + 1];
1127
1128        for (link = link_table, entry = table; entry->procname; link++, entry++) {
1129                int len = strlen(entry->procname) + 1;
1130                memcpy(link_name, entry->procname, len);
1131                link->procname = link_name;
1132                link->mode = S_IFLNK|S_IRWXUGO;
1133                link->data = link_root;
1134                link_name += len;
1135        }
1136        init_header(links, dir->header.root, dir->header.set, node, link_table);
1137        links->nreg = nr_entries;
1138
1139        return links;
1140}
1141
1142static bool get_links(struct ctl_dir *dir,
1143        struct ctl_table *table, struct ctl_table_root *link_root)
1144{
1145        struct ctl_table_header *head;
1146        struct ctl_table *entry, *link;
1147
1148        /* Are there links available for every entry in table? */
1149        for (entry = table; entry->procname; entry++) {
1150                const char *procname = entry->procname;
1151                link = find_entry(&head, dir, procname, strlen(procname));
1152                if (!link)
1153                        return false;
1154                if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1155                        continue;
1156                if (S_ISLNK(link->mode) && (link->data == link_root))
1157                        continue;
1158                return false;
1159        }
1160
1161        /* The checks passed.  Increase the registration count on the links */
1162        for (entry = table; entry->procname; entry++) {
1163                const char *procname = entry->procname;
1164                link = find_entry(&head, dir, procname, strlen(procname));
1165                head->nreg++;
1166        }
1167        return true;
1168}
1169
1170static int insert_links(struct ctl_table_header *head)
1171{
1172        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1173        struct ctl_dir *core_parent = NULL;
1174        struct ctl_table_header *links;
1175        int err;
1176
1177        if (head->set == root_set)
1178                return 0;
1179
1180        core_parent = xlate_dir(root_set, head->parent);
1181        if (IS_ERR(core_parent))
1182                return 0;
1183
1184        if (get_links(core_parent, head->ctl_table, head->root))
1185                return 0;
1186
1187        core_parent->header.nreg++;
1188        spin_unlock(&sysctl_lock);
1189
1190        links = new_links(core_parent, head->ctl_table, head->root);
1191
1192        spin_lock(&sysctl_lock);
1193        err = -ENOMEM;
1194        if (!links)
1195                goto out;
1196
1197        err = 0;
1198        if (get_links(core_parent, head->ctl_table, head->root)) {
1199                kfree(links);
1200                goto out;
1201        }
1202
1203        err = insert_header(core_parent, links);
1204        if (err)
1205                kfree(links);
1206out:
1207        drop_sysctl_table(&core_parent->header);
1208        return err;
1209}
1210
1211/**
1212 * __register_sysctl_table - register a leaf sysctl table
1213 * @set: Sysctl tree to register on
1214 * @path: The path to the directory the sysctl table is in.
1215 * @table: the top-level table structure
1216 *
1217 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1218 * array. A completely 0 filled entry terminates the table.
1219 *
1220 * The members of the &struct ctl_table structure are used as follows:
1221 *
1222 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1223 *            enter a sysctl file
1224 *
1225 * data - a pointer to data for use by proc_handler
1226 *
1227 * maxlen - the maximum size in bytes of the data
1228 *
1229 * mode - the file permissions for the /proc/sys file
1230 *
1231 * child - must be %NULL.
1232 *
1233 * proc_handler - the text handler routine (described below)
1234 *
1235 * extra1, extra2 - extra pointers usable by the proc handler routines
1236 *
1237 * Leaf nodes in the sysctl tree will be represented by a single file
1238 * under /proc; non-leaf nodes will be represented by directories.
1239 *
1240 * There must be a proc_handler routine for any terminal nodes.
1241 * Several default handlers are available to cover common cases -
1242 *
1243 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1244 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1245 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1246 *
1247 * It is the handler's job to read the input buffer from user memory
1248 * and process it. The handler should return 0 on success.
1249 *
1250 * This routine returns %NULL on a failure to register, and a pointer
1251 * to the table header on success.
1252 */
1253struct ctl_table_header *__register_sysctl_table(
1254        struct ctl_table_set *set,
1255        const char *path, struct ctl_table *table)
1256{
1257        struct ctl_table_root *root = set->dir.header.root;
1258        struct ctl_table_header *header;
1259        const char *name, *nextname;
1260        struct ctl_dir *dir;
1261        struct ctl_table *entry;
1262        struct ctl_node *node;
1263        int nr_entries = 0;
1264
1265        for (entry = table; entry->procname; entry++)
1266                nr_entries++;
1267
1268        header = kzalloc(sizeof(struct ctl_table_header) +
1269                         sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1270        if (!header)
1271                return NULL;
1272
1273        node = (struct ctl_node *)(header + 1);
1274        init_header(header, root, set, node, table);
1275        if (sysctl_check_table(path, table))
1276                goto fail;
1277
1278        spin_lock(&sysctl_lock);
1279        dir = &set->dir;
1280        /* Reference moved down the diretory tree get_subdir */
1281        dir->header.nreg++;
1282        spin_unlock(&sysctl_lock);
1283
1284        /* Find the directory for the ctl_table */
1285        for (name = path; name; name = nextname) {
1286                int namelen;
1287                nextname = strchr(name, '/');
1288                if (nextname) {
1289                        namelen = nextname - name;
1290                        nextname++;
1291                } else {
1292                        namelen = strlen(name);
1293                }
1294                if (namelen == 0)
1295                        continue;
1296
1297                dir = get_subdir(dir, name, namelen);
1298                if (IS_ERR(dir))
1299                        goto fail;
1300        }
1301
1302        spin_lock(&sysctl_lock);
1303        if (insert_header(dir, header))
1304                goto fail_put_dir_locked;
1305
1306        drop_sysctl_table(&dir->header);
1307        spin_unlock(&sysctl_lock);
1308
1309        return header;
1310
1311fail_put_dir_locked:
1312        drop_sysctl_table(&dir->header);
1313        spin_unlock(&sysctl_lock);
1314fail:
1315        kfree(header);
1316        dump_stack();
1317        return NULL;
1318}
1319
1320/**
1321 * register_sysctl - register a sysctl table
1322 * @path: The path to the directory the sysctl table is in.
1323 * @table: the table structure
1324 *
1325 * Register a sysctl table. @table should be a filled in ctl_table
1326 * array. A completely 0 filled entry terminates the table.
1327 *
1328 * See __register_sysctl_table for more details.
1329 */
1330struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1331{
1332        return __register_sysctl_table(&sysctl_table_root.default_set,
1333                                        path, table);
1334}
1335EXPORT_SYMBOL(register_sysctl);
1336
/*
 * Append "@name/" at @pos inside the PATH_MAX-sized buffer starting at @path.
 *
 * Returns a pointer to the new terminating NUL (i.e. where the next
 * component would go), or NULL without writing anything if the result would
 * not fit.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int namelen = strlen(name);
	char *end;

	/* Room is needed for the name, the '/' and the terminating NUL. */
	if (((pos - path) + namelen + 2) >= PATH_MAX)
		return NULL;

	end = pos + namelen;
	memcpy(pos, name, namelen);
	end[0] = '/';
	end[1] = '\0';

	return end + 1;
}
1349
1350static int count_subheaders(struct ctl_table *table)
1351{
1352        int has_files = 0;
1353        int nr_subheaders = 0;
1354        struct ctl_table *entry;
1355
1356        /* special case: no directory and empty directory */
1357        if (!table || !table->procname)
1358                return 1;
1359
1360        for (entry = table; entry->procname; entry++) {
1361                if (entry->child)
1362                        nr_subheaders += count_subheaders(entry->child);
1363                else
1364                        has_files = 1;
1365        }
1366        return nr_subheaders + has_files;
1367}
1368
1369static int register_leaf_sysctl_tables(const char *path, char *pos,
1370        struct ctl_table_header ***subheader, struct ctl_table_set *set,
1371        struct ctl_table *table)
1372{
1373        struct ctl_table *ctl_table_arg = NULL;
1374        struct ctl_table *entry, *files;
1375        int nr_files = 0;
1376        int nr_dirs = 0;
1377        int err = -ENOMEM;
1378
1379        for (entry = table; entry->procname; entry++) {
1380                if (entry->child)
1381                        nr_dirs++;
1382                else
1383                        nr_files++;
1384        }
1385
1386        files = table;
1387        /* If there are mixed files and directories we need a new table */
1388        if (nr_dirs && nr_files) {
1389                struct ctl_table *new;
1390                files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1391                                GFP_KERNEL);
1392                if (!files)
1393                        goto out;
1394
1395                ctl_table_arg = files;
1396                for (new = files, entry = table; entry->procname; entry++) {
1397                        if (entry->child)
1398                                continue;
1399                        *new = *entry;
1400                        new++;
1401                }
1402        }
1403
1404        /* Register everything except a directory full of subdirectories */
1405        if (nr_files || !nr_dirs) {
1406                struct ctl_table_header *header;
1407                header = __register_sysctl_table(set, path, files);
1408                if (!header) {
1409                        kfree(ctl_table_arg);
1410                        goto out;
1411                }
1412
1413                /* Remember if we need to free the file table */
1414                header->ctl_table_arg = ctl_table_arg;
1415                **subheader = header;
1416                (*subheader)++;
1417        }
1418
1419        /* Recurse into the subdirectories. */
1420        for (entry = table; entry->procname; entry++) {
1421                char *child_pos;
1422
1423                if (!entry->child)
1424                        continue;
1425
1426                err = -ENAMETOOLONG;
1427                child_pos = append_path(path, pos, entry->procname);
1428                if (!child_pos)
1429                        goto out;
1430
1431                err = register_leaf_sysctl_tables(path, child_pos, subheader,
1432                                                  set, entry->child);
1433                pos[0] = '\0';
1434                if (err)
1435                        goto out;
1436        }
1437        err = 0;
1438out:
1439        /* On failure our caller will unregister all registered subheaders */
1440        return err;
1441}
1442
1443/**
1444 * __register_sysctl_paths - register a sysctl table hierarchy
1445 * @set: Sysctl tree to register on
1446 * @path: The path to the directory the sysctl table is in.
1447 * @table: the top-level table structure
1448 *
1449 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1450 * array. A completely 0 filled entry terminates the table.
1451 *
1452 * See __register_sysctl_table for more details.
1453 */
1454struct ctl_table_header *__register_sysctl_paths(
1455        struct ctl_table_set *set,
1456        const struct ctl_path *path, struct ctl_table *table)
1457{
1458        struct ctl_table *ctl_table_arg = table;
1459        int nr_subheaders = count_subheaders(table);
1460        struct ctl_table_header *header = NULL, **subheaders, **subheader;
1461        const struct ctl_path *component;
1462        char *new_path, *pos;
1463
1464        pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1465        if (!new_path)
1466                return NULL;
1467
1468        pos[0] = '\0';
1469        for (component = path; component->procname; component++) {
1470                pos = append_path(new_path, pos, component->procname);
1471                if (!pos)
1472                        goto out;
1473        }
1474        while (table->procname && table->child && !table[1].procname) {
1475                pos = append_path(new_path, pos, table->procname);
1476                if (!pos)
1477                        goto out;
1478                table = table->child;
1479        }
1480        if (nr_subheaders == 1) {
1481                header = __register_sysctl_table(set, new_path, table);
1482                if (header)
1483                        header->ctl_table_arg = ctl_table_arg;
1484        } else {
1485                header = kzalloc(sizeof(*header) +
1486                                 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1487                if (!header)
1488                        goto out;
1489
1490                subheaders = (struct ctl_table_header **) (header + 1);
1491                subheader = subheaders;
1492                header->ctl_table_arg = ctl_table_arg;
1493
1494                if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1495                                                set, table))
1496                        goto err_register_leaves;
1497        }
1498
1499out:
1500        kfree(new_path);
1501        return header;
1502
1503err_register_leaves:
1504        while (subheader > subheaders) {
1505                struct ctl_table_header *subh = *(--subheader);
1506                struct ctl_table *table = subh->ctl_table_arg;
1507                unregister_sysctl_table(subh);
1508                kfree(table);
1509        }
1510        kfree(header);
1511        header = NULL;
1512        goto out;
1513}
1514
1515/**
1516 * register_sysctl_table_path - register a sysctl table hierarchy
1517 * @path: The path to the directory the sysctl table is in.
1518 * @table: the top-level table structure
1519 *
1520 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1521 * array. A completely 0 filled entry terminates the table.
1522 *
1523 * See __register_sysctl_paths for more details.
1524 */
1525struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1526                                                struct ctl_table *table)
1527{
1528        return __register_sysctl_paths(&sysctl_table_root.default_set,
1529                                        path, table);
1530}
1531EXPORT_SYMBOL(register_sysctl_paths);
1532
1533/**
1534 * register_sysctl_table - register a sysctl table hierarchy
1535 * @table: the top-level table structure
1536 *
1537 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1538 * array. A completely 0 filled entry terminates the table.
1539 *
1540 * See register_sysctl_paths for more details.
1541 */
1542struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1543{
1544        static const struct ctl_path null_path[] = { {} };
1545
1546        return register_sysctl_paths(null_path, table);
1547}
1548EXPORT_SYMBOL(register_sysctl_table);
1549
1550static void put_links(struct ctl_table_header *header)
1551{
1552        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1553        struct ctl_table_root *root = header->root;
1554        struct ctl_dir *parent = header->parent;
1555        struct ctl_dir *core_parent;
1556        struct ctl_table *entry;
1557
1558        if (header->set == root_set)
1559                return;
1560
1561        core_parent = xlate_dir(root_set, parent);
1562        if (IS_ERR(core_parent))
1563                return;
1564
1565        for (entry = header->ctl_table; entry->procname; entry++) {
1566                struct ctl_table_header *link_head;
1567                struct ctl_table *link;
1568                const char *name = entry->procname;
1569
1570                link = find_entry(&link_head, core_parent, name, strlen(name));
1571                if (link &&
1572                    ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1573                     (S_ISLNK(link->mode) && (link->data == root)))) {
1574                        drop_sysctl_table(link_head);
1575                }
1576                else {
1577                        pr_err("sysctl link missing during unregister: ");
1578                        sysctl_print_dir(parent);
1579                        pr_cont("/%s\n", name);
1580                }
1581        }
1582}
1583
1584static void drop_sysctl_table(struct ctl_table_header *header)
1585{
1586        struct ctl_dir *parent = header->parent;
1587
1588        if (--header->nreg)
1589                return;
1590
1591        put_links(header);
1592        start_unregistering(header);
1593        if (!--header->count)
1594                kfree_rcu(header, rcu);
1595
1596        if (parent)
1597                drop_sysctl_table(&parent->header);
1598}
1599
1600/**
1601 * unregister_sysctl_table - unregister a sysctl table hierarchy
1602 * @header: the header returned from register_sysctl_table
1603 *
1604 * Unregisters the sysctl table and all children. proc entries may not
1605 * actually be removed until they are no longer used by anyone.
1606 */
1607void unregister_sysctl_table(struct ctl_table_header * header)
1608{
1609        int nr_subheaders;
1610        might_sleep();
1611
1612        if (header == NULL)
1613                return;
1614
1615        nr_subheaders = count_subheaders(header->ctl_table_arg);
1616        if (unlikely(nr_subheaders > 1)) {
1617                struct ctl_table_header **subheaders;
1618                int i;
1619
1620                subheaders = (struct ctl_table_header **)(header + 1);
1621                for (i = nr_subheaders -1; i >= 0; i--) {
1622                        struct ctl_table_header *subh = subheaders[i];
1623                        struct ctl_table *table = subh->ctl_table_arg;
1624                        unregister_sysctl_table(subh);
1625                        kfree(table);
1626                }
1627                kfree(header);
1628                return;
1629        }
1630
1631        spin_lock(&sysctl_lock);
1632        drop_sysctl_table(header);
1633        spin_unlock(&sysctl_lock);
1634}
1635EXPORT_SYMBOL(unregister_sysctl_table);
1636
1637void setup_sysctl_set(struct ctl_table_set *set,
1638        struct ctl_table_root *root,
1639        int (*is_seen)(struct ctl_table_set *))
1640{
1641        memset(set, 0, sizeof(*set));
1642        set->is_seen = is_seen;
1643        init_header(&set->dir.header, root, set, NULL, root_table);
1644}
1645
1646void retire_sysctl_set(struct ctl_table_set *set)
1647{
1648        WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1649}
1650
1651int __init proc_sys_init(void)
1652{
1653        struct proc_dir_entry *proc_sys_root;
1654
1655        proc_sys_root = proc_mkdir("sys", NULL);
1656        proc_sys_root->proc_iops = &proc_sys_dir_operations;
1657        proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
1658        proc_sys_root->nlink = 0;
1659
1660        return sysctl_init();
1661}
1662