linux/fs/proc/proc_sysctl.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * /proc/sys support
   4 */
   5#include <linux/init.h>
   6#include <linux/sysctl.h>
   7#include <linux/poll.h>
   8#include <linux/proc_fs.h>
   9#include <linux/printk.h>
  10#include <linux/security.h>
  11#include <linux/sched.h>
  12#include <linux/cred.h>
  13#include <linux/namei.h>
  14#include <linux/mm.h>
  15#include <linux/module.h>
  16#include <linux/bpf-cgroup.h>
  17#include "internal.h"
  18
    /*
     * Forward declarations of the operation tables. They are defined further
     * down in this file but are referenced earlier, e.g. when inodes and
     * dentries are set up.
     */
  19static const struct dentry_operations proc_sys_dentry_operations;
  20static const struct file_operations proc_sys_file_operations;
  21static const struct inode_operations proc_sys_inode_operations;
  22static const struct file_operations proc_sys_dir_file_operations;
  23static const struct inode_operations proc_sys_dir_operations;
  24
  25/* Support for permanently empty directories */
  26
    /*
     * Table holding nothing but the terminating empty entry. It is used by
     * address: is_empty_dir()/set_empty_dir() compare or store it in
     * ctl_table[0].child, and insert_header() compares a header's ctl_table
     * against it, to recognise a permanently empty directory.
     */
  27struct ctl_table sysctl_mount_point[] = {
  28        { }
  29};
  30
  31static bool is_empty_dir(struct ctl_table_header *head)
  32{
  33        return head->ctl_table[0].child == sysctl_mount_point;
  34}
  35
  36static void set_empty_dir(struct ctl_dir *dir)
  37{
  38        dir->header.ctl_table[0].child = sysctl_mount_point;
  39}
  40
  41static void clear_empty_dir(struct ctl_dir *dir)
  42
  43{
  44        dir->header.ctl_table[0].child = NULL;
  45}
  46
  47void proc_sys_poll_notify(struct ctl_table_poll *poll)
  48{
  49        if (!poll)
  50                return;
  51
  52        atomic_inc(&poll->event);
  53        wake_up_interruptible(&poll->wait);
  54}
  55
    /*
     * Table describing the root directory itself: a single entry with an
     * empty name and directory mode r-xr-xr-x, followed by the terminating
     * empty entry.
     */
  56static struct ctl_table root_table[] = {
  57        {
  58                .procname = "",
  59                .mode = S_IFDIR|S_IRUGO|S_IXUGO,
  60        },
  61        { }
  62};
    /*
     * The root of the sysctl tree. The header of its default set's directory
     * is initialised statically: it describes root_table, starts with
     * count and nreg of 1, and points back at this root and at the default
     * set. grab_header() falls back to this header for inodes that carry no
     * header of their own.
     */
  63static struct ctl_table_root sysctl_table_root = {
  64        .default_set.dir.header = {
  65                {{.count = 1,
  66                  .nreg = 1,
  67                  .ctl_table = root_table }},
  68                .ctl_table_arg = root_table,
  69                .root = &sysctl_table_root,
  70                .set = &sysctl_table_root.default_set,
  71        },
  72};
  73
    /*
     * Spinlock taken in this file around rbtree lookups and erasures, around
     * updates of a header's used/count/unregistering fields, and around
     * changes to a header's inode list.
     */
  74static DEFINE_SPINLOCK(sysctl_lock);
  75
    /*
     * Prototypes of static helpers that are called below before their
     * definitions appear.
     */
  76static void drop_sysctl_table(struct ctl_table_header *header);
  77static int sysctl_follow_link(struct ctl_table_header **phead,
  78        struct ctl_table **pentry);
  79static int insert_links(struct ctl_table_header *head);
  80static void put_links(struct ctl_table_header *header);
  81
  82static void sysctl_print_dir(struct ctl_dir *dir)
  83{
  84        if (dir->header.parent)
  85                sysctl_print_dir(dir->header.parent);
  86        pr_cont("%s/", dir->header.ctl_table[0].procname);
  87}
  88
  89static int namecmp(const char *name1, int len1, const char *name2, int len2)
  90{
  91        int minlen;
  92        int cmp;
  93
  94        minlen = len1;
  95        if (minlen > len2)
  96                minlen = len2;
  97
  98        cmp = memcmp(name1, name2, minlen);
  99        if (cmp == 0)
 100                cmp = len1 - len2;
 101        return cmp;
 102}
 103
 104/* Called under sysctl_lock */
 105static struct ctl_table *find_entry(struct ctl_table_header **phead,
 106        struct ctl_dir *dir, const char *name, int namelen)
 107{
 108        struct ctl_table_header *head;
 109        struct ctl_table *entry;
 110        struct rb_node *node = dir->root.rb_node;
 111
 112        while (node)
 113        {
 114                struct ctl_node *ctl_node;
 115                const char *procname;
 116                int cmp;
 117
 118                ctl_node = rb_entry(node, struct ctl_node, node);
 119                head = ctl_node->header;
 120                entry = &head->ctl_table[ctl_node - head->node];
 121                procname = entry->procname;
 122
 123                cmp = namecmp(name, namelen, procname, strlen(procname));
 124                if (cmp < 0)
 125                        node = node->rb_left;
 126                else if (cmp > 0)
 127                        node = node->rb_right;
 128                else {
 129                        *phead = head;
 130                        return entry;
 131                }
 132        }
 133        return NULL;
 134}
 135
 136static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
 137{
 138        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 139        struct rb_node **p = &head->parent->root.rb_node;
 140        struct rb_node *parent = NULL;
 141        const char *name = entry->procname;
 142        int namelen = strlen(name);
 143
 144        while (*p) {
 145                struct ctl_table_header *parent_head;
 146                struct ctl_table *parent_entry;
 147                struct ctl_node *parent_node;
 148                const char *parent_name;
 149                int cmp;
 150
 151                parent = *p;
 152                parent_node = rb_entry(parent, struct ctl_node, node);
 153                parent_head = parent_node->header;
 154                parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
 155                parent_name = parent_entry->procname;
 156
 157                cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
 158                if (cmp < 0)
 159                        p = &(*p)->rb_left;
 160                else if (cmp > 0)
 161                        p = &(*p)->rb_right;
 162                else {
 163                        pr_err("sysctl duplicate entry: ");
 164                        sysctl_print_dir(head->parent);
 165                        pr_cont("/%s\n", entry->procname);
 166                        return -EEXIST;
 167                }
 168        }
 169
 170        rb_link_node(node, parent, p);
 171        rb_insert_color(node, &head->parent->root);
 172        return 0;
 173}
 174
 175static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
 176{
 177        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 178
 179        rb_erase(node, &head->parent->root);
 180}
 181
 182static void init_header(struct ctl_table_header *head,
 183        struct ctl_table_root *root, struct ctl_table_set *set,
 184        struct ctl_node *node, struct ctl_table *table)
 185{
 186        head->ctl_table = table;
 187        head->ctl_table_arg = table;
 188        head->used = 0;
 189        head->count = 1;
 190        head->nreg = 1;
 191        head->unregistering = NULL;
 192        head->root = root;
 193        head->set = set;
 194        head->parent = NULL;
 195        head->node = node;
 196        INIT_HLIST_HEAD(&head->inodes);
 197        if (node) {
 198                struct ctl_table *entry;
 199                for (entry = table; entry->procname; entry++, node++)
 200                        node->header = head;
 201        }
 202}
 203
 204static void erase_header(struct ctl_table_header *head)
 205{
 206        struct ctl_table *entry;
 207        for (entry = head->ctl_table; entry->procname; entry++)
 208                erase_entry(head, entry);
 209}
 210
 211static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
 212{
 213        struct ctl_table *entry;
 214        int err;
 215
 216        /* Is this a permanently empty directory? */
 217        if (is_empty_dir(&dir->header))
 218                return -EROFS;
 219
 220        /* Am I creating a permanently empty directory? */
 221        if (header->ctl_table == sysctl_mount_point) {
 222                if (!RB_EMPTY_ROOT(&dir->root))
 223                        return -EINVAL;
 224                set_empty_dir(dir);
 225        }
 226
 227        dir->header.nreg++;
 228        header->parent = dir;
 229        err = insert_links(header);
 230        if (err)
 231                goto fail_links;
 232        for (entry = header->ctl_table; entry->procname; entry++) {
 233                err = insert_entry(header, entry);
 234                if (err)
 235                        goto fail;
 236        }
 237        return 0;
 238fail:
 239        erase_header(header);
 240        put_links(header);
 241fail_links:
 242        if (header->ctl_table == sysctl_mount_point)
 243                clear_empty_dir(dir);
 244        header->parent = NULL;
 245        drop_sysctl_table(&dir->header);
 246        return err;
 247}
 248
 249/* called under sysctl_lock */
 250static int use_table(struct ctl_table_header *p)
 251{
 252        if (unlikely(p->unregistering))
 253                return 0;
 254        p->used++;
 255        return 1;
 256}
 257
 258/* called under sysctl_lock */
 259static void unuse_table(struct ctl_table_header *p)
 260{
 261        if (!--p->used)
 262                if (unlikely(p->unregistering))
 263                        complete(p->unregistering);
 264}
 265
    /*
     * proc_sys_prune_dcache - prune the dentries of all inodes hooked to @head
     *
     * Repeatedly takes the first inode off head->inodes (unhooking it under
     * sysctl_lock) until the list is empty. For each inode it tries to pin
     * the superblock and the inode, prunes the inode's dentry aliases and
     * drops both references again. The list is walked under rcu_read_lock();
     * the RCU read lock is dropped around the pruning itself.
     */
 266static void proc_sys_prune_dcache(struct ctl_table_header *head)
 267{
 268        struct inode *inode;
 269        struct proc_inode *ei;
 270        struct hlist_node *node;
 271        struct super_block *sb;
 272
 273        rcu_read_lock();
 274        for (;;) {
 275                node = hlist_first_rcu(&head->inodes);
 276                if (!node)
 277                        break;
 278                ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
                    /* unhook it so the next iteration sees a new first node */
 279                spin_lock(&sysctl_lock);
 280                hlist_del_init_rcu(&ei->sysctl_inodes);
 281                spin_unlock(&sysctl_lock);
 282
 283                inode = &ei->vfs_inode;
 284                sb = inode->i_sb;
                    /* s_active already zero: skip, still under the RCU read lock */
 285                if (!atomic_inc_not_zero(&sb->s_active))
 286                        continue;
 287                inode = igrab(inode);
 288                rcu_read_unlock();
                    /* no inode reference obtained: give the superblock back and move on */
 289                if (unlikely(!inode)) {
 290                        deactivate_super(sb);
 291                        rcu_read_lock();
 292                        continue;
 293                }
 294
 295                d_prune_aliases(inode);
 296                iput(inode);
 297                deactivate_super(sb);
 298
                    /* re-enter the read-side section before looking at the list again */
 299                rcu_read_lock();
 300        }
 301        rcu_read_unlock();
 302}
 303
 304/*
     * start_unregistering - begin tearing down header @p
     *
     * Called under sysctl_lock. The lock is dropped inside (in both branches)
     * and taken again before returning. If the header is still in use, the
     * caller sleeps until the last user calls unuse_table(). Afterwards the
     * dentries of the header's inodes are pruned and its entries are erased
     * from the parent directory's rbtree.
     */
 305static void start_unregistering(struct ctl_table_header *p)
 306{
 307        /*
 308         * if p->used is 0, nobody will ever touch that entry again;
 309         * we'll eliminate all paths to it before dropping sysctl_lock
 310         */
 311        if (unlikely(p->used)) {
                    /* on-stack completion, signalled by unuse_table() */
 312                struct completion wait;
 313                init_completion(&wait);
 314                p->unregistering = &wait;
 315                spin_unlock(&sysctl_lock);
 316                wait_for_completion(&wait);
 317        } else {
 318                /* anything non-NULL; we'll never dereference it */
 319                p->unregistering = ERR_PTR(-EINVAL);
 320                spin_unlock(&sysctl_lock);
 321        }
 322        /*
 323         * Prune dentries for unregistered sysctls: namespaced sysctls
 324         * can have duplicate names and contaminate dcache very badly.
 325         */
 326        proc_sys_prune_dcache(p);
 327        /*
 328         * do not remove from the list until nobody holds it; walking the
 329         * list in do_sysctl() relies on that.
 330         */
 331        spin_lock(&sysctl_lock);
 332        erase_header(p);
 333}
 334
 335static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
 336{
 337        BUG_ON(!head);
 338        spin_lock(&sysctl_lock);
 339        if (!use_table(head))
 340                head = ERR_PTR(-ENOENT);
 341        spin_unlock(&sysctl_lock);
 342        return head;
 343}
 344
 345static void sysctl_head_finish(struct ctl_table_header *head)
 346{
 347        if (!head)
 348                return;
 349        spin_lock(&sysctl_lock);
 350        unuse_table(head);
 351        spin_unlock(&sysctl_lock);
 352}
 353
 354static struct ctl_table_set *
 355lookup_header_set(struct ctl_table_root *root)
 356{
 357        struct ctl_table_set *set = &root->default_set;
 358        if (root->lookup)
 359                set = root->lookup(root);
 360        return set;
 361}
 362
 363static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
 364                                      struct ctl_dir *dir,
 365                                      const char *name, int namelen)
 366{
 367        struct ctl_table_header *head;
 368        struct ctl_table *entry;
 369
 370        spin_lock(&sysctl_lock);
 371        entry = find_entry(&head, dir, name, namelen);
 372        if (entry && use_table(head))
 373                *phead = head;
 374        else
 375                entry = NULL;
 376        spin_unlock(&sysctl_lock);
 377        return entry;
 378}
 379
 380static struct ctl_node *first_usable_entry(struct rb_node *node)
 381{
 382        struct ctl_node *ctl_node;
 383
 384        for (;node; node = rb_next(node)) {
 385                ctl_node = rb_entry(node, struct ctl_node, node);
 386                if (use_table(ctl_node->header))
 387                        return ctl_node;
 388        }
 389        return NULL;
 390}
 391
 392static void first_entry(struct ctl_dir *dir,
 393        struct ctl_table_header **phead, struct ctl_table **pentry)
 394{
 395        struct ctl_table_header *head = NULL;
 396        struct ctl_table *entry = NULL;
 397        struct ctl_node *ctl_node;
 398
 399        spin_lock(&sysctl_lock);
 400        ctl_node = first_usable_entry(rb_first(&dir->root));
 401        spin_unlock(&sysctl_lock);
 402        if (ctl_node) {
 403                head = ctl_node->header;
 404                entry = &head->ctl_table[ctl_node - head->node];
 405        }
 406        *phead = head;
 407        *pentry = entry;
 408}
 409
 410static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
 411{
 412        struct ctl_table_header *head = *phead;
 413        struct ctl_table *entry = *pentry;
 414        struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
 415
 416        spin_lock(&sysctl_lock);
 417        unuse_table(head);
 418
 419        ctl_node = first_usable_entry(rb_next(&ctl_node->node));
 420        spin_unlock(&sysctl_lock);
 421        head = NULL;
 422        if (ctl_node) {
 423                head = ctl_node->header;
 424                entry = &head->ctl_table[ctl_node - head->node];
 425        }
 426        *phead = head;
 427        *pentry = entry;
 428}
 429
 430/*
 431 * sysctl_perm does NOT grant the superuser all rights automatically, because
 432 * some sysctl variables are readonly even to root.
 433 */
 434
 435static int test_perm(int mode, int op)
 436{
 437        if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
 438                mode >>= 6;
 439        else if (in_egroup_p(GLOBAL_ROOT_GID))
 440                mode >>= 3;
 441        if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
 442                return 0;
 443        return -EACCES;
 444}
 445
 446static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
 447{
 448        struct ctl_table_root *root = head->root;
 449        int mode;
 450
 451        if (root->permissions)
 452                mode = root->permissions(head, table);
 453        else
 454                mode = table->mode;
 455
 456        return test_perm(mode, op);
 457}
 458
 459static struct inode *proc_sys_make_inode(struct super_block *sb,
 460                struct ctl_table_header *head, struct ctl_table *table)
 461{
 462        struct ctl_table_root *root = head->root;
 463        struct inode *inode;
 464        struct proc_inode *ei;
 465
 466        inode = new_inode(sb);
 467        if (!inode)
 468                return ERR_PTR(-ENOMEM);
 469
 470        inode->i_ino = get_next_ino();
 471
 472        ei = PROC_I(inode);
 473
 474        spin_lock(&sysctl_lock);
 475        if (unlikely(head->unregistering)) {
 476                spin_unlock(&sysctl_lock);
 477                iput(inode);
 478                return ERR_PTR(-ENOENT);
 479        }
 480        ei->sysctl = head;
 481        ei->sysctl_entry = table;
 482        hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
 483        head->count++;
 484        spin_unlock(&sysctl_lock);
 485
 486        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 487        inode->i_mode = table->mode;
 488        if (!S_ISDIR(table->mode)) {
 489                inode->i_mode |= S_IFREG;
 490                inode->i_op = &proc_sys_inode_operations;
 491                inode->i_fop = &proc_sys_file_operations;
 492        } else {
 493                inode->i_mode |= S_IFDIR;
 494                inode->i_op = &proc_sys_dir_operations;
 495                inode->i_fop = &proc_sys_dir_file_operations;
 496                if (is_empty_dir(head))
 497                        make_empty_dir_inode(inode);
 498        }
 499
 500        if (root->set_ownership)
 501                root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
 502
 503        return inode;
 504}
 505
 506void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
 507{
 508        spin_lock(&sysctl_lock);
 509        hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
 510        if (!--head->count)
 511                kfree_rcu(head, rcu);
 512        spin_unlock(&sysctl_lock);
 513}
 514
 515static struct ctl_table_header *grab_header(struct inode *inode)
 516{
 517        struct ctl_table_header *head = PROC_I(inode)->sysctl;
 518        if (!head)
 519                head = &sysctl_table_root.default_set.dir.header;
 520        return sysctl_head_grab(head);
 521}
 522
 523static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 524                                        unsigned int flags)
 525{
 526        struct ctl_table_header *head = grab_header(dir);
 527        struct ctl_table_header *h = NULL;
 528        const struct qstr *name = &dentry->d_name;
 529        struct ctl_table *p;
 530        struct inode *inode;
 531        struct dentry *err = ERR_PTR(-ENOENT);
 532        struct ctl_dir *ctl_dir;
 533        int ret;
 534
 535        if (IS_ERR(head))
 536                return ERR_CAST(head);
 537
 538        ctl_dir = container_of(head, struct ctl_dir, header);
 539
 540        p = lookup_entry(&h, ctl_dir, name->name, name->len);
 541        if (!p)
 542                goto out;
 543
 544        if (S_ISLNK(p->mode)) {
 545                ret = sysctl_follow_link(&h, &p);
 546                err = ERR_PTR(ret);
 547                if (ret)
 548                        goto out;
 549        }
 550
 551        inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
 552        if (IS_ERR(inode)) {
 553                err = ERR_CAST(inode);
 554                goto out;
 555        }
 556
 557        d_set_d_op(dentry, &proc_sys_dentry_operations);
 558        err = d_splice_alias(inode, dentry);
 559
 560out:
 561        if (h)
 562                sysctl_head_finish(h);
 563        sysctl_head_finish(head);
 564        return err;
 565}
 566
 567static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 568                size_t count, loff_t *ppos, int write)
 569{
 570        struct inode *inode = file_inode(filp);
 571        struct ctl_table_header *head = grab_header(inode);
 572        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 573        void *new_buf = NULL;
 574        ssize_t error;
 575
 576        if (IS_ERR(head))
 577                return PTR_ERR(head);
 578
 579        /*
 580         * At this point we know that the sysctl was not unregistered
 581         * and won't be until we finish.
 582         */
 583        error = -EPERM;
 584        if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 585                goto out;
 586
 587        /* if that can happen at all, it should be -EINVAL, not -EISDIR */
 588        error = -EINVAL;
 589        if (!table->proc_handler)
 590                goto out;
 591
 592        error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
 593                                           ppos, &new_buf);
 594        if (error)
 595                goto out;
 596
 597        /* careful: calling conventions are nasty here */
 598        if (new_buf) {
 599                mm_segment_t old_fs;
 600
 601                old_fs = get_fs();
 602                set_fs(KERNEL_DS);
 603                error = table->proc_handler(table, write, (void __user *)new_buf,
 604                                            &count, ppos);
 605                set_fs(old_fs);
 606                kfree(new_buf);
 607        } else {
 608                error = table->proc_handler(table, write, buf, &count, ppos);
 609        }
 610
 611        if (!error)
 612                error = count;
 613out:
 614        sysctl_head_finish(head);
 615
 616        return error;
 617}
 618
 619static ssize_t proc_sys_read(struct file *filp, char __user *buf,
 620                                size_t count, loff_t *ppos)
 621{
 622        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
 623}
 624
 625static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 626                                size_t count, loff_t *ppos)
 627{
 628        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 629}
 630
 631static int proc_sys_open(struct inode *inode, struct file *filp)
 632{
 633        struct ctl_table_header *head = grab_header(inode);
 634        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 635
 636        /* sysctl was unregistered */
 637        if (IS_ERR(head))
 638                return PTR_ERR(head);
 639
 640        if (table->poll)
 641                filp->private_data = proc_sys_poll_event(table->poll);
 642
 643        sysctl_head_finish(head);
 644
 645        return 0;
 646}
 647
 648static __poll_t proc_sys_poll(struct file *filp, poll_table *wait)
 649{
 650        struct inode *inode = file_inode(filp);
 651        struct ctl_table_header *head = grab_header(inode);
 652        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 653        __poll_t ret = DEFAULT_POLLMASK;
 654        unsigned long event;
 655
 656        /* sysctl was unregistered */
 657        if (IS_ERR(head))
 658                return EPOLLERR | EPOLLHUP;
 659
 660        if (!table->proc_handler)
 661                goto out;
 662
 663        if (!table->poll)
 664                goto out;
 665
 666        event = (unsigned long)filp->private_data;
 667        poll_wait(filp, &table->poll->wait, wait);
 668
 669        if (event != atomic_read(&table->poll->event)) {
 670                filp->private_data = proc_sys_poll_event(table->poll);
 671                ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
 672        }
 673
 674out:
 675        sysctl_head_finish(head);
 676
 677        return ret;
 678}
 679
    /*
     * proc_sys_fill_cache - emit one directory entry during readdir
     * @file:  the open directory
     * @ctx:   readdir context the entry is emitted into
     * @head:  header owning @table
     * @table: entry to emit
     *
     * Looks up the dentry for the entry's name under the directory and, if
     * there is none yet, allocates one and instantiates it with a new inode.
     * The inode number and file type for dir_emit() are taken from the
     * resulting inode.
     *
     * Returns false if the dentry or inode could not be set up, otherwise
     * the result of dir_emit().
     */
 680static bool proc_sys_fill_cache(struct file *file,
 681                                struct dir_context *ctx,
 682                                struct ctl_table_header *head,
 683                                struct ctl_table *table)
 684{
 685        struct dentry *child, *dir = file->f_path.dentry;
 686        struct inode *inode;
 687        struct qstr qname;
 688        ino_t ino = 0;
 689        unsigned type = DT_UNKNOWN;
 690
 691        qname.name = table->procname;
 692        qname.len  = strlen(table->procname);
 693        qname.hash = full_name_hash(dir, qname.name, qname.len);
 694
 695        child = d_lookup(dir, &qname);
 696        if (!child) {
 697                DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 698                child = d_alloc_parallel(dir, &qname, &wq);
 699                if (IS_ERR(child))
 700                        return false;
                    /* only a dentry still in lookup needs an inode from us */
 701                if (d_in_lookup(child)) {
 702                        struct dentry *res;
 703                        inode = proc_sys_make_inode(dir->d_sb, head, table);
 704                        if (IS_ERR(inode)) {
 705                                d_lookup_done(child);
 706                                dput(child);
 707                                return false;
 708                        }
 709                        d_set_d_op(child, &proc_sys_dentry_operations);
 710                        res = d_splice_alias(inode, child);
 711                        d_lookup_done(child);
                            /* d_splice_alias() handed back an error or a different dentry */
 712                        if (unlikely(res)) {
 713                                if (IS_ERR(res)) {
 714                                        dput(child);
 715                                        return false;
 716                                }
 717                                dput(child);
 718                                child = res;
 719                        }
 720                }
 721        }
 722        inode = d_inode(child);
 723        ino  = inode->i_ino;
            /* the bits of i_mode above bit 11 become the type given to dir_emit() */
 724        type = inode->i_mode >> 12;
 725        dput(child);
 726        return dir_emit(ctx, qname.name, qname.len, ino, type);
 727}
 728
 729static bool proc_sys_link_fill_cache(struct file *file,
 730                                    struct dir_context *ctx,
 731                                    struct ctl_table_header *head,
 732                                    struct ctl_table *table)
 733{
 734        bool ret = true;
 735
 736        head = sysctl_head_grab(head);
 737        if (IS_ERR(head))
 738                return false;
 739
 740        /* It is not an error if we can not follow the link ignore it */
 741        if (sysctl_follow_link(&head, &table))
 742                goto out;
 743
 744        ret = proc_sys_fill_cache(file, ctx, head, table);
 745out:
 746        sysctl_head_finish(head);
 747        return ret;
 748}
 749
 750static int scan(struct ctl_table_header *head, struct ctl_table *table,
 751                unsigned long *pos, struct file *file,
 752                struct dir_context *ctx)
 753{
 754        bool res;
 755
 756        if ((*pos)++ < ctx->pos)
 757                return true;
 758
 759        if (unlikely(S_ISLNK(table->mode)))
 760                res = proc_sys_link_fill_cache(file, ctx, head, table);
 761        else
 762                res = proc_sys_fill_cache(file, ctx, head, table);
 763
 764        if (res)
 765                ctx->pos = *pos;
 766
 767        return res;
 768}
 769
 770static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 771{
 772        struct ctl_table_header *head = grab_header(file_inode(file));
 773        struct ctl_table_header *h = NULL;
 774        struct ctl_table *entry;
 775        struct ctl_dir *ctl_dir;
 776        unsigned long pos;
 777
 778        if (IS_ERR(head))
 779                return PTR_ERR(head);
 780
 781        ctl_dir = container_of(head, struct ctl_dir, header);
 782
 783        if (!dir_emit_dots(file, ctx))
 784                goto out;
 785
 786        pos = 2;
 787
 788        for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
 789                if (!scan(h, entry, &pos, file, ctx)) {
 790                        sysctl_head_finish(h);
 791                        break;
 792                }
 793        }
 794out:
 795        sysctl_head_finish(head);
 796        return 0;
 797}
 798
 799static int proc_sys_permission(struct inode *inode, int mask)
 800{
 801        /*
 802         * sysctl entries that are not writeable,
 803         * are _NOT_ writeable, capabilities or not.
 804         */
 805        struct ctl_table_header *head;
 806        struct ctl_table *table;
 807        int error;
 808
 809        /* Executable files are not allowed under /proc/sys/ */
 810        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
 811                return -EACCES;
 812
 813        head = grab_header(inode);
 814        if (IS_ERR(head))
 815                return PTR_ERR(head);
 816
 817        table = PROC_I(inode)->sysctl_entry;
 818        if (!table) /* global root - r-xr-xr-x */
 819                error = mask & MAY_WRITE ? -EACCES : 0;
 820        else /* Use the permissions on the sysctl table entry */
 821                error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
 822
 823        sysctl_head_finish(head);
 824        return error;
 825}
 826
 827static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 828{
 829        struct inode *inode = d_inode(dentry);
 830        int error;
 831
 832        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 833                return -EPERM;
 834
 835        error = setattr_prepare(dentry, attr);
 836        if (error)
 837                return error;
 838
 839        setattr_copy(inode, attr);
 840        mark_inode_dirty(inode);
 841        return 0;
 842}
 843
 844static int proc_sys_getattr(const struct path *path, struct kstat *stat,
 845                            u32 request_mask, unsigned int query_flags)
 846{
 847        struct inode *inode = d_inode(path->dentry);
 848        struct ctl_table_header *head = grab_header(inode);
 849        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 850
 851        if (IS_ERR(head))
 852                return PTR_ERR(head);
 853
 854        generic_fillattr(inode, stat);
 855        if (table)
 856                stat->mode = (stat->mode & S_IFMT) | table->mode;
 857
 858        sysctl_head_finish(head);
 859        return 0;
 860}
 861
    /*
     * File operations that proc_sys_make_inode() installs on inodes of
     * non-directory entries.
     */
 862static const struct file_operations proc_sys_file_operations = {
 863        .open           = proc_sys_open,
 864        .poll           = proc_sys_poll,
 865        .read           = proc_sys_read,
 866        .write          = proc_sys_write,
 867        .llseek         = default_llseek,
 868};
 869
    /*
     * File operations that proc_sys_make_inode() installs on inodes of
     * directory entries; listing goes through proc_sys_readdir().
     */
 870static const struct file_operations proc_sys_dir_file_operations = {
 871        .read           = generic_read_dir,
 872        .iterate_shared = proc_sys_readdir,
 873        .llseek         = generic_file_llseek,
 874};
 875
    /*
     * Inode operations that proc_sys_make_inode() installs on inodes of
     * non-directory entries.
     */
 876static const struct inode_operations proc_sys_inode_operations = {
 877        .permission     = proc_sys_permission,
 878        .setattr        = proc_sys_setattr,
 879        .getattr        = proc_sys_getattr,
 880};
 881
    /*
     * Inode operations that proc_sys_make_inode() installs on inodes of
     * directory entries: the same callbacks as for files plus ->lookup().
     */
 882static const struct inode_operations proc_sys_dir_operations = {
 883        .lookup         = proc_sys_lookup,
 884        .permission     = proc_sys_permission,
 885        .setattr        = proc_sys_setattr,
 886        .getattr        = proc_sys_getattr,
 887};
 888
 889static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 890{
 891        if (flags & LOOKUP_RCU)
 892                return -ECHILD;
 893        return !PROC_I(d_inode(dentry))->sysctl->unregistering;
 894}
 895
 896static int proc_sys_delete(const struct dentry *dentry)
 897{
 898        return !!PROC_I(d_inode(dentry))->sysctl->unregistering;
 899}
 900
 901static int sysctl_is_seen(struct ctl_table_header *p)
 902{
 903        struct ctl_table_set *set = p->set;
 904        int res;
 905        spin_lock(&sysctl_lock);
 906        if (p->unregistering)
 907                res = 0;
 908        else if (!set->is_seen)
 909                res = 1;
 910        else
 911                res = set->is_seen(set);
 912        spin_unlock(&sysctl_lock);
 913        return res;
 914}
 915
 916static int proc_sys_compare(const struct dentry *dentry,
 917                unsigned int len, const char *str, const struct qstr *name)
 918{
 919        struct ctl_table_header *head;
 920        struct inode *inode;
 921
 922        /* Although proc doesn't have negative dentries, rcu-walk means
 923         * that inode here can be NULL */
 924        /* AV: can it, indeed? */
 925        inode = d_inode_rcu(dentry);
 926        if (!inode)
 927                return 1;
 928        if (name->len != len)
 929                return 1;
 930        if (memcmp(name->name, str, len))
 931                return 1;
 932        head = rcu_dereference(PROC_I(inode)->sysctl);
 933        return !head || !sysctl_is_seen(head);
 934}
 935
 936static const struct dentry_operations proc_sys_dentry_operations = {
 937        .d_revalidate   = proc_sys_revalidate,
 938        .d_delete       = proc_sys_delete,
 939        .d_compare      = proc_sys_compare,
 940};
 941
 942static struct ctl_dir *find_subdir(struct ctl_dir *dir,
 943                                   const char *name, int namelen)
 944{
 945        struct ctl_table_header *head;
 946        struct ctl_table *entry;
 947
 948        entry = find_entry(&head, dir, name, namelen);
 949        if (!entry)
 950                return ERR_PTR(-ENOENT);
 951        if (!S_ISDIR(entry->mode))
 952                return ERR_PTR(-ENOTDIR);
 953        return container_of(head, struct ctl_dir, header);
 954}
 955
 956static struct ctl_dir *new_dir(struct ctl_table_set *set,
 957                               const char *name, int namelen)
 958{
 959        struct ctl_table *table;
 960        struct ctl_dir *new;
 961        struct ctl_node *node;
 962        char *new_name;
 963
 964        new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
 965                      sizeof(struct ctl_table)*2 +  namelen + 1,
 966                      GFP_KERNEL);
 967        if (!new)
 968                return NULL;
 969
 970        node = (struct ctl_node *)(new + 1);
 971        table = (struct ctl_table *)(node + 1);
 972        new_name = (char *)(table + 2);
 973        memcpy(new_name, name, namelen);
 974        new_name[namelen] = '\0';
 975        table[0].procname = new_name;
 976        table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
 977        init_header(&new->header, set->dir.header.root, set, node, table);
 978
 979        return new;
 980}
 981
 982/**
 983 * get_subdir - find or create a subdir with the specified name.
 984 * @dir:  Directory to create the subdirectory in
 985 * @name: The name of the subdirectory to find or create
 986 * @namelen: The length of name
 987 *
 988 * Takes a directory with an elevated reference count so we know that
 989 * if we drop the lock the directory will not go away.  Upon success
 990 * the reference is moved from @dir to the returned subdirectory.
 991 * Upon error an error code is returned and the reference on @dir is
 992 * simply dropped.
 993 */
 994static struct ctl_dir *get_subdir(struct ctl_dir *dir,
 995                                  const char *name, int namelen)
 996{
 997        struct ctl_table_set *set = dir->header.set;
 998        struct ctl_dir *subdir, *new = NULL;
 999        int err;
1000
1001        spin_lock(&sysctl_lock);
1002        subdir = find_subdir(dir, name, namelen);
1003        if (!IS_ERR(subdir))
1004                goto found;
1005        if (PTR_ERR(subdir) != -ENOENT)
1006                goto failed;
1007
1008        spin_unlock(&sysctl_lock);
1009        new = new_dir(set, name, namelen);
1010        spin_lock(&sysctl_lock);
1011        subdir = ERR_PTR(-ENOMEM);
1012        if (!new)
1013                goto failed;
1014
1015        /* Was the subdir added while we dropped the lock? */
1016        subdir = find_subdir(dir, name, namelen);
1017        if (!IS_ERR(subdir))
1018                goto found;
1019        if (PTR_ERR(subdir) != -ENOENT)
1020                goto failed;
1021
1022        /* Nope.  Use the our freshly made directory entry. */
1023        err = insert_header(dir, &new->header);
1024        subdir = ERR_PTR(err);
1025        if (err)
1026                goto failed;
1027        subdir = new;
1028found:
1029        subdir->header.nreg++;
1030failed:
1031        if (IS_ERR(subdir)) {
1032                pr_err("sysctl could not get directory: ");
1033                sysctl_print_dir(dir);
1034                pr_cont("/%*.*s %ld\n",
1035                        namelen, namelen, name, PTR_ERR(subdir));
1036        }
1037        drop_sysctl_table(&dir->header);
1038        if (new)
1039                drop_sysctl_table(&new->header);
1040        spin_unlock(&sysctl_lock);
1041        return subdir;
1042}
1043
1044static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
1045{
1046        struct ctl_dir *parent;
1047        const char *procname;
1048        if (!dir->header.parent)
1049                return &set->dir;
1050        parent = xlate_dir(set, dir->header.parent);
1051        if (IS_ERR(parent))
1052                return parent;
1053        procname = dir->header.ctl_table[0].procname;
1054        return find_subdir(parent, procname, strlen(procname));
1055}
1056
1057static int sysctl_follow_link(struct ctl_table_header **phead,
1058        struct ctl_table **pentry)
1059{
1060        struct ctl_table_header *head;
1061        struct ctl_table_root *root;
1062        struct ctl_table_set *set;
1063        struct ctl_table *entry;
1064        struct ctl_dir *dir;
1065        int ret;
1066
1067        ret = 0;
1068        spin_lock(&sysctl_lock);
1069        root = (*pentry)->data;
1070        set = lookup_header_set(root);
1071        dir = xlate_dir(set, (*phead)->parent);
1072        if (IS_ERR(dir))
1073                ret = PTR_ERR(dir);
1074        else {
1075                const char *procname = (*pentry)->procname;
1076                head = NULL;
1077                entry = find_entry(&head, dir, procname, strlen(procname));
1078                ret = -ENOENT;
1079                if (entry && use_table(head)) {
1080                        unuse_table(*phead);
1081                        *phead = head;
1082                        *pentry = entry;
1083                        ret = 0;
1084                }
1085        }
1086
1087        spin_unlock(&sysctl_lock);
1088        return ret;
1089}
1090
1091static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
1092{
1093        struct va_format vaf;
1094        va_list args;
1095
1096        va_start(args, fmt);
1097        vaf.fmt = fmt;
1098        vaf.va = &args;
1099
1100        pr_err("sysctl table check failed: %s/%s %pV\n",
1101               path, table->procname, &vaf);
1102
1103        va_end(args);
1104        return -EINVAL;
1105}
1106
1107static int sysctl_check_table_array(const char *path, struct ctl_table *table)
1108{
1109        int err = 0;
1110
1111        if ((table->proc_handler == proc_douintvec) ||
1112            (table->proc_handler == proc_douintvec_minmax)) {
1113                if (table->maxlen != sizeof(unsigned int))
1114                        err |= sysctl_err(path, table, "array not allowed");
1115        }
1116
1117        return err;
1118}
1119
1120static int sysctl_check_table(const char *path, struct ctl_table *table)
1121{
1122        int err = 0;
1123        for (; table->procname; table++) {
1124                if (table->child)
1125                        err |= sysctl_err(path, table, "Not a file");
1126
1127                if ((table->proc_handler == proc_dostring) ||
1128                    (table->proc_handler == proc_dointvec) ||
1129                    (table->proc_handler == proc_douintvec) ||
1130                    (table->proc_handler == proc_douintvec_minmax) ||
1131                    (table->proc_handler == proc_dointvec_minmax) ||
1132                    (table->proc_handler == proc_dointvec_jiffies) ||
1133                    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1134                    (table->proc_handler == proc_dointvec_ms_jiffies) ||
1135                    (table->proc_handler == proc_doulongvec_minmax) ||
1136                    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1137                        if (!table->data)
1138                                err |= sysctl_err(path, table, "No data");
1139                        if (!table->maxlen)
1140                                err |= sysctl_err(path, table, "No maxlen");
1141                        else
1142                                err |= sysctl_check_table_array(path, table);
1143                }
1144                if (!table->proc_handler)
1145                        err |= sysctl_err(path, table, "No proc_handler");
1146
1147                if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1148                        err |= sysctl_err(path, table, "bogus .mode 0%o",
1149                                table->mode);
1150        }
1151        return err;
1152}
1153
1154static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1155        struct ctl_table_root *link_root)
1156{
1157        struct ctl_table *link_table, *entry, *link;
1158        struct ctl_table_header *links;
1159        struct ctl_node *node;
1160        char *link_name;
1161        int nr_entries, name_bytes;
1162
1163        name_bytes = 0;
1164        nr_entries = 0;
1165        for (entry = table; entry->procname; entry++) {
1166                nr_entries++;
1167                name_bytes += strlen(entry->procname) + 1;
1168        }
1169
1170        links = kzalloc(sizeof(struct ctl_table_header) +
1171                        sizeof(struct ctl_node)*nr_entries +
1172                        sizeof(struct ctl_table)*(nr_entries + 1) +
1173                        name_bytes,
1174                        GFP_KERNEL);
1175
1176        if (!links)
1177                return NULL;
1178
1179        node = (struct ctl_node *)(links + 1);
1180        link_table = (struct ctl_table *)(node + nr_entries);
1181        link_name = (char *)&link_table[nr_entries + 1];
1182
1183        for (link = link_table, entry = table; entry->procname; link++, entry++) {
1184                int len = strlen(entry->procname) + 1;
1185                memcpy(link_name, entry->procname, len);
1186                link->procname = link_name;
1187                link->mode = S_IFLNK|S_IRWXUGO;
1188                link->data = link_root;
1189                link_name += len;
1190        }
1191        init_header(links, dir->header.root, dir->header.set, node, link_table);
1192        links->nreg = nr_entries;
1193
1194        return links;
1195}
1196
1197static bool get_links(struct ctl_dir *dir,
1198        struct ctl_table *table, struct ctl_table_root *link_root)
1199{
1200        struct ctl_table_header *head;
1201        struct ctl_table *entry, *link;
1202
1203        /* Are there links available for every entry in table? */
1204        for (entry = table; entry->procname; entry++) {
1205                const char *procname = entry->procname;
1206                link = find_entry(&head, dir, procname, strlen(procname));
1207                if (!link)
1208                        return false;
1209                if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1210                        continue;
1211                if (S_ISLNK(link->mode) && (link->data == link_root))
1212                        continue;
1213                return false;
1214        }
1215
1216        /* The checks passed.  Increase the registration count on the links */
1217        for (entry = table; entry->procname; entry++) {
1218                const char *procname = entry->procname;
1219                link = find_entry(&head, dir, procname, strlen(procname));
1220                head->nreg++;
1221        }
1222        return true;
1223}
1224
1225static int insert_links(struct ctl_table_header *head)
1226{
1227        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1228        struct ctl_dir *core_parent = NULL;
1229        struct ctl_table_header *links;
1230        int err;
1231
1232        if (head->set == root_set)
1233                return 0;
1234
1235        core_parent = xlate_dir(root_set, head->parent);
1236        if (IS_ERR(core_parent))
1237                return 0;
1238
1239        if (get_links(core_parent, head->ctl_table, head->root))
1240                return 0;
1241
1242        core_parent->header.nreg++;
1243        spin_unlock(&sysctl_lock);
1244
1245        links = new_links(core_parent, head->ctl_table, head->root);
1246
1247        spin_lock(&sysctl_lock);
1248        err = -ENOMEM;
1249        if (!links)
1250                goto out;
1251
1252        err = 0;
1253        if (get_links(core_parent, head->ctl_table, head->root)) {
1254                kfree(links);
1255                goto out;
1256        }
1257
1258        err = insert_header(core_parent, links);
1259        if (err)
1260                kfree(links);
1261out:
1262        drop_sysctl_table(&core_parent->header);
1263        return err;
1264}
1265
1266/**
1267 * __register_sysctl_table - register a leaf sysctl table
1268 * @set: Sysctl tree to register on
1269 * @path: The path to the directory the sysctl table is in.
1270 * @table: the top-level table structure
1271 *
1272 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1273 * array. A completely 0 filled entry terminates the table.
1274 *
1275 * The members of the &struct ctl_table structure are used as follows:
1276 *
1277 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1278 *            enter a sysctl file
1279 *
1280 * data - a pointer to data for use by proc_handler
1281 *
1282 * maxlen - the maximum size in bytes of the data
1283 *
1284 * mode - the file permissions for the /proc/sys file
1285 *
1286 * child - must be %NULL.
1287 *
1288 * proc_handler - the text handler routine (described below)
1289 *
1290 * extra1, extra2 - extra pointers usable by the proc handler routines
1291 *
1292 * Leaf nodes in the sysctl tree will be represented by a single file
1293 * under /proc; non-leaf nodes will be represented by directories.
1294 *
1295 * There must be a proc_handler routine for any terminal nodes.
1296 * Several default handlers are available to cover common cases -
1297 *
1298 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1299 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1300 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1301 *
1302 * It is the handler's job to read the input buffer from user memory
1303 * and process it. The handler should return 0 on success.
1304 *
1305 * This routine returns %NULL on a failure to register, and a pointer
1306 * to the table header on success.
1307 */
1308struct ctl_table_header *__register_sysctl_table(
1309        struct ctl_table_set *set,
1310        const char *path, struct ctl_table *table)
1311{
1312        struct ctl_table_root *root = set->dir.header.root;
1313        struct ctl_table_header *header;
1314        const char *name, *nextname;
1315        struct ctl_dir *dir;
1316        struct ctl_table *entry;
1317        struct ctl_node *node;
1318        int nr_entries = 0;
1319
1320        for (entry = table; entry->procname; entry++)
1321                nr_entries++;
1322
1323        header = kzalloc(sizeof(struct ctl_table_header) +
1324                         sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1325        if (!header)
1326                return NULL;
1327
1328        node = (struct ctl_node *)(header + 1);
1329        init_header(header, root, set, node, table);
1330        if (sysctl_check_table(path, table))
1331                goto fail;
1332
1333        spin_lock(&sysctl_lock);
1334        dir = &set->dir;
1335        /* Reference moved down the diretory tree get_subdir */
1336        dir->header.nreg++;
1337        spin_unlock(&sysctl_lock);
1338
1339        /* Find the directory for the ctl_table */
1340        for (name = path; name; name = nextname) {
1341                int namelen;
1342                nextname = strchr(name, '/');
1343                if (nextname) {
1344                        namelen = nextname - name;
1345                        nextname++;
1346                } else {
1347                        namelen = strlen(name);
1348                }
1349                if (namelen == 0)
1350                        continue;
1351
1352                dir = get_subdir(dir, name, namelen);
1353                if (IS_ERR(dir))
1354                        goto fail;
1355        }
1356
1357        spin_lock(&sysctl_lock);
1358        if (insert_header(dir, header))
1359                goto fail_put_dir_locked;
1360
1361        drop_sysctl_table(&dir->header);
1362        spin_unlock(&sysctl_lock);
1363
1364        return header;
1365
1366fail_put_dir_locked:
1367        drop_sysctl_table(&dir->header);
1368        spin_unlock(&sysctl_lock);
1369fail:
1370        kfree(header);
1371        dump_stack();
1372        return NULL;
1373}
1374
1375/**
1376 * register_sysctl - register a sysctl table
1377 * @path: The path to the directory the sysctl table is in.
1378 * @table: the table structure
1379 *
1380 * Register a sysctl table. @table should be a filled in ctl_table
1381 * array. A completely 0 filled entry terminates the table.
1382 *
1383 * See __register_sysctl_table for more details.
1384 */
1385struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1386{
1387        return __register_sysctl_table(&sysctl_table_root.default_set,
1388                                        path, table);
1389}
1390EXPORT_SYMBOL(register_sysctl);
1391
/*
 * Append "@name/" at @pos inside the buffer starting at @path.
 *
 * The result is NUL-terminated.  Returns a pointer to the terminator, i.e.
 * the position for the next component, or NULL (leaving the buffer
 * untouched) when the extended path would not fit in PATH_MAX bytes.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
        int namelen = strlen(name);
        char *end;

        if (((pos - path) + namelen + 2) >= PATH_MAX)
                return NULL;

        end = pos + namelen;
        memcpy(pos, name, namelen);
        end[0] = '/';
        end[1] = '\0';

        return end + 1;
}
1404
1405static int count_subheaders(struct ctl_table *table)
1406{
1407        int has_files = 0;
1408        int nr_subheaders = 0;
1409        struct ctl_table *entry;
1410
1411        /* special case: no directory and empty directory */
1412        if (!table || !table->procname)
1413                return 1;
1414
1415        for (entry = table; entry->procname; entry++) {
1416                if (entry->child)
1417                        nr_subheaders += count_subheaders(entry->child);
1418                else
1419                        has_files = 1;
1420        }
1421        return nr_subheaders + has_files;
1422}
1423
1424static int register_leaf_sysctl_tables(const char *path, char *pos,
1425        struct ctl_table_header ***subheader, struct ctl_table_set *set,
1426        struct ctl_table *table)
1427{
1428        struct ctl_table *ctl_table_arg = NULL;
1429        struct ctl_table *entry, *files;
1430        int nr_files = 0;
1431        int nr_dirs = 0;
1432        int err = -ENOMEM;
1433
1434        for (entry = table; entry->procname; entry++) {
1435                if (entry->child)
1436                        nr_dirs++;
1437                else
1438                        nr_files++;
1439        }
1440
1441        files = table;
1442        /* If there are mixed files and directories we need a new table */
1443        if (nr_dirs && nr_files) {
1444                struct ctl_table *new;
1445                files = kcalloc(nr_files + 1, sizeof(struct ctl_table),
1446                                GFP_KERNEL);
1447                if (!files)
1448                        goto out;
1449
1450                ctl_table_arg = files;
1451                for (new = files, entry = table; entry->procname; entry++) {
1452                        if (entry->child)
1453                                continue;
1454                        *new = *entry;
1455                        new++;
1456                }
1457        }
1458
1459        /* Register everything except a directory full of subdirectories */
1460        if (nr_files || !nr_dirs) {
1461                struct ctl_table_header *header;
1462                header = __register_sysctl_table(set, path, files);
1463                if (!header) {
1464                        kfree(ctl_table_arg);
1465                        goto out;
1466                }
1467
1468                /* Remember if we need to free the file table */
1469                header->ctl_table_arg = ctl_table_arg;
1470                **subheader = header;
1471                (*subheader)++;
1472        }
1473
1474        /* Recurse into the subdirectories. */
1475        for (entry = table; entry->procname; entry++) {
1476                char *child_pos;
1477
1478                if (!entry->child)
1479                        continue;
1480
1481                err = -ENAMETOOLONG;
1482                child_pos = append_path(path, pos, entry->procname);
1483                if (!child_pos)
1484                        goto out;
1485
1486                err = register_leaf_sysctl_tables(path, child_pos, subheader,
1487                                                  set, entry->child);
1488                pos[0] = '\0';
1489                if (err)
1490                        goto out;
1491        }
1492        err = 0;
1493out:
1494        /* On failure our caller will unregister all registered subheaders */
1495        return err;
1496}
1497
1498/**
1499 * __register_sysctl_paths - register a sysctl table hierarchy
1500 * @set: Sysctl tree to register on
1501 * @path: The path to the directory the sysctl table is in.
1502 * @table: the top-level table structure
1503 *
1504 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1505 * array. A completely 0 filled entry terminates the table.
1506 *
1507 * See __register_sysctl_table for more details.
1508 */
1509struct ctl_table_header *__register_sysctl_paths(
1510        struct ctl_table_set *set,
1511        const struct ctl_path *path, struct ctl_table *table)
1512{
1513        struct ctl_table *ctl_table_arg = table;
1514        int nr_subheaders = count_subheaders(table);
1515        struct ctl_table_header *header = NULL, **subheaders, **subheader;
1516        const struct ctl_path *component;
1517        char *new_path, *pos;
1518
1519        pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1520        if (!new_path)
1521                return NULL;
1522
1523        pos[0] = '\0';
1524        for (component = path; component->procname; component++) {
1525                pos = append_path(new_path, pos, component->procname);
1526                if (!pos)
1527                        goto out;
1528        }
1529        while (table->procname && table->child && !table[1].procname) {
1530                pos = append_path(new_path, pos, table->procname);
1531                if (!pos)
1532                        goto out;
1533                table = table->child;
1534        }
1535        if (nr_subheaders == 1) {
1536                header = __register_sysctl_table(set, new_path, table);
1537                if (header)
1538                        header->ctl_table_arg = ctl_table_arg;
1539        } else {
1540                header = kzalloc(sizeof(*header) +
1541                                 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1542                if (!header)
1543                        goto out;
1544
1545                subheaders = (struct ctl_table_header **) (header + 1);
1546                subheader = subheaders;
1547                header->ctl_table_arg = ctl_table_arg;
1548
1549                if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1550                                                set, table))
1551                        goto err_register_leaves;
1552        }
1553
1554out:
1555        kfree(new_path);
1556        return header;
1557
1558err_register_leaves:
1559        while (subheader > subheaders) {
1560                struct ctl_table_header *subh = *(--subheader);
1561                struct ctl_table *table = subh->ctl_table_arg;
1562                unregister_sysctl_table(subh);
1563                kfree(table);
1564        }
1565        kfree(header);
1566        header = NULL;
1567        goto out;
1568}
1569
1570/**
1571 * register_sysctl_table_path - register a sysctl table hierarchy
1572 * @path: The path to the directory the sysctl table is in.
1573 * @table: the top-level table structure
1574 *
1575 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1576 * array. A completely 0 filled entry terminates the table.
1577 *
1578 * See __register_sysctl_paths for more details.
1579 */
1580struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1581                                                struct ctl_table *table)
1582{
1583        return __register_sysctl_paths(&sysctl_table_root.default_set,
1584                                        path, table);
1585}
1586EXPORT_SYMBOL(register_sysctl_paths);
1587
1588/**
1589 * register_sysctl_table - register a sysctl table hierarchy
1590 * @table: the top-level table structure
1591 *
1592 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1593 * array. A completely 0 filled entry terminates the table.
1594 *
1595 * See register_sysctl_paths for more details.
1596 */
1597struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1598{
1599        static const struct ctl_path null_path[] = { {} };
1600
1601        return register_sysctl_paths(null_path, table);
1602}
1603EXPORT_SYMBOL(register_sysctl_table);
1604
1605static void put_links(struct ctl_table_header *header)
1606{
1607        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1608        struct ctl_table_root *root = header->root;
1609        struct ctl_dir *parent = header->parent;
1610        struct ctl_dir *core_parent;
1611        struct ctl_table *entry;
1612
1613        if (header->set == root_set)
1614                return;
1615
1616        core_parent = xlate_dir(root_set, parent);
1617        if (IS_ERR(core_parent))
1618                return;
1619
1620        for (entry = header->ctl_table; entry->procname; entry++) {
1621                struct ctl_table_header *link_head;
1622                struct ctl_table *link;
1623                const char *name = entry->procname;
1624
1625                link = find_entry(&link_head, core_parent, name, strlen(name));
1626                if (link &&
1627                    ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1628                     (S_ISLNK(link->mode) && (link->data == root)))) {
1629                        drop_sysctl_table(link_head);
1630                }
1631                else {
1632                        pr_err("sysctl link missing during unregister: ");
1633                        sysctl_print_dir(parent);
1634                        pr_cont("/%s\n", name);
1635                }
1636        }
1637}
1638
1639static void drop_sysctl_table(struct ctl_table_header *header)
1640{
1641        struct ctl_dir *parent = header->parent;
1642
1643        if (--header->nreg)
1644                return;
1645
1646        if (parent) {
1647                put_links(header);
1648                start_unregistering(header);
1649        }
1650
1651        if (!--header->count)
1652                kfree_rcu(header, rcu);
1653
1654        if (parent)
1655                drop_sysctl_table(&parent->header);
1656}
1657
1658/**
1659 * unregister_sysctl_table - unregister a sysctl table hierarchy
1660 * @header: the header returned from register_sysctl_table
1661 *
1662 * Unregisters the sysctl table and all children. proc entries may not
1663 * actually be removed until they are no longer used by anyone.
1664 */
1665void unregister_sysctl_table(struct ctl_table_header * header)
1666{
1667        int nr_subheaders;
1668        might_sleep();
1669
1670        if (header == NULL)
1671                return;
1672
1673        nr_subheaders = count_subheaders(header->ctl_table_arg);
1674        if (unlikely(nr_subheaders > 1)) {
1675                struct ctl_table_header **subheaders;
1676                int i;
1677
1678                subheaders = (struct ctl_table_header **)(header + 1);
1679                for (i = nr_subheaders -1; i >= 0; i--) {
1680                        struct ctl_table_header *subh = subheaders[i];
1681                        struct ctl_table *table = subh->ctl_table_arg;
1682                        unregister_sysctl_table(subh);
1683                        kfree(table);
1684                }
1685                kfree(header);
1686                return;
1687        }
1688
1689        spin_lock(&sysctl_lock);
1690        drop_sysctl_table(header);
1691        spin_unlock(&sysctl_lock);
1692}
1693EXPORT_SYMBOL(unregister_sysctl_table);
1694
1695void setup_sysctl_set(struct ctl_table_set *set,
1696        struct ctl_table_root *root,
1697        int (*is_seen)(struct ctl_table_set *))
1698{
1699        memset(set, 0, sizeof(*set));
1700        set->is_seen = is_seen;
1701        init_header(&set->dir.header, root, set, NULL, root_table);
1702}
1703
1704void retire_sysctl_set(struct ctl_table_set *set)
1705{
1706        WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1707}
1708
1709int __init proc_sys_init(void)
1710{
1711        struct proc_dir_entry *proc_sys_root;
1712
1713        proc_sys_root = proc_mkdir("sys", NULL);
1714        proc_sys_root->proc_iops = &proc_sys_dir_operations;
1715        proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
1716        proc_sys_root->nlink = 0;
1717
1718        return sysctl_init();
1719}
1720