linux/fs/proc/proc_sysctl.c
<<
>>
Prefs
   1/*
   2 * /proc/sys support
   3 */
   4#include <linux/init.h>
   5#include <linux/sysctl.h>
   6#include <linux/poll.h>
   7#include <linux/proc_fs.h>
   8#include <linux/printk.h>
   9#include <linux/security.h>
  10#include <linux/sched.h>
  11#include <linux/namei.h>
  12#include <linux/mm.h>
  13#include <linux/module.h>
  14#include "internal.h"
  15
  16static const struct dentry_operations proc_sys_dentry_operations;
  17static const struct file_operations proc_sys_file_operations;
  18static const struct inode_operations proc_sys_inode_operations;
  19static const struct file_operations proc_sys_dir_file_operations;
  20static const struct inode_operations proc_sys_dir_operations;
  21
  22void proc_sys_poll_notify(struct ctl_table_poll *poll)
  23{
  24        if (!poll)
  25                return;
  26
  27        atomic_inc(&poll->event);
  28        wake_up_interruptible(&poll->wait);
  29}
  30
  31static struct ctl_table root_table[] = {
  32        {
  33                .procname = "",
  34                .mode = S_IFDIR|S_IRUGO|S_IXUGO,
  35        },
  36        { }
  37};
  38static struct ctl_table_root sysctl_table_root = {
  39        .default_set.dir.header = {
  40                {{.count = 1,
  41                  .nreg = 1,
  42                  .ctl_table = root_table }},
  43                .ctl_table_arg = root_table,
  44                .root = &sysctl_table_root,
  45                .set = &sysctl_table_root.default_set,
  46        },
  47};
  48
  49static DEFINE_SPINLOCK(sysctl_lock);
  50
  51static void drop_sysctl_table(struct ctl_table_header *header);
  52static int sysctl_follow_link(struct ctl_table_header **phead,
  53        struct ctl_table **pentry, struct nsproxy *namespaces);
  54static int insert_links(struct ctl_table_header *head);
  55static void put_links(struct ctl_table_header *header);
  56
  57static void sysctl_print_dir(struct ctl_dir *dir)
  58{
  59        if (dir->header.parent)
  60                sysctl_print_dir(dir->header.parent);
  61        pr_cont("%s/", dir->header.ctl_table[0].procname);
  62}
  63
  64static int namecmp(const char *name1, int len1, const char *name2, int len2)
  65{
  66        int minlen;
  67        int cmp;
  68
  69        minlen = len1;
  70        if (minlen > len2)
  71                minlen = len2;
  72
  73        cmp = memcmp(name1, name2, minlen);
  74        if (cmp == 0)
  75                cmp = len1 - len2;
  76        return cmp;
  77}
  78
  79/* Called under sysctl_lock */
  80static struct ctl_table *find_entry(struct ctl_table_header **phead,
  81        struct ctl_dir *dir, const char *name, int namelen)
  82{
  83        struct ctl_table_header *head;
  84        struct ctl_table *entry;
  85        struct rb_node *node = dir->root.rb_node;
  86
  87        while (node)
  88        {
  89                struct ctl_node *ctl_node;
  90                const char *procname;
  91                int cmp;
  92
  93                ctl_node = rb_entry(node, struct ctl_node, node);
  94                head = ctl_node->header;
  95                entry = &head->ctl_table[ctl_node - head->node];
  96                procname = entry->procname;
  97
  98                cmp = namecmp(name, namelen, procname, strlen(procname));
  99                if (cmp < 0)
 100                        node = node->rb_left;
 101                else if (cmp > 0)
 102                        node = node->rb_right;
 103                else {
 104                        *phead = head;
 105                        return entry;
 106                }
 107        }
 108        return NULL;
 109}
 110
 111static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
 112{
 113        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 114        struct rb_node **p = &head->parent->root.rb_node;
 115        struct rb_node *parent = NULL;
 116        const char *name = entry->procname;
 117        int namelen = strlen(name);
 118
 119        while (*p) {
 120                struct ctl_table_header *parent_head;
 121                struct ctl_table *parent_entry;
 122                struct ctl_node *parent_node;
 123                const char *parent_name;
 124                int cmp;
 125
 126                parent = *p;
 127                parent_node = rb_entry(parent, struct ctl_node, node);
 128                parent_head = parent_node->header;
 129                parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
 130                parent_name = parent_entry->procname;
 131
 132                cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
 133                if (cmp < 0)
 134                        p = &(*p)->rb_left;
 135                else if (cmp > 0)
 136                        p = &(*p)->rb_right;
 137                else {
 138                        pr_err("sysctl duplicate entry: ");
 139                        sysctl_print_dir(head->parent);
 140                        pr_cont("/%s\n", entry->procname);
 141                        return -EEXIST;
 142                }
 143        }
 144
 145        rb_link_node(node, parent, p);
 146        rb_insert_color(node, &head->parent->root);
 147        return 0;
 148}
 149
 150static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
 151{
 152        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 153
 154        rb_erase(node, &head->parent->root);
 155}
 156
 157static void init_header(struct ctl_table_header *head,
 158        struct ctl_table_root *root, struct ctl_table_set *set,
 159        struct ctl_node *node, struct ctl_table *table)
 160{
 161        head->ctl_table = table;
 162        head->ctl_table_arg = table;
 163        head->used = 0;
 164        head->count = 1;
 165        head->nreg = 1;
 166        head->unregistering = NULL;
 167        head->root = root;
 168        head->set = set;
 169        head->parent = NULL;
 170        head->node = node;
 171        if (node) {
 172                struct ctl_table *entry;
 173                for (entry = table; entry->procname; entry++, node++)
 174                        node->header = head;
 175        }
 176}
 177
 178static void erase_header(struct ctl_table_header *head)
 179{
 180        struct ctl_table *entry;
 181        for (entry = head->ctl_table; entry->procname; entry++)
 182                erase_entry(head, entry);
 183}
 184
 185static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
 186{
 187        struct ctl_table *entry;
 188        int err;
 189
 190        dir->header.nreg++;
 191        header->parent = dir;
 192        err = insert_links(header);
 193        if (err)
 194                goto fail_links;
 195        for (entry = header->ctl_table; entry->procname; entry++) {
 196                err = insert_entry(header, entry);
 197                if (err)
 198                        goto fail;
 199        }
 200        return 0;
 201fail:
 202        erase_header(header);
 203        put_links(header);
 204fail_links:
 205        header->parent = NULL;
 206        drop_sysctl_table(&dir->header);
 207        return err;
 208}
 209
 210/* called under sysctl_lock */
 211static int use_table(struct ctl_table_header *p)
 212{
 213        if (unlikely(p->unregistering))
 214                return 0;
 215        p->used++;
 216        return 1;
 217}
 218
 219/* called under sysctl_lock */
 220static void unuse_table(struct ctl_table_header *p)
 221{
 222        if (!--p->used)
 223                if (unlikely(p->unregistering))
 224                        complete(p->unregistering);
 225}
 226
 227/* called under sysctl_lock, will reacquire if has to wait */
 228static void start_unregistering(struct ctl_table_header *p)
 229{
 230        /*
 231         * if p->used is 0, nobody will ever touch that entry again;
 232         * we'll eliminate all paths to it before dropping sysctl_lock
 233         */
 234        if (unlikely(p->used)) {
 235                struct completion wait;
 236                init_completion(&wait);
 237                p->unregistering = &wait;
 238                spin_unlock(&sysctl_lock);
 239                wait_for_completion(&wait);
 240                spin_lock(&sysctl_lock);
 241        } else {
 242                /* anything non-NULL; we'll never dereference it */
 243                p->unregistering = ERR_PTR(-EINVAL);
 244        }
 245        /*
 246         * do not remove from the list until nobody holds it; walking the
 247         * list in do_sysctl() relies on that.
 248         */
 249        erase_header(p);
 250}
 251
 252static void sysctl_head_get(struct ctl_table_header *head)
 253{
 254        spin_lock(&sysctl_lock);
 255        head->count++;
 256        spin_unlock(&sysctl_lock);
 257}
 258
 259void sysctl_head_put(struct ctl_table_header *head)
 260{
 261        spin_lock(&sysctl_lock);
 262        if (!--head->count)
 263                kfree_rcu(head, rcu);
 264        spin_unlock(&sysctl_lock);
 265}
 266
 267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
 268{
 269        BUG_ON(!head);
 270        spin_lock(&sysctl_lock);
 271        if (!use_table(head))
 272                head = ERR_PTR(-ENOENT);
 273        spin_unlock(&sysctl_lock);
 274        return head;
 275}
 276
 277static void sysctl_head_finish(struct ctl_table_header *head)
 278{
 279        if (!head)
 280                return;
 281        spin_lock(&sysctl_lock);
 282        unuse_table(head);
 283        spin_unlock(&sysctl_lock);
 284}
 285
 286static struct ctl_table_set *
 287lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
 288{
 289        struct ctl_table_set *set = &root->default_set;
 290        if (root->lookup)
 291                set = root->lookup(root, namespaces);
 292        return set;
 293}
 294
 295static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
 296                                      struct ctl_dir *dir,
 297                                      const char *name, int namelen)
 298{
 299        struct ctl_table_header *head;
 300        struct ctl_table *entry;
 301
 302        spin_lock(&sysctl_lock);
 303        entry = find_entry(&head, dir, name, namelen);
 304        if (entry && use_table(head))
 305                *phead = head;
 306        else
 307                entry = NULL;
 308        spin_unlock(&sysctl_lock);
 309        return entry;
 310}
 311
 312static struct ctl_node *first_usable_entry(struct rb_node *node)
 313{
 314        struct ctl_node *ctl_node;
 315
 316        for (;node; node = rb_next(node)) {
 317                ctl_node = rb_entry(node, struct ctl_node, node);
 318                if (use_table(ctl_node->header))
 319                        return ctl_node;
 320        }
 321        return NULL;
 322}
 323
 324static void first_entry(struct ctl_dir *dir,
 325        struct ctl_table_header **phead, struct ctl_table **pentry)
 326{
 327        struct ctl_table_header *head = NULL;
 328        struct ctl_table *entry = NULL;
 329        struct ctl_node *ctl_node;
 330
 331        spin_lock(&sysctl_lock);
 332        ctl_node = first_usable_entry(rb_first(&dir->root));
 333        spin_unlock(&sysctl_lock);
 334        if (ctl_node) {
 335                head = ctl_node->header;
 336                entry = &head->ctl_table[ctl_node - head->node];
 337        }
 338        *phead = head;
 339        *pentry = entry;
 340}
 341
 342static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
 343{
 344        struct ctl_table_header *head = *phead;
 345        struct ctl_table *entry = *pentry;
 346        struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
 347
 348        spin_lock(&sysctl_lock);
 349        unuse_table(head);
 350
 351        ctl_node = first_usable_entry(rb_next(&ctl_node->node));
 352        spin_unlock(&sysctl_lock);
 353        head = NULL;
 354        if (ctl_node) {
 355                head = ctl_node->header;
 356                entry = &head->ctl_table[ctl_node - head->node];
 357        }
 358        *phead = head;
 359        *pentry = entry;
 360}
 361
  362void register_sysctl_root(struct ctl_table_root *root)
  363{
        /* Intentionally empty: nothing is done with @root here. */
  364}
 365
 366/*
 367 * sysctl_perm does NOT grant the superuser all rights automatically, because
 368 * some sysctl variables are readonly even to root.
 369 */
 370
 371static int test_perm(int mode, int op)
 372{
 373        if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
 374                mode >>= 6;
 375        else if (in_egroup_p(GLOBAL_ROOT_GID))
 376                mode >>= 3;
 377        if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
 378                return 0;
 379        return -EACCES;
 380}
 381
 382static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
 383{
 384        struct ctl_table_root *root = head->root;
 385        int mode;
 386
 387        if (root->permissions)
 388                mode = root->permissions(head, table);
 389        else
 390                mode = table->mode;
 391
 392        return test_perm(mode, op);
 393}
 394
 395static struct inode *proc_sys_make_inode(struct super_block *sb,
 396                struct ctl_table_header *head, struct ctl_table *table)
 397{
 398        struct inode *inode;
 399        struct proc_inode *ei;
 400
 401        inode = new_inode(sb);
 402        if (!inode)
 403                goto out;
 404
 405        inode->i_ino = get_next_ino();
 406
 407        sysctl_head_get(head);
 408        ei = PROC_I(inode);
 409        ei->sysctl = head;
 410        ei->sysctl_entry = table;
 411
 412        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 413        inode->i_mode = table->mode;
 414        if (!S_ISDIR(table->mode)) {
 415                inode->i_mode |= S_IFREG;
 416                inode->i_op = &proc_sys_inode_operations;
 417                inode->i_fop = &proc_sys_file_operations;
 418        } else {
 419                inode->i_mode |= S_IFDIR;
 420                inode->i_op = &proc_sys_dir_operations;
 421                inode->i_fop = &proc_sys_dir_file_operations;
 422        }
 423out:
 424        return inode;
 425}
 426
 427static struct ctl_table_header *grab_header(struct inode *inode)
 428{
 429        struct ctl_table_header *head = PROC_I(inode)->sysctl;
 430        if (!head)
 431                head = &sysctl_table_root.default_set.dir.header;
 432        return sysctl_head_grab(head);
 433}
 434
 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 436                                        unsigned int flags)
 437{
 438        struct ctl_table_header *head = grab_header(dir);
 439        struct ctl_table_header *h = NULL;
 440        struct qstr *name = &dentry->d_name;
 441        struct ctl_table *p;
 442        struct inode *inode;
 443        struct dentry *err = ERR_PTR(-ENOENT);
 444        struct ctl_dir *ctl_dir;
 445        int ret;
 446
 447        if (IS_ERR(head))
 448                return ERR_CAST(head);
 449
 450        ctl_dir = container_of(head, struct ctl_dir, header);
 451
 452        p = lookup_entry(&h, ctl_dir, name->name, name->len);
 453        if (!p)
 454                goto out;
 455
 456        if (S_ISLNK(p->mode)) {
 457                ret = sysctl_follow_link(&h, &p, current->nsproxy);
 458                err = ERR_PTR(ret);
 459                if (ret)
 460                        goto out;
 461        }
 462
 463        err = ERR_PTR(-ENOMEM);
 464        inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
 465        if (!inode)
 466                goto out;
 467
 468        err = NULL;
 469        d_set_d_op(dentry, &proc_sys_dentry_operations);
 470        d_add(dentry, inode);
 471
 472out:
 473        if (h)
 474                sysctl_head_finish(h);
 475        sysctl_head_finish(head);
 476        return err;
 477}
 478
 479static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 480                size_t count, loff_t *ppos, int write)
 481{
 482        struct inode *inode = file_inode(filp);
 483        struct ctl_table_header *head = grab_header(inode);
 484        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 485        ssize_t error;
 486        size_t res;
 487
 488        if (IS_ERR(head))
 489                return PTR_ERR(head);
 490
 491        /*
 492         * At this point we know that the sysctl was not unregistered
 493         * and won't be until we finish.
 494         */
 495        error = -EPERM;
 496        if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 497                goto out;
 498
 499        /* if that can happen at all, it should be -EINVAL, not -EISDIR */
 500        error = -EINVAL;
 501        if (!table->proc_handler)
 502                goto out;
 503
 504        /* careful: calling conventions are nasty here */
 505        res = count;
 506        error = table->proc_handler(table, write, buf, &res, ppos);
 507        if (!error)
 508                error = res;
 509out:
 510        sysctl_head_finish(head);
 511
 512        return error;
 513}
 514
 515static ssize_t proc_sys_read(struct file *filp, char __user *buf,
 516                                size_t count, loff_t *ppos)
 517{
 518        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
 519}
 520
 521static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 522                                size_t count, loff_t *ppos)
 523{
 524        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 525}
 526
 527static int proc_sys_open(struct inode *inode, struct file *filp)
 528{
 529        struct ctl_table_header *head = grab_header(inode);
 530        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 531
 532        /* sysctl was unregistered */
 533        if (IS_ERR(head))
 534                return PTR_ERR(head);
 535
 536        if (table->poll)
 537                filp->private_data = proc_sys_poll_event(table->poll);
 538
 539        sysctl_head_finish(head);
 540
 541        return 0;
 542}
 543
 544static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 545{
 546        struct inode *inode = file_inode(filp);
 547        struct ctl_table_header *head = grab_header(inode);
 548        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 549        unsigned int ret = DEFAULT_POLLMASK;
 550        unsigned long event;
 551
 552        /* sysctl was unregistered */
 553        if (IS_ERR(head))
 554                return POLLERR | POLLHUP;
 555
 556        if (!table->proc_handler)
 557                goto out;
 558
 559        if (!table->poll)
 560                goto out;
 561
 562        event = (unsigned long)filp->private_data;
 563        poll_wait(filp, &table->poll->wait, wait);
 564
 565        if (event != atomic_read(&table->poll->event)) {
 566                filp->private_data = proc_sys_poll_event(table->poll);
 567                ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
 568        }
 569
 570out:
 571        sysctl_head_finish(head);
 572
 573        return ret;
 574}
 575
 576static int proc_sys_fill_cache(struct file *filp, void *dirent,
 577                                filldir_t filldir,
 578                                struct ctl_table_header *head,
 579                                struct ctl_table *table)
 580{
 581        struct dentry *child, *dir = filp->f_path.dentry;
 582        struct inode *inode;
 583        struct qstr qname;
 584        ino_t ino = 0;
 585        unsigned type = DT_UNKNOWN;
 586
 587        qname.name = table->procname;
 588        qname.len  = strlen(table->procname);
 589        qname.hash = full_name_hash(qname.name, qname.len);
 590
 591        child = d_lookup(dir, &qname);
 592        if (!child) {
 593                child = d_alloc(dir, &qname);
 594                if (child) {
 595                        inode = proc_sys_make_inode(dir->d_sb, head, table);
 596                        if (!inode) {
 597                                dput(child);
 598                                return -ENOMEM;
 599                        } else {
 600                                d_set_d_op(child, &proc_sys_dentry_operations);
 601                                d_add(child, inode);
 602                        }
 603                } else {
 604                        return -ENOMEM;
 605                }
 606        }
 607        inode = child->d_inode;
 608        ino  = inode->i_ino;
 609        type = inode->i_mode >> 12;
 610        dput(child);
 611        return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
 612}
 613
 614static int proc_sys_link_fill_cache(struct file *filp, void *dirent,
 615                                    filldir_t filldir,
 616                                    struct ctl_table_header *head,
 617                                    struct ctl_table *table)
 618{
 619        int err, ret = 0;
 620        head = sysctl_head_grab(head);
 621
 622        if (S_ISLNK(table->mode)) {
 623                /* It is not an error if we can not follow the link ignore it */
 624                err = sysctl_follow_link(&head, &table, current->nsproxy);
 625                if (err)
 626                        goto out;
 627        }
 628
 629        ret = proc_sys_fill_cache(filp, dirent, filldir, head, table);
 630out:
 631        sysctl_head_finish(head);
 632        return ret;
 633}
 634
 635static int scan(struct ctl_table_header *head, ctl_table *table,
 636                unsigned long *pos, struct file *file,
 637                void *dirent, filldir_t filldir)
 638{
 639        int res;
 640
 641        if ((*pos)++ < file->f_pos)
 642                return 0;
 643
 644        if (unlikely(S_ISLNK(table->mode)))
 645                res = proc_sys_link_fill_cache(file, dirent, filldir, head, table);
 646        else
 647                res = proc_sys_fill_cache(file, dirent, filldir, head, table);
 648
 649        if (res == 0)
 650                file->f_pos = *pos;
 651
 652        return res;
 653}
 654
 655static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
 656{
 657        struct dentry *dentry = filp->f_path.dentry;
 658        struct inode *inode = dentry->d_inode;
 659        struct ctl_table_header *head = grab_header(inode);
 660        struct ctl_table_header *h = NULL;
 661        struct ctl_table *entry;
 662        struct ctl_dir *ctl_dir;
 663        unsigned long pos;
 664        int ret = -EINVAL;
 665
 666        if (IS_ERR(head))
 667                return PTR_ERR(head);
 668
 669        ctl_dir = container_of(head, struct ctl_dir, header);
 670
 671        ret = 0;
 672        /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
 673        if (filp->f_pos == 0) {
 674                if (filldir(dirent, ".", 1, filp->f_pos,
 675                                inode->i_ino, DT_DIR) < 0)
 676                        goto out;
 677                filp->f_pos++;
 678        }
 679        if (filp->f_pos == 1) {
 680                if (filldir(dirent, "..", 2, filp->f_pos,
 681                                parent_ino(dentry), DT_DIR) < 0)
 682                        goto out;
 683                filp->f_pos++;
 684        }
 685        pos = 2;
 686
 687        for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
 688                ret = scan(h, entry, &pos, filp, dirent, filldir);
 689                if (ret) {
 690                        sysctl_head_finish(h);
 691                        break;
 692                }
 693        }
 694        ret = 1;
 695out:
 696        sysctl_head_finish(head);
 697        return ret;
 698}
 699
 700static int proc_sys_permission(struct inode *inode, int mask)
 701{
 702        /*
 703         * sysctl entries that are not writeable,
 704         * are _NOT_ writeable, capabilities or not.
 705         */
 706        struct ctl_table_header *head;
 707        struct ctl_table *table;
 708        int error;
 709
 710        /* Executable files are not allowed under /proc/sys/ */
 711        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
 712                return -EACCES;
 713
 714        head = grab_header(inode);
 715        if (IS_ERR(head))
 716                return PTR_ERR(head);
 717
 718        table = PROC_I(inode)->sysctl_entry;
 719        if (!table) /* global root - r-xr-xr-x */
 720                error = mask & MAY_WRITE ? -EACCES : 0;
 721        else /* Use the permissions on the sysctl table entry */
 722                error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
 723
 724        sysctl_head_finish(head);
 725        return error;
 726}
 727
 728static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 729{
 730        struct inode *inode = dentry->d_inode;
 731        int error;
 732
 733        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 734                return -EPERM;
 735
 736        error = inode_change_ok(inode, attr);
 737        if (error)
 738                return error;
 739
 740        setattr_copy(inode, attr);
 741        mark_inode_dirty(inode);
 742        return 0;
 743}
 744
 745static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 746{
 747        struct inode *inode = dentry->d_inode;
 748        struct ctl_table_header *head = grab_header(inode);
 749        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 750
 751        if (IS_ERR(head))
 752                return PTR_ERR(head);
 753
 754        generic_fillattr(inode, stat);
 755        if (table)
 756                stat->mode = (stat->mode & S_IFMT) | table->mode;
 757
 758        sysctl_head_finish(head);
 759        return 0;
 760}
 761
 762static const struct file_operations proc_sys_file_operations = {
 763        .open           = proc_sys_open,
 764        .poll           = proc_sys_poll,
 765        .read           = proc_sys_read,
 766        .write          = proc_sys_write,
 767        .llseek         = default_llseek,
 768};
 769
 770static const struct file_operations proc_sys_dir_file_operations = {
 771        .read           = generic_read_dir,
 772        .readdir        = proc_sys_readdir,
 773        .llseek         = generic_file_llseek,
 774};
 775
 776static const struct inode_operations proc_sys_inode_operations = {
 777        .permission     = proc_sys_permission,
 778        .setattr        = proc_sys_setattr,
 779        .getattr        = proc_sys_getattr,
 780};
 781
 782static const struct inode_operations proc_sys_dir_operations = {
 783        .lookup         = proc_sys_lookup,
 784        .permission     = proc_sys_permission,
 785        .setattr        = proc_sys_setattr,
 786        .getattr        = proc_sys_getattr,
 787};
 788
 789static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 790{
 791        if (flags & LOOKUP_RCU)
 792                return -ECHILD;
 793        return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 794}
 795
 796static int proc_sys_delete(const struct dentry *dentry)
 797{
 798        return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
 799}
 800
 801static int sysctl_is_seen(struct ctl_table_header *p)
 802{
 803        struct ctl_table_set *set = p->set;
 804        int res;
 805        spin_lock(&sysctl_lock);
 806        if (p->unregistering)
 807                res = 0;
 808        else if (!set->is_seen)
 809                res = 1;
 810        else
 811                res = set->is_seen(set);
 812        spin_unlock(&sysctl_lock);
 813        return res;
 814}
 815
 816static int proc_sys_compare(const struct dentry *parent,
 817                const struct inode *pinode,
 818                const struct dentry *dentry, const struct inode *inode,
 819                unsigned int len, const char *str, const struct qstr *name)
 820{
 821        struct ctl_table_header *head;
 822        /* Although proc doesn't have negative dentries, rcu-walk means
 823         * that inode here can be NULL */
 824        /* AV: can it, indeed? */
 825        if (!inode)
 826                return 1;
 827        if (name->len != len)
 828                return 1;
 829        if (memcmp(name->name, str, len))
 830                return 1;
 831        head = rcu_dereference(PROC_I(inode)->sysctl);
 832        return !head || !sysctl_is_seen(head);
 833}
 834
 835static const struct dentry_operations proc_sys_dentry_operations = {
 836        .d_revalidate   = proc_sys_revalidate,
 837        .d_delete       = proc_sys_delete,
 838        .d_compare      = proc_sys_compare,
 839};
 840
 841static struct ctl_dir *find_subdir(struct ctl_dir *dir,
 842                                   const char *name, int namelen)
 843{
 844        struct ctl_table_header *head;
 845        struct ctl_table *entry;
 846
 847        entry = find_entry(&head, dir, name, namelen);
 848        if (!entry)
 849                return ERR_PTR(-ENOENT);
 850        if (!S_ISDIR(entry->mode))
 851                return ERR_PTR(-ENOTDIR);
 852        return container_of(head, struct ctl_dir, header);
 853}
 854
 855static struct ctl_dir *new_dir(struct ctl_table_set *set,
 856                               const char *name, int namelen)
 857{
 858        struct ctl_table *table;
 859        struct ctl_dir *new;
 860        struct ctl_node *node;
 861        char *new_name;
 862
 863        new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
 864                      sizeof(struct ctl_table)*2 +  namelen + 1,
 865                      GFP_KERNEL);
 866        if (!new)
 867                return NULL;
 868
 869        node = (struct ctl_node *)(new + 1);
 870        table = (struct ctl_table *)(node + 1);
 871        new_name = (char *)(table + 2);
 872        memcpy(new_name, name, namelen);
 873        new_name[namelen] = '\0';
 874        table[0].procname = new_name;
 875        table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
 876        init_header(&new->header, set->dir.header.root, set, node, table);
 877
 878        return new;
 879}
 880
 881/**
 882 * get_subdir - find or create a subdir with the specified name.
 883 * @dir:  Directory to create the subdirectory in
 884 * @name: The name of the subdirectory to find or create
 885 * @namelen: The length of name
 886 *
 887 * Takes a directory with an elevated reference count so we know that
 888 * if we drop the lock the directory will not go away.  Upon success
 889 * the reference is moved from @dir to the returned subdirectory.
 890 * Upon error an error code is returned and the reference on @dir is
 891 * simply dropped.
 892 */
 893static struct ctl_dir *get_subdir(struct ctl_dir *dir,
 894                                  const char *name, int namelen)
 895{
 896        struct ctl_table_set *set = dir->header.set;
 897        struct ctl_dir *subdir, *new = NULL;
 898        int err;
 899
 900        spin_lock(&sysctl_lock);
 901        subdir = find_subdir(dir, name, namelen);
 902        if (!IS_ERR(subdir))
 903                goto found;
 904        if (PTR_ERR(subdir) != -ENOENT)
 905                goto failed;
 906
 907        spin_unlock(&sysctl_lock);
 908        new = new_dir(set, name, namelen);
 909        spin_lock(&sysctl_lock);
 910        subdir = ERR_PTR(-ENOMEM);
 911        if (!new)
 912                goto failed;
 913
 914        /* Was the subdir added while we dropped the lock? */
 915        subdir = find_subdir(dir, name, namelen);
 916        if (!IS_ERR(subdir))
 917                goto found;
 918        if (PTR_ERR(subdir) != -ENOENT)
 919                goto failed;
 920
 921        /* Nope.  Use the our freshly made directory entry. */
 922        err = insert_header(dir, &new->header);
 923        subdir = ERR_PTR(err);
 924        if (err)
 925                goto failed;
 926        subdir = new;
 927found:
 928        subdir->header.nreg++;
 929failed:
 930        if (unlikely(IS_ERR(subdir))) {
 931                pr_err("sysctl could not get directory: ");
 932                sysctl_print_dir(dir);
 933                pr_cont("/%*.*s %ld\n",
 934                        namelen, namelen, name, PTR_ERR(subdir));
 935        }
 936        drop_sysctl_table(&dir->header);
 937        if (new)
 938                drop_sysctl_table(&new->header);
 939        spin_unlock(&sysctl_lock);
 940        return subdir;
 941}
 942
 943static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
 944{
 945        struct ctl_dir *parent;
 946        const char *procname;
 947        if (!dir->header.parent)
 948                return &set->dir;
 949        parent = xlate_dir(set, dir->header.parent);
 950        if (IS_ERR(parent))
 951                return parent;
 952        procname = dir->header.ctl_table[0].procname;
 953        return find_subdir(parent, procname, strlen(procname));
 954}
 955
 956static int sysctl_follow_link(struct ctl_table_header **phead,
 957        struct ctl_table **pentry, struct nsproxy *namespaces)
 958{
 959        struct ctl_table_header *head;
 960        struct ctl_table_root *root;
 961        struct ctl_table_set *set;
 962        struct ctl_table *entry;
 963        struct ctl_dir *dir;
 964        int ret;
 965
 966        ret = 0;
 967        spin_lock(&sysctl_lock);
 968        root = (*pentry)->data;
 969        set = lookup_header_set(root, namespaces);
 970        dir = xlate_dir(set, (*phead)->parent);
 971        if (IS_ERR(dir))
 972                ret = PTR_ERR(dir);
 973        else {
 974                const char *procname = (*pentry)->procname;
 975                head = NULL;
 976                entry = find_entry(&head, dir, procname, strlen(procname));
 977                ret = -ENOENT;
 978                if (entry && use_table(head)) {
 979                        unuse_table(*phead);
 980                        *phead = head;
 981                        *pentry = entry;
 982                        ret = 0;
 983                }
 984        }
 985
 986        spin_unlock(&sysctl_lock);
 987        return ret;
 988}
 989
 990static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
 991{
 992        struct va_format vaf;
 993        va_list args;
 994
 995        va_start(args, fmt);
 996        vaf.fmt = fmt;
 997        vaf.va = &args;
 998
 999        pr_err("sysctl table check failed: %s/%s %pV\n",
1000               path, table->procname, &vaf);
1001
1002        va_end(args);
1003        return -EINVAL;
1004}
1005
1006static int sysctl_check_table(const char *path, struct ctl_table *table)
1007{
1008        int err = 0;
1009        for (; table->procname; table++) {
1010                if (table->child)
1011                        err = sysctl_err(path, table, "Not a file");
1012
1013                if ((table->proc_handler == proc_dostring) ||
1014                    (table->proc_handler == proc_dointvec) ||
1015                    (table->proc_handler == proc_dointvec_minmax) ||
1016                    (table->proc_handler == proc_dointvec_jiffies) ||
1017                    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1018                    (table->proc_handler == proc_dointvec_ms_jiffies) ||
1019                    (table->proc_handler == proc_doulongvec_minmax) ||
1020                    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1021                        if (!table->data)
1022                                err = sysctl_err(path, table, "No data");
1023                        if (!table->maxlen)
1024                                err = sysctl_err(path, table, "No maxlen");
1025                }
1026                if (!table->proc_handler)
1027                        err = sysctl_err(path, table, "No proc_handler");
1028
1029                if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1030                        err = sysctl_err(path, table, "bogus .mode 0%o",
1031                                table->mode);
1032        }
1033        return err;
1034}
1035
1036static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1037        struct ctl_table_root *link_root)
1038{
1039        struct ctl_table *link_table, *entry, *link;
1040        struct ctl_table_header *links;
1041        struct ctl_node *node;
1042        char *link_name;
1043        int nr_entries, name_bytes;
1044
1045        name_bytes = 0;
1046        nr_entries = 0;
1047        for (entry = table; entry->procname; entry++) {
1048                nr_entries++;
1049                name_bytes += strlen(entry->procname) + 1;
1050        }
1051
1052        links = kzalloc(sizeof(struct ctl_table_header) +
1053                        sizeof(struct ctl_node)*nr_entries +
1054                        sizeof(struct ctl_table)*(nr_entries + 1) +
1055                        name_bytes,
1056                        GFP_KERNEL);
1057
1058        if (!links)
1059                return NULL;
1060
1061        node = (struct ctl_node *)(links + 1);
1062        link_table = (struct ctl_table *)(node + nr_entries);
1063        link_name = (char *)&link_table[nr_entries + 1];
1064
1065        for (link = link_table, entry = table; entry->procname; link++, entry++) {
1066                int len = strlen(entry->procname) + 1;
1067                memcpy(link_name, entry->procname, len);
1068                link->procname = link_name;
1069                link->mode = S_IFLNK|S_IRWXUGO;
1070                link->data = link_root;
1071                link_name += len;
1072        }
1073        init_header(links, dir->header.root, dir->header.set, node, link_table);
1074        links->nreg = nr_entries;
1075
1076        return links;
1077}
1078
1079static bool get_links(struct ctl_dir *dir,
1080        struct ctl_table *table, struct ctl_table_root *link_root)
1081{
1082        struct ctl_table_header *head;
1083        struct ctl_table *entry, *link;
1084
1085        /* Are there links available for every entry in table? */
1086        for (entry = table; entry->procname; entry++) {
1087                const char *procname = entry->procname;
1088                link = find_entry(&head, dir, procname, strlen(procname));
1089                if (!link)
1090                        return false;
1091                if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1092                        continue;
1093                if (S_ISLNK(link->mode) && (link->data == link_root))
1094                        continue;
1095                return false;
1096        }
1097
1098        /* The checks passed.  Increase the registration count on the links */
1099        for (entry = table; entry->procname; entry++) {
1100                const char *procname = entry->procname;
1101                link = find_entry(&head, dir, procname, strlen(procname));
1102                head->nreg++;
1103        }
1104        return true;
1105}
1106
1107static int insert_links(struct ctl_table_header *head)
1108{
1109        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1110        struct ctl_dir *core_parent = NULL;
1111        struct ctl_table_header *links;
1112        int err;
1113
1114        if (head->set == root_set)
1115                return 0;
1116
1117        core_parent = xlate_dir(root_set, head->parent);
1118        if (IS_ERR(core_parent))
1119                return 0;
1120
1121        if (get_links(core_parent, head->ctl_table, head->root))
1122                return 0;
1123
1124        core_parent->header.nreg++;
1125        spin_unlock(&sysctl_lock);
1126
1127        links = new_links(core_parent, head->ctl_table, head->root);
1128
1129        spin_lock(&sysctl_lock);
1130        err = -ENOMEM;
1131        if (!links)
1132                goto out;
1133
1134        err = 0;
1135        if (get_links(core_parent, head->ctl_table, head->root)) {
1136                kfree(links);
1137                goto out;
1138        }
1139
1140        err = insert_header(core_parent, links);
1141        if (err)
1142                kfree(links);
1143out:
1144        drop_sysctl_table(&core_parent->header);
1145        return err;
1146}
1147
1148/**
1149 * __register_sysctl_table - register a leaf sysctl table
1150 * @set: Sysctl tree to register on
1151 * @path: The path to the directory the sysctl table is in.
1152 * @table: the top-level table structure
1153 *
1154 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1155 * array. A completely 0 filled entry terminates the table.
1156 *
1157 * The members of the &struct ctl_table structure are used as follows:
1158 *
1159 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1160 *            enter a sysctl file
1161 *
1162 * data - a pointer to data for use by proc_handler
1163 *
1164 * maxlen - the maximum size in bytes of the data
1165 *
1166 * mode - the file permissions for the /proc/sys file
1167 *
1168 * child - must be %NULL.
1169 *
1170 * proc_handler - the text handler routine (described below)
1171 *
1172 * extra1, extra2 - extra pointers usable by the proc handler routines
1173 *
1174 * Leaf nodes in the sysctl tree will be represented by a single file
1175 * under /proc; non-leaf nodes will be represented by directories.
1176 *
1177 * There must be a proc_handler routine for any terminal nodes.
1178 * Several default handlers are available to cover common cases -
1179 *
1180 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1181 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1182 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1183 *
1184 * It is the handler's job to read the input buffer from user memory
1185 * and process it. The handler should return 0 on success.
1186 *
1187 * This routine returns %NULL on a failure to register, and a pointer
1188 * to the table header on success.
1189 */
1190struct ctl_table_header *__register_sysctl_table(
1191        struct ctl_table_set *set,
1192        const char *path, struct ctl_table *table)
1193{
1194        struct ctl_table_root *root = set->dir.header.root;
1195        struct ctl_table_header *header;
1196        const char *name, *nextname;
1197        struct ctl_dir *dir;
1198        struct ctl_table *entry;
1199        struct ctl_node *node;
1200        int nr_entries = 0;
1201
1202        for (entry = table; entry->procname; entry++)
1203                nr_entries++;
1204
1205        header = kzalloc(sizeof(struct ctl_table_header) +
1206                         sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1207        if (!header)
1208                return NULL;
1209
1210        node = (struct ctl_node *)(header + 1);
1211        init_header(header, root, set, node, table);
1212        if (sysctl_check_table(path, table))
1213                goto fail;
1214
1215        spin_lock(&sysctl_lock);
1216        dir = &set->dir;
1217        /* Reference moved down the diretory tree get_subdir */
1218        dir->header.nreg++;
1219        spin_unlock(&sysctl_lock);
1220
1221        /* Find the directory for the ctl_table */
1222        for (name = path; name; name = nextname) {
1223                int namelen;
1224                nextname = strchr(name, '/');
1225                if (nextname) {
1226                        namelen = nextname - name;
1227                        nextname++;
1228                } else {
1229                        namelen = strlen(name);
1230                }
1231                if (namelen == 0)
1232                        continue;
1233
1234                dir = get_subdir(dir, name, namelen);
1235                if (IS_ERR(dir))
1236                        goto fail;
1237        }
1238
1239        spin_lock(&sysctl_lock);
1240        if (insert_header(dir, header))
1241                goto fail_put_dir_locked;
1242
1243        drop_sysctl_table(&dir->header);
1244        spin_unlock(&sysctl_lock);
1245
1246        return header;
1247
1248fail_put_dir_locked:
1249        drop_sysctl_table(&dir->header);
1250        spin_unlock(&sysctl_lock);
1251fail:
1252        kfree(header);
1253        dump_stack();
1254        return NULL;
1255}
1256
1257/**
1258 * register_sysctl - register a sysctl table
1259 * @path: The path to the directory the sysctl table is in.
1260 * @table: the table structure
1261 *
1262 * Register a sysctl table. @table should be a filled in ctl_table
1263 * array. A completely 0 filled entry terminates the table.
1264 *
1265 * See __register_sysctl_table for more details.
1266 */
1267struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1268{
1269        return __register_sysctl_table(&sysctl_table_root.default_set,
1270                                        path, table);
1271}
1272EXPORT_SYMBOL(register_sysctl);
1273
/*
 * Append "@name/" to the path being built in the buffer starting at @path.
 * @pos is the current end of that path (where the NUL terminator sits).
 *
 * Returns the new end position, which again points at the NUL terminator,
 * or NULL without touching the buffer when the result would reach
 * PATH_MAX.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int namelen = strlen(name);
	char *end;

	/* Room is needed for the name, the '/' and the terminator. */
	if (((pos - path) + namelen + 2) >= PATH_MAX)
		return NULL;

	memcpy(pos, name, namelen);
	end = pos + namelen;
	*end++ = '/';
	*end = '\0';

	return end;
}
1286
1287static int count_subheaders(struct ctl_table *table)
1288{
1289        int has_files = 0;
1290        int nr_subheaders = 0;
1291        struct ctl_table *entry;
1292
1293        /* special case: no directory and empty directory */
1294        if (!table || !table->procname)
1295                return 1;
1296
1297        for (entry = table; entry->procname; entry++) {
1298                if (entry->child)
1299                        nr_subheaders += count_subheaders(entry->child);
1300                else
1301                        has_files = 1;
1302        }
1303        return nr_subheaders + has_files;
1304}
1305
1306static int register_leaf_sysctl_tables(const char *path, char *pos,
1307        struct ctl_table_header ***subheader, struct ctl_table_set *set,
1308        struct ctl_table *table)
1309{
1310        struct ctl_table *ctl_table_arg = NULL;
1311        struct ctl_table *entry, *files;
1312        int nr_files = 0;
1313        int nr_dirs = 0;
1314        int err = -ENOMEM;
1315
1316        for (entry = table; entry->procname; entry++) {
1317                if (entry->child)
1318                        nr_dirs++;
1319                else
1320                        nr_files++;
1321        }
1322
1323        files = table;
1324        /* If there are mixed files and directories we need a new table */
1325        if (nr_dirs && nr_files) {
1326                struct ctl_table *new;
1327                files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1328                                GFP_KERNEL);
1329                if (!files)
1330                        goto out;
1331
1332                ctl_table_arg = files;
1333                for (new = files, entry = table; entry->procname; entry++) {
1334                        if (entry->child)
1335                                continue;
1336                        *new = *entry;
1337                        new++;
1338                }
1339        }
1340
1341        /* Register everything except a directory full of subdirectories */
1342        if (nr_files || !nr_dirs) {
1343                struct ctl_table_header *header;
1344                header = __register_sysctl_table(set, path, files);
1345                if (!header) {
1346                        kfree(ctl_table_arg);
1347                        goto out;
1348                }
1349
1350                /* Remember if we need to free the file table */
1351                header->ctl_table_arg = ctl_table_arg;
1352                **subheader = header;
1353                (*subheader)++;
1354        }
1355
1356        /* Recurse into the subdirectories. */
1357        for (entry = table; entry->procname; entry++) {
1358                char *child_pos;
1359
1360                if (!entry->child)
1361                        continue;
1362
1363                err = -ENAMETOOLONG;
1364                child_pos = append_path(path, pos, entry->procname);
1365                if (!child_pos)
1366                        goto out;
1367
1368                err = register_leaf_sysctl_tables(path, child_pos, subheader,
1369                                                  set, entry->child);
1370                pos[0] = '\0';
1371                if (err)
1372                        goto out;
1373        }
1374        err = 0;
1375out:
1376        /* On failure our caller will unregister all registered subheaders */
1377        return err;
1378}
1379
1380/**
1381 * __register_sysctl_paths - register a sysctl table hierarchy
1382 * @set: Sysctl tree to register on
1383 * @path: The path to the directory the sysctl table is in.
1384 * @table: the top-level table structure
1385 *
1386 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1387 * array. A completely 0 filled entry terminates the table.
1388 *
1389 * See __register_sysctl_table for more details.
1390 */
1391struct ctl_table_header *__register_sysctl_paths(
1392        struct ctl_table_set *set,
1393        const struct ctl_path *path, struct ctl_table *table)
1394{
1395        struct ctl_table *ctl_table_arg = table;
1396        int nr_subheaders = count_subheaders(table);
1397        struct ctl_table_header *header = NULL, **subheaders, **subheader;
1398        const struct ctl_path *component;
1399        char *new_path, *pos;
1400
1401        pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1402        if (!new_path)
1403                return NULL;
1404
1405        pos[0] = '\0';
1406        for (component = path; component->procname; component++) {
1407                pos = append_path(new_path, pos, component->procname);
1408                if (!pos)
1409                        goto out;
1410        }
1411        while (table->procname && table->child && !table[1].procname) {
1412                pos = append_path(new_path, pos, table->procname);
1413                if (!pos)
1414                        goto out;
1415                table = table->child;
1416        }
1417        if (nr_subheaders == 1) {
1418                header = __register_sysctl_table(set, new_path, table);
1419                if (header)
1420                        header->ctl_table_arg = ctl_table_arg;
1421        } else {
1422                header = kzalloc(sizeof(*header) +
1423                                 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1424                if (!header)
1425                        goto out;
1426
1427                subheaders = (struct ctl_table_header **) (header + 1);
1428                subheader = subheaders;
1429                header->ctl_table_arg = ctl_table_arg;
1430
1431                if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1432                                                set, table))
1433                        goto err_register_leaves;
1434        }
1435
1436out:
1437        kfree(new_path);
1438        return header;
1439
1440err_register_leaves:
1441        while (subheader > subheaders) {
1442                struct ctl_table_header *subh = *(--subheader);
1443                struct ctl_table *table = subh->ctl_table_arg;
1444                unregister_sysctl_table(subh);
1445                kfree(table);
1446        }
1447        kfree(header);
1448        header = NULL;
1449        goto out;
1450}
1451
1452/**
1453 * register_sysctl_table_path - register a sysctl table hierarchy
1454 * @path: The path to the directory the sysctl table is in.
1455 * @table: the top-level table structure
1456 *
1457 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1458 * array. A completely 0 filled entry terminates the table.
1459 *
1460 * See __register_sysctl_paths for more details.
1461 */
1462struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1463                                                struct ctl_table *table)
1464{
1465        return __register_sysctl_paths(&sysctl_table_root.default_set,
1466                                        path, table);
1467}
1468EXPORT_SYMBOL(register_sysctl_paths);
1469
1470/**
1471 * register_sysctl_table - register a sysctl table hierarchy
1472 * @table: the top-level table structure
1473 *
1474 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1475 * array. A completely 0 filled entry terminates the table.
1476 *
1477 * See register_sysctl_paths for more details.
1478 */
1479struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1480{
1481        static const struct ctl_path null_path[] = { {} };
1482
1483        return register_sysctl_paths(null_path, table);
1484}
1485EXPORT_SYMBOL(register_sysctl_table);
1486
1487static void put_links(struct ctl_table_header *header)
1488{
1489        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1490        struct ctl_table_root *root = header->root;
1491        struct ctl_dir *parent = header->parent;
1492        struct ctl_dir *core_parent;
1493        struct ctl_table *entry;
1494
1495        if (header->set == root_set)
1496                return;
1497
1498        core_parent = xlate_dir(root_set, parent);
1499        if (IS_ERR(core_parent))
1500                return;
1501
1502        for (entry = header->ctl_table; entry->procname; entry++) {
1503                struct ctl_table_header *link_head;
1504                struct ctl_table *link;
1505                const char *name = entry->procname;
1506
1507                link = find_entry(&link_head, core_parent, name, strlen(name));
1508                if (link &&
1509                    ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1510                     (S_ISLNK(link->mode) && (link->data == root)))) {
1511                        drop_sysctl_table(link_head);
1512                }
1513                else {
1514                        pr_err("sysctl link missing during unregister: ");
1515                        sysctl_print_dir(parent);
1516                        pr_cont("/%s\n", name);
1517                }
1518        }
1519}
1520
1521static void drop_sysctl_table(struct ctl_table_header *header)
1522{
1523        struct ctl_dir *parent = header->parent;
1524
1525        if (--header->nreg)
1526                return;
1527
1528        put_links(header);
1529        start_unregistering(header);
1530        if (!--header->count)
1531                kfree_rcu(header, rcu);
1532
1533        if (parent)
1534                drop_sysctl_table(&parent->header);
1535}
1536
1537/**
1538 * unregister_sysctl_table - unregister a sysctl table hierarchy
1539 * @header: the header returned from register_sysctl_table
1540 *
1541 * Unregisters the sysctl table and all children. proc entries may not
1542 * actually be removed until they are no longer used by anyone.
1543 */
1544void unregister_sysctl_table(struct ctl_table_header * header)
1545{
1546        int nr_subheaders;
1547        might_sleep();
1548
1549        if (header == NULL)
1550                return;
1551
1552        nr_subheaders = count_subheaders(header->ctl_table_arg);
1553        if (unlikely(nr_subheaders > 1)) {
1554                struct ctl_table_header **subheaders;
1555                int i;
1556
1557                subheaders = (struct ctl_table_header **)(header + 1);
1558                for (i = nr_subheaders -1; i >= 0; i--) {
1559                        struct ctl_table_header *subh = subheaders[i];
1560                        struct ctl_table *table = subh->ctl_table_arg;
1561                        unregister_sysctl_table(subh);
1562                        kfree(table);
1563                }
1564                kfree(header);
1565                return;
1566        }
1567
1568        spin_lock(&sysctl_lock);
1569        drop_sysctl_table(header);
1570        spin_unlock(&sysctl_lock);
1571}
1572EXPORT_SYMBOL(unregister_sysctl_table);
1573
1574void setup_sysctl_set(struct ctl_table_set *set,
1575        struct ctl_table_root *root,
1576        int (*is_seen)(struct ctl_table_set *))
1577{
1578        memset(set, 0, sizeof(*set));
1579        set->is_seen = is_seen;
1580        init_header(&set->dir.header, root, set, NULL, root_table);
1581}
1582
1583void retire_sysctl_set(struct ctl_table_set *set)
1584{
1585        WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1586}
1587
1588int __init proc_sys_init(void)
1589{
1590        struct proc_dir_entry *proc_sys_root;
1591
1592        proc_sys_root = proc_mkdir("sys", NULL);
1593        proc_sys_root->proc_iops = &proc_sys_dir_operations;
1594        proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
1595        proc_sys_root->nlink = 0;
1596
1597        return sysctl_init();
1598}
1599