linux/fs/proc/proc_sysctl.c
<<
>>
Prefs
   1/*
   2 * /proc/sys support
   3 */
   4#include <linux/init.h>
   5#include <linux/sysctl.h>
   6#include <linux/poll.h>
   7#include <linux/proc_fs.h>
   8#include <linux/printk.h>
   9#include <linux/security.h>
  10#include <linux/sched.h>
  11#include <linux/namei.h>
  12#include <linux/mm.h>
  13#include <linux/module.h>
  14#include "internal.h"
  15
  16static const struct dentry_operations proc_sys_dentry_operations;
  17static const struct file_operations proc_sys_file_operations;
  18static const struct inode_operations proc_sys_inode_operations;
  19static const struct file_operations proc_sys_dir_file_operations;
  20static const struct inode_operations proc_sys_dir_operations;
  21
  22void proc_sys_poll_notify(struct ctl_table_poll *poll)
  23{
  24        if (!poll)
  25                return;
  26
  27        atomic_inc(&poll->event);
  28        wake_up_interruptible(&poll->wait);
  29}
  30
  31static struct ctl_table root_table[] = {
  32        {
  33                .procname = "",
  34                .mode = S_IFDIR|S_IRUGO|S_IXUGO,
  35        },
  36        { }
  37};
  38static struct ctl_table_root sysctl_table_root = {
  39        .default_set.dir.header = {
  40                {{.count = 1,
  41                  .nreg = 1,
  42                  .ctl_table = root_table }},
  43                .ctl_table_arg = root_table,
  44                .root = &sysctl_table_root,
  45                .set = &sysctl_table_root.default_set,
  46        },
  47};
  48
  49static DEFINE_SPINLOCK(sysctl_lock);
  50
  51static void drop_sysctl_table(struct ctl_table_header *header);
  52static int sysctl_follow_link(struct ctl_table_header **phead,
  53        struct ctl_table **pentry, struct nsproxy *namespaces);
  54static int insert_links(struct ctl_table_header *head);
  55static void put_links(struct ctl_table_header *header);
  56
  57static void sysctl_print_dir(struct ctl_dir *dir)
  58{
  59        if (dir->header.parent)
  60                sysctl_print_dir(dir->header.parent);
  61        pr_cont("%s/", dir->header.ctl_table[0].procname);
  62}
  63
  64static int namecmp(const char *name1, int len1, const char *name2, int len2)
  65{
  66        int minlen;
  67        int cmp;
  68
  69        minlen = len1;
  70        if (minlen > len2)
  71                minlen = len2;
  72
  73        cmp = memcmp(name1, name2, minlen);
  74        if (cmp == 0)
  75                cmp = len1 - len2;
  76        return cmp;
  77}
  78
  79/* Called under sysctl_lock */
  80static struct ctl_table *find_entry(struct ctl_table_header **phead,
  81        struct ctl_dir *dir, const char *name, int namelen)
  82{
  83        struct ctl_table_header *head;
  84        struct ctl_table *entry;
  85        struct rb_node *node = dir->root.rb_node;
  86
  87        while (node)
  88        {
  89                struct ctl_node *ctl_node;
  90                const char *procname;
  91                int cmp;
  92
  93                ctl_node = rb_entry(node, struct ctl_node, node);
  94                head = ctl_node->header;
  95                entry = &head->ctl_table[ctl_node - head->node];
  96                procname = entry->procname;
  97
  98                cmp = namecmp(name, namelen, procname, strlen(procname));
  99                if (cmp < 0)
 100                        node = node->rb_left;
 101                else if (cmp > 0)
 102                        node = node->rb_right;
 103                else {
 104                        *phead = head;
 105                        return entry;
 106                }
 107        }
 108        return NULL;
 109}
 110
 111static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
 112{
 113        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 114        struct rb_node **p = &head->parent->root.rb_node;
 115        struct rb_node *parent = NULL;
 116        const char *name = entry->procname;
 117        int namelen = strlen(name);
 118
 119        while (*p) {
 120                struct ctl_table_header *parent_head;
 121                struct ctl_table *parent_entry;
 122                struct ctl_node *parent_node;
 123                const char *parent_name;
 124                int cmp;
 125
 126                parent = *p;
 127                parent_node = rb_entry(parent, struct ctl_node, node);
 128                parent_head = parent_node->header;
 129                parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
 130                parent_name = parent_entry->procname;
 131
 132                cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
 133                if (cmp < 0)
 134                        p = &(*p)->rb_left;
 135                else if (cmp > 0)
 136                        p = &(*p)->rb_right;
 137                else {
 138                        pr_err("sysctl duplicate entry: ");
 139                        sysctl_print_dir(head->parent);
 140                        pr_cont("/%s\n", entry->procname);
 141                        return -EEXIST;
 142                }
 143        }
 144
 145        rb_link_node(node, parent, p);
 146        rb_insert_color(node, &head->parent->root);
 147        return 0;
 148}
 149
 150static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
 151{
 152        struct rb_node *node = &head->node[entry - head->ctl_table].node;
 153
 154        rb_erase(node, &head->parent->root);
 155}
 156
 157static void init_header(struct ctl_table_header *head,
 158        struct ctl_table_root *root, struct ctl_table_set *set,
 159        struct ctl_node *node, struct ctl_table *table)
 160{
 161        head->ctl_table = table;
 162        head->ctl_table_arg = table;
 163        head->used = 0;
 164        head->count = 1;
 165        head->nreg = 1;
 166        head->unregistering = NULL;
 167        head->root = root;
 168        head->set = set;
 169        head->parent = NULL;
 170        head->node = node;
 171        if (node) {
 172                struct ctl_table *entry;
 173                for (entry = table; entry->procname; entry++, node++)
 174                        node->header = head;
 175        }
 176}
 177
 178static void erase_header(struct ctl_table_header *head)
 179{
 180        struct ctl_table *entry;
 181        for (entry = head->ctl_table; entry->procname; entry++)
 182                erase_entry(head, entry);
 183}
 184
 185static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
 186{
 187        struct ctl_table *entry;
 188        int err;
 189
 190        dir->header.nreg++;
 191        header->parent = dir;
 192        err = insert_links(header);
 193        if (err)
 194                goto fail_links;
 195        for (entry = header->ctl_table; entry->procname; entry++) {
 196                err = insert_entry(header, entry);
 197                if (err)
 198                        goto fail;
 199        }
 200        return 0;
 201fail:
 202        erase_header(header);
 203        put_links(header);
 204fail_links:
 205        header->parent = NULL;
 206        drop_sysctl_table(&dir->header);
 207        return err;
 208}
 209
 210/* called under sysctl_lock */
 211static int use_table(struct ctl_table_header *p)
 212{
 213        if (unlikely(p->unregistering))
 214                return 0;
 215        p->used++;
 216        return 1;
 217}
 218
 219/* called under sysctl_lock */
 220static void unuse_table(struct ctl_table_header *p)
 221{
 222        if (!--p->used)
 223                if (unlikely(p->unregistering))
 224                        complete(p->unregistering);
 225}
 226
 227/* called under sysctl_lock, will reacquire if has to wait */
 228static void start_unregistering(struct ctl_table_header *p)
 229{
 230        /*
 231         * if p->used is 0, nobody will ever touch that entry again;
 232         * we'll eliminate all paths to it before dropping sysctl_lock
 233         */
 234        if (unlikely(p->used)) {
 235                struct completion wait;
 236                init_completion(&wait);
 237                p->unregistering = &wait;
 238                spin_unlock(&sysctl_lock);
 239                wait_for_completion(&wait);
 240                spin_lock(&sysctl_lock);
 241        } else {
 242                /* anything non-NULL; we'll never dereference it */
 243                p->unregistering = ERR_PTR(-EINVAL);
 244        }
 245        /*
 246         * do not remove from the list until nobody holds it; walking the
 247         * list in do_sysctl() relies on that.
 248         */
 249        erase_header(p);
 250}
 251
 252static void sysctl_head_get(struct ctl_table_header *head)
 253{
 254        spin_lock(&sysctl_lock);
 255        head->count++;
 256        spin_unlock(&sysctl_lock);
 257}
 258
 259void sysctl_head_put(struct ctl_table_header *head)
 260{
 261        spin_lock(&sysctl_lock);
 262        if (!--head->count)
 263                kfree_rcu(head, rcu);
 264        spin_unlock(&sysctl_lock);
 265}
 266
 267static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
 268{
 269        BUG_ON(!head);
 270        spin_lock(&sysctl_lock);
 271        if (!use_table(head))
 272                head = ERR_PTR(-ENOENT);
 273        spin_unlock(&sysctl_lock);
 274        return head;
 275}
 276
 277static void sysctl_head_finish(struct ctl_table_header *head)
 278{
 279        if (!head)
 280                return;
 281        spin_lock(&sysctl_lock);
 282        unuse_table(head);
 283        spin_unlock(&sysctl_lock);
 284}
 285
 286static struct ctl_table_set *
 287lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
 288{
 289        struct ctl_table_set *set = &root->default_set;
 290        if (root->lookup)
 291                set = root->lookup(root, namespaces);
 292        return set;
 293}
 294
 295static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
 296                                      struct ctl_dir *dir,
 297                                      const char *name, int namelen)
 298{
 299        struct ctl_table_header *head;
 300        struct ctl_table *entry;
 301
 302        spin_lock(&sysctl_lock);
 303        entry = find_entry(&head, dir, name, namelen);
 304        if (entry && use_table(head))
 305                *phead = head;
 306        else
 307                entry = NULL;
 308        spin_unlock(&sysctl_lock);
 309        return entry;
 310}
 311
 312static struct ctl_node *first_usable_entry(struct rb_node *node)
 313{
 314        struct ctl_node *ctl_node;
 315
 316        for (;node; node = rb_next(node)) {
 317                ctl_node = rb_entry(node, struct ctl_node, node);
 318                if (use_table(ctl_node->header))
 319                        return ctl_node;
 320        }
 321        return NULL;
 322}
 323
 324static void first_entry(struct ctl_dir *dir,
 325        struct ctl_table_header **phead, struct ctl_table **pentry)
 326{
 327        struct ctl_table_header *head = NULL;
 328        struct ctl_table *entry = NULL;
 329        struct ctl_node *ctl_node;
 330
 331        spin_lock(&sysctl_lock);
 332        ctl_node = first_usable_entry(rb_first(&dir->root));
 333        spin_unlock(&sysctl_lock);
 334        if (ctl_node) {
 335                head = ctl_node->header;
 336                entry = &head->ctl_table[ctl_node - head->node];
 337        }
 338        *phead = head;
 339        *pentry = entry;
 340}
 341
 342static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
 343{
 344        struct ctl_table_header *head = *phead;
 345        struct ctl_table *entry = *pentry;
 346        struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
 347
 348        spin_lock(&sysctl_lock);
 349        unuse_table(head);
 350
 351        ctl_node = first_usable_entry(rb_next(&ctl_node->node));
 352        spin_unlock(&sysctl_lock);
 353        head = NULL;
 354        if (ctl_node) {
 355                head = ctl_node->header;
 356                entry = &head->ctl_table[ctl_node - head->node];
 357        }
 358        *phead = head;
 359        *pentry = entry;
 360}
 361
 362void register_sysctl_root(struct ctl_table_root *root)
 363{
 364}
 365
 366/*
 367 * sysctl_perm does NOT grant the superuser all rights automatically, because
 368 * some sysctl variables are readonly even to root.
 369 */
 370
 371static int test_perm(int mode, int op)
 372{
 373        if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
 374                mode >>= 6;
 375        else if (in_egroup_p(GLOBAL_ROOT_GID))
 376                mode >>= 3;
 377        if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
 378                return 0;
 379        return -EACCES;
 380}
 381
 382static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
 383{
 384        struct ctl_table_root *root = head->root;
 385        int mode;
 386
 387        if (root->permissions)
 388                mode = root->permissions(head, table);
 389        else
 390                mode = table->mode;
 391
 392        return test_perm(mode, op);
 393}
 394
 395static struct inode *proc_sys_make_inode(struct super_block *sb,
 396                struct ctl_table_header *head, struct ctl_table *table)
 397{
 398        struct inode *inode;
 399        struct proc_inode *ei;
 400
 401        inode = new_inode(sb);
 402        if (!inode)
 403                goto out;
 404
 405        inode->i_ino = get_next_ino();
 406
 407        sysctl_head_get(head);
 408        ei = PROC_I(inode);
 409        ei->sysctl = head;
 410        ei->sysctl_entry = table;
 411
 412        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 413        inode->i_mode = table->mode;
 414        if (!S_ISDIR(table->mode)) {
 415                inode->i_mode |= S_IFREG;
 416                inode->i_op = &proc_sys_inode_operations;
 417                inode->i_fop = &proc_sys_file_operations;
 418        } else {
 419                inode->i_mode |= S_IFDIR;
 420                inode->i_op = &proc_sys_dir_operations;
 421                inode->i_fop = &proc_sys_dir_file_operations;
 422        }
 423out:
 424        return inode;
 425}
 426
 427static struct ctl_table_header *grab_header(struct inode *inode)
 428{
 429        struct ctl_table_header *head = PROC_I(inode)->sysctl;
 430        if (!head)
 431                head = &sysctl_table_root.default_set.dir.header;
 432        return sysctl_head_grab(head);
 433}
 434
 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 436                                        unsigned int flags)
 437{
 438        struct ctl_table_header *head = grab_header(dir);
 439        struct ctl_table_header *h = NULL;
 440        struct qstr *name = &dentry->d_name;
 441        struct ctl_table *p;
 442        struct inode *inode;
 443        struct dentry *err = ERR_PTR(-ENOENT);
 444        struct ctl_dir *ctl_dir;
 445        int ret;
 446
 447        if (IS_ERR(head))
 448                return ERR_CAST(head);
 449
 450        ctl_dir = container_of(head, struct ctl_dir, header);
 451
 452        p = lookup_entry(&h, ctl_dir, name->name, name->len);
 453        if (!p)
 454                goto out;
 455
 456        if (S_ISLNK(p->mode)) {
 457                ret = sysctl_follow_link(&h, &p, current->nsproxy);
 458                err = ERR_PTR(ret);
 459                if (ret)
 460                        goto out;
 461        }
 462
 463        err = ERR_PTR(-ENOMEM);
 464        inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
 465        if (!inode)
 466                goto out;
 467
 468        err = NULL;
 469        d_set_d_op(dentry, &proc_sys_dentry_operations);
 470        d_add(dentry, inode);
 471
 472out:
 473        if (h)
 474                sysctl_head_finish(h);
 475        sysctl_head_finish(head);
 476        return err;
 477}
 478
 479static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 480                size_t count, loff_t *ppos, int write)
 481{
 482        struct inode *inode = file_inode(filp);
 483        struct ctl_table_header *head = grab_header(inode);
 484        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 485        ssize_t error;
 486        size_t res;
 487
 488        if (IS_ERR(head))
 489                return PTR_ERR(head);
 490
 491        /*
 492         * At this point we know that the sysctl was not unregistered
 493         * and won't be until we finish.
 494         */
 495        error = -EPERM;
 496        if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 497                goto out;
 498
 499        /* if that can happen at all, it should be -EINVAL, not -EISDIR */
 500        error = -EINVAL;
 501        if (!table->proc_handler)
 502                goto out;
 503
 504        /* careful: calling conventions are nasty here */
 505        res = count;
 506        error = table->proc_handler(table, write, buf, &res, ppos);
 507        if (!error)
 508                error = res;
 509out:
 510        sysctl_head_finish(head);
 511
 512        return error;
 513}
 514
 515static ssize_t proc_sys_read(struct file *filp, char __user *buf,
 516                                size_t count, loff_t *ppos)
 517{
 518        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
 519}
 520
 521static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
 522                                size_t count, loff_t *ppos)
 523{
 524        return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 525}
 526
 527static int proc_sys_open(struct inode *inode, struct file *filp)
 528{
 529        struct ctl_table_header *head = grab_header(inode);
 530        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 531
 532        /* sysctl was unregistered */
 533        if (IS_ERR(head))
 534                return PTR_ERR(head);
 535
 536        if (table->poll)
 537                filp->private_data = proc_sys_poll_event(table->poll);
 538
 539        sysctl_head_finish(head);
 540
 541        return 0;
 542}
 543
 544static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
 545{
 546        struct inode *inode = file_inode(filp);
 547        struct ctl_table_header *head = grab_header(inode);
 548        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 549        unsigned int ret = DEFAULT_POLLMASK;
 550        unsigned long event;
 551
 552        /* sysctl was unregistered */
 553        if (IS_ERR(head))
 554                return POLLERR | POLLHUP;
 555
 556        if (!table->proc_handler)
 557                goto out;
 558
 559        if (!table->poll)
 560                goto out;
 561
 562        event = (unsigned long)filp->private_data;
 563        poll_wait(filp, &table->poll->wait, wait);
 564
 565        if (event != atomic_read(&table->poll->event)) {
 566                filp->private_data = proc_sys_poll_event(table->poll);
 567                ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
 568        }
 569
 570out:
 571        sysctl_head_finish(head);
 572
 573        return ret;
 574}
 575
 576static bool proc_sys_fill_cache(struct file *file,
 577                                struct dir_context *ctx,
 578                                struct ctl_table_header *head,
 579                                struct ctl_table *table)
 580{
 581        struct dentry *child, *dir = file->f_path.dentry;
 582        struct inode *inode;
 583        struct qstr qname;
 584        ino_t ino = 0;
 585        unsigned type = DT_UNKNOWN;
 586
 587        qname.name = table->procname;
 588        qname.len  = strlen(table->procname);
 589        qname.hash = full_name_hash(qname.name, qname.len);
 590
 591        child = d_lookup(dir, &qname);
 592        if (!child) {
 593                child = d_alloc(dir, &qname);
 594                if (child) {
 595                        inode = proc_sys_make_inode(dir->d_sb, head, table);
 596                        if (!inode) {
 597                                dput(child);
 598                                return false;
 599                        } else {
 600                                d_set_d_op(child, &proc_sys_dentry_operations);
 601                                d_add(child, inode);
 602                        }
 603                } else {
 604                        return false;
 605                }
 606        }
 607        inode = child->d_inode;
 608        ino  = inode->i_ino;
 609        type = inode->i_mode >> 12;
 610        dput(child);
 611        return dir_emit(ctx, qname.name, qname.len, ino, type);
 612}
 613
 614static bool proc_sys_link_fill_cache(struct file *file,
 615                                    struct dir_context *ctx,
 616                                    struct ctl_table_header *head,
 617                                    struct ctl_table *table)
 618{
 619        bool ret = true;
 620        head = sysctl_head_grab(head);
 621
 622        if (S_ISLNK(table->mode)) {
 623                /* It is not an error if we can not follow the link ignore it */
 624                int err = sysctl_follow_link(&head, &table, current->nsproxy);
 625                if (err)
 626                        goto out;
 627        }
 628
 629        ret = proc_sys_fill_cache(file, ctx, head, table);
 630out:
 631        sysctl_head_finish(head);
 632        return ret;
 633}
 634
 635static int scan(struct ctl_table_header *head, ctl_table *table,
 636                unsigned long *pos, struct file *file,
 637                struct dir_context *ctx)
 638{
 639        bool res;
 640
 641        if ((*pos)++ < ctx->pos)
 642                return true;
 643
 644        if (unlikely(S_ISLNK(table->mode)))
 645                res = proc_sys_link_fill_cache(file, ctx, head, table);
 646        else
 647                res = proc_sys_fill_cache(file, ctx, head, table);
 648
 649        if (res)
 650                ctx->pos = *pos;
 651
 652        return res;
 653}
 654
 655static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
 656{
 657        struct ctl_table_header *head = grab_header(file_inode(file));
 658        struct ctl_table_header *h = NULL;
 659        struct ctl_table *entry;
 660        struct ctl_dir *ctl_dir;
 661        unsigned long pos;
 662
 663        if (IS_ERR(head))
 664                return PTR_ERR(head);
 665
 666        ctl_dir = container_of(head, struct ctl_dir, header);
 667
 668        if (!dir_emit_dots(file, ctx))
 669                return 0;
 670
 671        pos = 2;
 672
 673        for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
 674                if (!scan(h, entry, &pos, file, ctx)) {
 675                        sysctl_head_finish(h);
 676                        break;
 677                }
 678        }
 679        sysctl_head_finish(head);
 680        return 0;
 681}
 682
 683static int proc_sys_permission(struct inode *inode, int mask)
 684{
 685        /*
 686         * sysctl entries that are not writeable,
 687         * are _NOT_ writeable, capabilities or not.
 688         */
 689        struct ctl_table_header *head;
 690        struct ctl_table *table;
 691        int error;
 692
 693        /* Executable files are not allowed under /proc/sys/ */
 694        if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
 695                return -EACCES;
 696
 697        head = grab_header(inode);
 698        if (IS_ERR(head))
 699                return PTR_ERR(head);
 700
 701        table = PROC_I(inode)->sysctl_entry;
 702        if (!table) /* global root - r-xr-xr-x */
 703                error = mask & MAY_WRITE ? -EACCES : 0;
 704        else /* Use the permissions on the sysctl table entry */
 705                error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
 706
 707        sysctl_head_finish(head);
 708        return error;
 709}
 710
 711static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 712{
 713        struct inode *inode = dentry->d_inode;
 714        int error;
 715
 716        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 717                return -EPERM;
 718
 719        error = inode_change_ok(inode, attr);
 720        if (error)
 721                return error;
 722
 723        setattr_copy(inode, attr);
 724        mark_inode_dirty(inode);
 725        return 0;
 726}
 727
 728static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 729{
 730        struct inode *inode = dentry->d_inode;
 731        struct ctl_table_header *head = grab_header(inode);
 732        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 733
 734        if (IS_ERR(head))
 735                return PTR_ERR(head);
 736
 737        generic_fillattr(inode, stat);
 738        if (table)
 739                stat->mode = (stat->mode & S_IFMT) | table->mode;
 740
 741        sysctl_head_finish(head);
 742        return 0;
 743}
 744
 745static const struct file_operations proc_sys_file_operations = {
 746        .open           = proc_sys_open,
 747        .poll           = proc_sys_poll,
 748        .read           = proc_sys_read,
 749        .write          = proc_sys_write,
 750        .llseek         = default_llseek,
 751};
 752
 753static const struct file_operations proc_sys_dir_file_operations = {
 754        .read           = generic_read_dir,
 755        .iterate        = proc_sys_readdir,
 756        .llseek         = generic_file_llseek,
 757};
 758
 759static const struct inode_operations proc_sys_inode_operations = {
 760        .permission     = proc_sys_permission,
 761        .setattr        = proc_sys_setattr,
 762        .getattr        = proc_sys_getattr,
 763};
 764
 765static const struct inode_operations proc_sys_dir_operations = {
 766        .lookup         = proc_sys_lookup,
 767        .permission     = proc_sys_permission,
 768        .setattr        = proc_sys_setattr,
 769        .getattr        = proc_sys_getattr,
 770};
 771
 772static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 773{
 774        if (flags & LOOKUP_RCU)
 775                return -ECHILD;
 776        return !PROC_I(dentry->d_inode)->sysctl->unregistering;
 777}
 778
 779static int proc_sys_delete(const struct dentry *dentry)
 780{
 781        return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
 782}
 783
 784static int sysctl_is_seen(struct ctl_table_header *p)
 785{
 786        struct ctl_table_set *set = p->set;
 787        int res;
 788        spin_lock(&sysctl_lock);
 789        if (p->unregistering)
 790                res = 0;
 791        else if (!set->is_seen)
 792                res = 1;
 793        else
 794                res = set->is_seen(set);
 795        spin_unlock(&sysctl_lock);
 796        return res;
 797}
 798
 799static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry,
 800                unsigned int len, const char *str, const struct qstr *name)
 801{
 802        struct ctl_table_header *head;
 803        struct inode *inode;
 804
 805        /* Although proc doesn't have negative dentries, rcu-walk means
 806         * that inode here can be NULL */
 807        /* AV: can it, indeed? */
 808        inode = ACCESS_ONCE(dentry->d_inode);
 809        if (!inode)
 810                return 1;
 811        if (name->len != len)
 812                return 1;
 813        if (memcmp(name->name, str, len))
 814                return 1;
 815        head = rcu_dereference(PROC_I(inode)->sysctl);
 816        return !head || !sysctl_is_seen(head);
 817}
 818
 819static const struct dentry_operations proc_sys_dentry_operations = {
 820        .d_revalidate   = proc_sys_revalidate,
 821        .d_delete       = proc_sys_delete,
 822        .d_compare      = proc_sys_compare,
 823};
 824
 825static struct ctl_dir *find_subdir(struct ctl_dir *dir,
 826                                   const char *name, int namelen)
 827{
 828        struct ctl_table_header *head;
 829        struct ctl_table *entry;
 830
 831        entry = find_entry(&head, dir, name, namelen);
 832        if (!entry)
 833                return ERR_PTR(-ENOENT);
 834        if (!S_ISDIR(entry->mode))
 835                return ERR_PTR(-ENOTDIR);
 836        return container_of(head, struct ctl_dir, header);
 837}
 838
 839static struct ctl_dir *new_dir(struct ctl_table_set *set,
 840                               const char *name, int namelen)
 841{
 842        struct ctl_table *table;
 843        struct ctl_dir *new;
 844        struct ctl_node *node;
 845        char *new_name;
 846
 847        new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
 848                      sizeof(struct ctl_table)*2 +  namelen + 1,
 849                      GFP_KERNEL);
 850        if (!new)
 851                return NULL;
 852
 853        node = (struct ctl_node *)(new + 1);
 854        table = (struct ctl_table *)(node + 1);
 855        new_name = (char *)(table + 2);
 856        memcpy(new_name, name, namelen);
 857        new_name[namelen] = '\0';
 858        table[0].procname = new_name;
 859        table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
 860        init_header(&new->header, set->dir.header.root, set, node, table);
 861
 862        return new;
 863}
 864
 865/**
 866 * get_subdir - find or create a subdir with the specified name.
 867 * @dir:  Directory to create the subdirectory in
 868 * @name: The name of the subdirectory to find or create
 869 * @namelen: The length of name
 870 *
 871 * Takes a directory with an elevated reference count so we know that
 872 * if we drop the lock the directory will not go away.  Upon success
 873 * the reference is moved from @dir to the returned subdirectory.
 874 * Upon error an error code is returned and the reference on @dir is
 875 * simply dropped.
 876 */
 877static struct ctl_dir *get_subdir(struct ctl_dir *dir,
 878                                  const char *name, int namelen)
 879{
 880        struct ctl_table_set *set = dir->header.set;
 881        struct ctl_dir *subdir, *new = NULL;
 882        int err;
 883
 884        spin_lock(&sysctl_lock);
 885        subdir = find_subdir(dir, name, namelen);
 886        if (!IS_ERR(subdir))
 887                goto found;
 888        if (PTR_ERR(subdir) != -ENOENT)
 889                goto failed;
 890
 891        spin_unlock(&sysctl_lock);
 892        new = new_dir(set, name, namelen);
 893        spin_lock(&sysctl_lock);
 894        subdir = ERR_PTR(-ENOMEM);
 895        if (!new)
 896                goto failed;
 897
 898        /* Was the subdir added while we dropped the lock? */
 899        subdir = find_subdir(dir, name, namelen);
 900        if (!IS_ERR(subdir))
 901                goto found;
 902        if (PTR_ERR(subdir) != -ENOENT)
 903                goto failed;
 904
 905        /* Nope.  Use the our freshly made directory entry. */
 906        err = insert_header(dir, &new->header);
 907        subdir = ERR_PTR(err);
 908        if (err)
 909                goto failed;
 910        subdir = new;
 911found:
 912        subdir->header.nreg++;
 913failed:
 914        if (unlikely(IS_ERR(subdir))) {
 915                pr_err("sysctl could not get directory: ");
 916                sysctl_print_dir(dir);
 917                pr_cont("/%*.*s %ld\n",
 918                        namelen, namelen, name, PTR_ERR(subdir));
 919        }
 920        drop_sysctl_table(&dir->header);
 921        if (new)
 922                drop_sysctl_table(&new->header);
 923        spin_unlock(&sysctl_lock);
 924        return subdir;
 925}
 926
 927static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
 928{
 929        struct ctl_dir *parent;
 930        const char *procname;
 931        if (!dir->header.parent)
 932                return &set->dir;
 933        parent = xlate_dir(set, dir->header.parent);
 934        if (IS_ERR(parent))
 935                return parent;
 936        procname = dir->header.ctl_table[0].procname;
 937        return find_subdir(parent, procname, strlen(procname));
 938}
 939
 940static int sysctl_follow_link(struct ctl_table_header **phead,
 941        struct ctl_table **pentry, struct nsproxy *namespaces)
 942{
 943        struct ctl_table_header *head;
 944        struct ctl_table_root *root;
 945        struct ctl_table_set *set;
 946        struct ctl_table *entry;
 947        struct ctl_dir *dir;
 948        int ret;
 949
 950        ret = 0;
 951        spin_lock(&sysctl_lock);
 952        root = (*pentry)->data;
 953        set = lookup_header_set(root, namespaces);
 954        dir = xlate_dir(set, (*phead)->parent);
 955        if (IS_ERR(dir))
 956                ret = PTR_ERR(dir);
 957        else {
 958                const char *procname = (*pentry)->procname;
 959                head = NULL;
 960                entry = find_entry(&head, dir, procname, strlen(procname));
 961                ret = -ENOENT;
 962                if (entry && use_table(head)) {
 963                        unuse_table(*phead);
 964                        *phead = head;
 965                        *pentry = entry;
 966                        ret = 0;
 967                }
 968        }
 969
 970        spin_unlock(&sysctl_lock);
 971        return ret;
 972}
 973
 974static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
 975{
 976        struct va_format vaf;
 977        va_list args;
 978
 979        va_start(args, fmt);
 980        vaf.fmt = fmt;
 981        vaf.va = &args;
 982
 983        pr_err("sysctl table check failed: %s/%s %pV\n",
 984               path, table->procname, &vaf);
 985
 986        va_end(args);
 987        return -EINVAL;
 988}
 989
 990static int sysctl_check_table(const char *path, struct ctl_table *table)
 991{
 992        int err = 0;
 993        for (; table->procname; table++) {
 994                if (table->child)
 995                        err = sysctl_err(path, table, "Not a file");
 996
 997                if ((table->proc_handler == proc_dostring) ||
 998                    (table->proc_handler == proc_dointvec) ||
 999                    (table->proc_handler == proc_dointvec_minmax) ||
1000                    (table->proc_handler == proc_dointvec_jiffies) ||
1001                    (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1002                    (table->proc_handler == proc_dointvec_ms_jiffies) ||
1003                    (table->proc_handler == proc_doulongvec_minmax) ||
1004                    (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1005                        if (!table->data)
1006                                err = sysctl_err(path, table, "No data");
1007                        if (!table->maxlen)
1008                                err = sysctl_err(path, table, "No maxlen");
1009                }
1010                if (!table->proc_handler)
1011                        err = sysctl_err(path, table, "No proc_handler");
1012
1013                if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1014                        err = sysctl_err(path, table, "bogus .mode 0%o",
1015                                table->mode);
1016        }
1017        return err;
1018}
1019
1020static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1021        struct ctl_table_root *link_root)
1022{
1023        struct ctl_table *link_table, *entry, *link;
1024        struct ctl_table_header *links;
1025        struct ctl_node *node;
1026        char *link_name;
1027        int nr_entries, name_bytes;
1028
1029        name_bytes = 0;
1030        nr_entries = 0;
1031        for (entry = table; entry->procname; entry++) {
1032                nr_entries++;
1033                name_bytes += strlen(entry->procname) + 1;
1034        }
1035
1036        links = kzalloc(sizeof(struct ctl_table_header) +
1037                        sizeof(struct ctl_node)*nr_entries +
1038                        sizeof(struct ctl_table)*(nr_entries + 1) +
1039                        name_bytes,
1040                        GFP_KERNEL);
1041
1042        if (!links)
1043                return NULL;
1044
1045        node = (struct ctl_node *)(links + 1);
1046        link_table = (struct ctl_table *)(node + nr_entries);
1047        link_name = (char *)&link_table[nr_entries + 1];
1048
1049        for (link = link_table, entry = table; entry->procname; link++, entry++) {
1050                int len = strlen(entry->procname) + 1;
1051                memcpy(link_name, entry->procname, len);
1052                link->procname = link_name;
1053                link->mode = S_IFLNK|S_IRWXUGO;
1054                link->data = link_root;
1055                link_name += len;
1056        }
1057        init_header(links, dir->header.root, dir->header.set, node, link_table);
1058        links->nreg = nr_entries;
1059
1060        return links;
1061}
1062
1063static bool get_links(struct ctl_dir *dir,
1064        struct ctl_table *table, struct ctl_table_root *link_root)
1065{
1066        struct ctl_table_header *head;
1067        struct ctl_table *entry, *link;
1068
1069        /* Are there links available for every entry in table? */
1070        for (entry = table; entry->procname; entry++) {
1071                const char *procname = entry->procname;
1072                link = find_entry(&head, dir, procname, strlen(procname));
1073                if (!link)
1074                        return false;
1075                if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1076                        continue;
1077                if (S_ISLNK(link->mode) && (link->data == link_root))
1078                        continue;
1079                return false;
1080        }
1081
1082        /* The checks passed.  Increase the registration count on the links */
1083        for (entry = table; entry->procname; entry++) {
1084                const char *procname = entry->procname;
1085                link = find_entry(&head, dir, procname, strlen(procname));
1086                head->nreg++;
1087        }
1088        return true;
1089}
1090
1091static int insert_links(struct ctl_table_header *head)
1092{
1093        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1094        struct ctl_dir *core_parent = NULL;
1095        struct ctl_table_header *links;
1096        int err;
1097
1098        if (head->set == root_set)
1099                return 0;
1100
1101        core_parent = xlate_dir(root_set, head->parent);
1102        if (IS_ERR(core_parent))
1103                return 0;
1104
1105        if (get_links(core_parent, head->ctl_table, head->root))
1106                return 0;
1107
1108        core_parent->header.nreg++;
1109        spin_unlock(&sysctl_lock);
1110
1111        links = new_links(core_parent, head->ctl_table, head->root);
1112
1113        spin_lock(&sysctl_lock);
1114        err = -ENOMEM;
1115        if (!links)
1116                goto out;
1117
1118        err = 0;
1119        if (get_links(core_parent, head->ctl_table, head->root)) {
1120                kfree(links);
1121                goto out;
1122        }
1123
1124        err = insert_header(core_parent, links);
1125        if (err)
1126                kfree(links);
1127out:
1128        drop_sysctl_table(&core_parent->header);
1129        return err;
1130}
1131
1132/**
1133 * __register_sysctl_table - register a leaf sysctl table
1134 * @set: Sysctl tree to register on
1135 * @path: The path to the directory the sysctl table is in.
1136 * @table: the top-level table structure
1137 *
1138 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1139 * array. A completely 0 filled entry terminates the table.
1140 *
1141 * The members of the &struct ctl_table structure are used as follows:
1142 *
1143 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1144 *            enter a sysctl file
1145 *
1146 * data - a pointer to data for use by proc_handler
1147 *
1148 * maxlen - the maximum size in bytes of the data
1149 *
1150 * mode - the file permissions for the /proc/sys file
1151 *
1152 * child - must be %NULL.
1153 *
1154 * proc_handler - the text handler routine (described below)
1155 *
1156 * extra1, extra2 - extra pointers usable by the proc handler routines
1157 *
1158 * Leaf nodes in the sysctl tree will be represented by a single file
1159 * under /proc; non-leaf nodes will be represented by directories.
1160 *
1161 * There must be a proc_handler routine for any terminal nodes.
1162 * Several default handlers are available to cover common cases -
1163 *
1164 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1165 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1166 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1167 *
1168 * It is the handler's job to read the input buffer from user memory
1169 * and process it. The handler should return 0 on success.
1170 *
1171 * This routine returns %NULL on a failure to register, and a pointer
1172 * to the table header on success.
1173 */
1174struct ctl_table_header *__register_sysctl_table(
1175        struct ctl_table_set *set,
1176        const char *path, struct ctl_table *table)
1177{
1178        struct ctl_table_root *root = set->dir.header.root;
1179        struct ctl_table_header *header;
1180        const char *name, *nextname;
1181        struct ctl_dir *dir;
1182        struct ctl_table *entry;
1183        struct ctl_node *node;
1184        int nr_entries = 0;
1185
1186        for (entry = table; entry->procname; entry++)
1187                nr_entries++;
1188
1189        header = kzalloc(sizeof(struct ctl_table_header) +
1190                         sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
1191        if (!header)
1192                return NULL;
1193
1194        node = (struct ctl_node *)(header + 1);
1195        init_header(header, root, set, node, table);
1196        if (sysctl_check_table(path, table))
1197                goto fail;
1198
1199        spin_lock(&sysctl_lock);
1200        dir = &set->dir;
1201        /* Reference moved down the diretory tree get_subdir */
1202        dir->header.nreg++;
1203        spin_unlock(&sysctl_lock);
1204
1205        /* Find the directory for the ctl_table */
1206        for (name = path; name; name = nextname) {
1207                int namelen;
1208                nextname = strchr(name, '/');
1209                if (nextname) {
1210                        namelen = nextname - name;
1211                        nextname++;
1212                } else {
1213                        namelen = strlen(name);
1214                }
1215                if (namelen == 0)
1216                        continue;
1217
1218                dir = get_subdir(dir, name, namelen);
1219                if (IS_ERR(dir))
1220                        goto fail;
1221        }
1222
1223        spin_lock(&sysctl_lock);
1224        if (insert_header(dir, header))
1225                goto fail_put_dir_locked;
1226
1227        drop_sysctl_table(&dir->header);
1228        spin_unlock(&sysctl_lock);
1229
1230        return header;
1231
1232fail_put_dir_locked:
1233        drop_sysctl_table(&dir->header);
1234        spin_unlock(&sysctl_lock);
1235fail:
1236        kfree(header);
1237        dump_stack();
1238        return NULL;
1239}
1240
1241/**
1242 * register_sysctl - register a sysctl table
1243 * @path: The path to the directory the sysctl table is in.
1244 * @table: the table structure
1245 *
1246 * Register a sysctl table. @table should be a filled in ctl_table
1247 * array. A completely 0 filled entry terminates the table.
1248 *
1249 * See __register_sysctl_table for more details.
1250 */
1251struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1252{
1253        return __register_sysctl_table(&sysctl_table_root.default_set,
1254                                        path, table);
1255}
1256EXPORT_SYMBOL(register_sysctl);
1257
/*
 * Append "@name/" at @pos inside the PATH_MAX sized buffer starting at
 * @path and keep the result NUL terminated.
 *
 * Returns the position right behind the added '/', i.e. where the next
 * component has to go, or NULL when the component does not fit.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int len = strlen(name);
	char *tail;

	/* Need room for the name, the '/' and the terminating NUL. */
	if ((pos - path) + len + 2 >= PATH_MAX)
		return NULL;

	memcpy(pos, name, len);
	tail = pos + len;
	tail[0] = '/';
	tail[1] = '\0';

	return tail + 1;
}
1270
1271static int count_subheaders(struct ctl_table *table)
1272{
1273        int has_files = 0;
1274        int nr_subheaders = 0;
1275        struct ctl_table *entry;
1276
1277        /* special case: no directory and empty directory */
1278        if (!table || !table->procname)
1279                return 1;
1280
1281        for (entry = table; entry->procname; entry++) {
1282                if (entry->child)
1283                        nr_subheaders += count_subheaders(entry->child);
1284                else
1285                        has_files = 1;
1286        }
1287        return nr_subheaders + has_files;
1288}
1289
1290static int register_leaf_sysctl_tables(const char *path, char *pos,
1291        struct ctl_table_header ***subheader, struct ctl_table_set *set,
1292        struct ctl_table *table)
1293{
1294        struct ctl_table *ctl_table_arg = NULL;
1295        struct ctl_table *entry, *files;
1296        int nr_files = 0;
1297        int nr_dirs = 0;
1298        int err = -ENOMEM;
1299
1300        for (entry = table; entry->procname; entry++) {
1301                if (entry->child)
1302                        nr_dirs++;
1303                else
1304                        nr_files++;
1305        }
1306
1307        files = table;
1308        /* If there are mixed files and directories we need a new table */
1309        if (nr_dirs && nr_files) {
1310                struct ctl_table *new;
1311                files = kzalloc(sizeof(struct ctl_table) * (nr_files + 1),
1312                                GFP_KERNEL);
1313                if (!files)
1314                        goto out;
1315
1316                ctl_table_arg = files;
1317                for (new = files, entry = table; entry->procname; entry++) {
1318                        if (entry->child)
1319                                continue;
1320                        *new = *entry;
1321                        new++;
1322                }
1323        }
1324
1325        /* Register everything except a directory full of subdirectories */
1326        if (nr_files || !nr_dirs) {
1327                struct ctl_table_header *header;
1328                header = __register_sysctl_table(set, path, files);
1329                if (!header) {
1330                        kfree(ctl_table_arg);
1331                        goto out;
1332                }
1333
1334                /* Remember if we need to free the file table */
1335                header->ctl_table_arg = ctl_table_arg;
1336                **subheader = header;
1337                (*subheader)++;
1338        }
1339
1340        /* Recurse into the subdirectories. */
1341        for (entry = table; entry->procname; entry++) {
1342                char *child_pos;
1343
1344                if (!entry->child)
1345                        continue;
1346
1347                err = -ENAMETOOLONG;
1348                child_pos = append_path(path, pos, entry->procname);
1349                if (!child_pos)
1350                        goto out;
1351
1352                err = register_leaf_sysctl_tables(path, child_pos, subheader,
1353                                                  set, entry->child);
1354                pos[0] = '\0';
1355                if (err)
1356                        goto out;
1357        }
1358        err = 0;
1359out:
1360        /* On failure our caller will unregister all registered subheaders */
1361        return err;
1362}
1363
1364/**
1365 * __register_sysctl_paths - register a sysctl table hierarchy
1366 * @set: Sysctl tree to register on
1367 * @path: The path to the directory the sysctl table is in.
1368 * @table: the top-level table structure
1369 *
1370 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1371 * array. A completely 0 filled entry terminates the table.
1372 *
1373 * See __register_sysctl_table for more details.
1374 */
1375struct ctl_table_header *__register_sysctl_paths(
1376        struct ctl_table_set *set,
1377        const struct ctl_path *path, struct ctl_table *table)
1378{
1379        struct ctl_table *ctl_table_arg = table;
1380        int nr_subheaders = count_subheaders(table);
1381        struct ctl_table_header *header = NULL, **subheaders, **subheader;
1382        const struct ctl_path *component;
1383        char *new_path, *pos;
1384
1385        pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1386        if (!new_path)
1387                return NULL;
1388
1389        pos[0] = '\0';
1390        for (component = path; component->procname; component++) {
1391                pos = append_path(new_path, pos, component->procname);
1392                if (!pos)
1393                        goto out;
1394        }
1395        while (table->procname && table->child && !table[1].procname) {
1396                pos = append_path(new_path, pos, table->procname);
1397                if (!pos)
1398                        goto out;
1399                table = table->child;
1400        }
1401        if (nr_subheaders == 1) {
1402                header = __register_sysctl_table(set, new_path, table);
1403                if (header)
1404                        header->ctl_table_arg = ctl_table_arg;
1405        } else {
1406                header = kzalloc(sizeof(*header) +
1407                                 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1408                if (!header)
1409                        goto out;
1410
1411                subheaders = (struct ctl_table_header **) (header + 1);
1412                subheader = subheaders;
1413                header->ctl_table_arg = ctl_table_arg;
1414
1415                if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1416                                                set, table))
1417                        goto err_register_leaves;
1418        }
1419
1420out:
1421        kfree(new_path);
1422        return header;
1423
1424err_register_leaves:
1425        while (subheader > subheaders) {
1426                struct ctl_table_header *subh = *(--subheader);
1427                struct ctl_table *table = subh->ctl_table_arg;
1428                unregister_sysctl_table(subh);
1429                kfree(table);
1430        }
1431        kfree(header);
1432        header = NULL;
1433        goto out;
1434}
1435
1436/**
1437 * register_sysctl_table_path - register a sysctl table hierarchy
1438 * @path: The path to the directory the sysctl table is in.
1439 * @table: the top-level table structure
1440 *
1441 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1442 * array. A completely 0 filled entry terminates the table.
1443 *
1444 * See __register_sysctl_paths for more details.
1445 */
1446struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1447                                                struct ctl_table *table)
1448{
1449        return __register_sysctl_paths(&sysctl_table_root.default_set,
1450                                        path, table);
1451}
1452EXPORT_SYMBOL(register_sysctl_paths);
1453
1454/**
1455 * register_sysctl_table - register a sysctl table hierarchy
1456 * @table: the top-level table structure
1457 *
1458 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1459 * array. A completely 0 filled entry terminates the table.
1460 *
1461 * See register_sysctl_paths for more details.
1462 */
1463struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1464{
1465        static const struct ctl_path null_path[] = { {} };
1466
1467        return register_sysctl_paths(null_path, table);
1468}
1469EXPORT_SYMBOL(register_sysctl_table);
1470
1471static void put_links(struct ctl_table_header *header)
1472{
1473        struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1474        struct ctl_table_root *root = header->root;
1475        struct ctl_dir *parent = header->parent;
1476        struct ctl_dir *core_parent;
1477        struct ctl_table *entry;
1478
1479        if (header->set == root_set)
1480                return;
1481
1482        core_parent = xlate_dir(root_set, parent);
1483        if (IS_ERR(core_parent))
1484                return;
1485
1486        for (entry = header->ctl_table; entry->procname; entry++) {
1487                struct ctl_table_header *link_head;
1488                struct ctl_table *link;
1489                const char *name = entry->procname;
1490
1491                link = find_entry(&link_head, core_parent, name, strlen(name));
1492                if (link &&
1493                    ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1494                     (S_ISLNK(link->mode) && (link->data == root)))) {
1495                        drop_sysctl_table(link_head);
1496                }
1497                else {
1498                        pr_err("sysctl link missing during unregister: ");
1499                        sysctl_print_dir(parent);
1500                        pr_cont("/%s\n", name);
1501                }
1502        }
1503}
1504
1505static void drop_sysctl_table(struct ctl_table_header *header)
1506{
1507        struct ctl_dir *parent = header->parent;
1508
1509        if (--header->nreg)
1510                return;
1511
1512        put_links(header);
1513        start_unregistering(header);
1514        if (!--header->count)
1515                kfree_rcu(header, rcu);
1516
1517        if (parent)
1518                drop_sysctl_table(&parent->header);
1519}
1520
1521/**
1522 * unregister_sysctl_table - unregister a sysctl table hierarchy
1523 * @header: the header returned from register_sysctl_table
1524 *
1525 * Unregisters the sysctl table and all children. proc entries may not
1526 * actually be removed until they are no longer used by anyone.
1527 */
1528void unregister_sysctl_table(struct ctl_table_header * header)
1529{
1530        int nr_subheaders;
1531        might_sleep();
1532
1533        if (header == NULL)
1534                return;
1535
1536        nr_subheaders = count_subheaders(header->ctl_table_arg);
1537        if (unlikely(nr_subheaders > 1)) {
1538                struct ctl_table_header **subheaders;
1539                int i;
1540
1541                subheaders = (struct ctl_table_header **)(header + 1);
1542                for (i = nr_subheaders -1; i >= 0; i--) {
1543                        struct ctl_table_header *subh = subheaders[i];
1544                        struct ctl_table *table = subh->ctl_table_arg;
1545                        unregister_sysctl_table(subh);
1546                        kfree(table);
1547                }
1548                kfree(header);
1549                return;
1550        }
1551
1552        spin_lock(&sysctl_lock);
1553        drop_sysctl_table(header);
1554        spin_unlock(&sysctl_lock);
1555}
1556EXPORT_SYMBOL(unregister_sysctl_table);
1557
1558void setup_sysctl_set(struct ctl_table_set *set,
1559        struct ctl_table_root *root,
1560        int (*is_seen)(struct ctl_table_set *))
1561{
1562        memset(set, 0, sizeof(*set));
1563        set->is_seen = is_seen;
1564        init_header(&set->dir.header, root, set, NULL, root_table);
1565}
1566
1567void retire_sysctl_set(struct ctl_table_set *set)
1568{
1569        WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1570}
1571
1572int __init proc_sys_init(void)
1573{
1574        struct proc_dir_entry *proc_sys_root;
1575
1576        proc_sys_root = proc_mkdir("sys", NULL);
1577        proc_sys_root->proc_iops = &proc_sys_dir_operations;
1578        proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
1579        proc_sys_root->nlink = 0;
1580
1581        return sysctl_init();
1582}
1583