1
2
3
4
5#include <linux/init.h>
6#include <linux/sysctl.h>
7#include <linux/poll.h>
8#include <linux/proc_fs.h>
9#include <linux/printk.h>
10#include <linux/security.h>
11#include <linux/sched.h>
12#include <linux/cred.h>
13#include <linux/namei.h>
14#include <linux/mm.h>
15#include <linux/module.h>
16#include <linux/bpf-cgroup.h>
17#include "internal.h"
18
/*
 * Forward declarations of the operation tables defined further down in this
 * file.  proc_sys_make_inode(), proc_sys_lookup() and proc_sys_fill_cache()
 * take their addresses before the definitions appear.
 */
static const struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
24
25
/* Shared constant table holding 0, 1 and INT_MAX; exported to other code. */
const int sysctl_vals[] = { 0, 1, INT_MAX };
EXPORT_SYMBOL(sysctl_vals);
28
29
30
/*
 * Sentinel table that contains only the terminating empty entry.  Its address
 * is used as a marker: a directory whose first table entry has .child pointing
 * here is reported as empty by is_empty_dir(), and insert_header() refuses to
 * add anything to such a directory.
 */
struct ctl_table sysctl_mount_point[] = {
	{ }
};
34
35static bool is_empty_dir(struct ctl_table_header *head)
36{
37 return head->ctl_table[0].child == sysctl_mount_point;
38}
39
40static void set_empty_dir(struct ctl_dir *dir)
41{
42 dir->header.ctl_table[0].child = sysctl_mount_point;
43}
44
45static void clear_empty_dir(struct ctl_dir *dir)
46
47{
48 dir->header.ctl_table[0].child = NULL;
49}
50
51void proc_sys_poll_notify(struct ctl_table_poll *poll)
52{
53 if (!poll)
54 return;
55
56 atomic_inc(&poll->event);
57 wake_up_interruptible(&poll->wait);
58}
59
/*
 * Table describing the top-level directory itself: a single directory entry
 * with an empty name, readable and searchable by everyone, followed by the
 * terminating empty entry.
 */
static struct ctl_table root_table[] = {
	{
		.procname = "",
		.mode = S_IFDIR|S_IRUGO|S_IXUGO,
	},
	{ }
};
/*
 * Root of the sysctl tree.  The header of the default set's top-level
 * directory is statically initialised to point at root_table, starts with
 * count = 1 and nreg = 1, and refers back to this root and its default set.
 */
static struct ctl_table_root sysctl_table_root = {
	.default_set.dir.header = {
		{{.count = 1,
		  .nreg = 1,
		  .ctl_table = root_table }},
		.ctl_table_arg = root_table,
		.root = &sysctl_table_root,
		.set = &sysctl_table_root.default_set,
	},
};
77
/*
 * Spinlock taken in this file around lookups in the per-directory trees and
 * around updates of the header counters (used, count, nreg).
 */
static DEFINE_SPINLOCK(sysctl_lock);

/* Forward declarations for helpers that are used before they are defined. */
static void drop_sysctl_table(struct ctl_table_header *header);
static int sysctl_follow_link(struct ctl_table_header **phead,
	struct ctl_table **pentry);
static int insert_links(struct ctl_table_header *head);
static void put_links(struct ctl_table_header *header);
85
86static void sysctl_print_dir(struct ctl_dir *dir)
87{
88 if (dir->header.parent)
89 sysctl_print_dir(dir->header.parent);
90 pr_cont("%s/", dir->header.ctl_table[0].procname);
91}
92
/*
 * Compare two length-delimited names.
 *
 * The common prefix (the shorter of the two lengths) is compared bytewise with
 * memcmp(); if it is identical, the result is len1 - len2, so the shorter name
 * sorts first.  Returns <0, 0 or >0.
 */
static int namecmp(const char *name1, int len1, const char *name2, int len2)
{
	int common = (len1 > len2) ? len2 : len1;
	int diff = memcmp(name1, name2, common);

	if (diff != 0)
		return diff;
	return len1 - len2;
}
107
108
109static struct ctl_table *find_entry(struct ctl_table_header **phead,
110 struct ctl_dir *dir, const char *name, int namelen)
111{
112 struct ctl_table_header *head;
113 struct ctl_table *entry;
114 struct rb_node *node = dir->root.rb_node;
115
116 while (node)
117 {
118 struct ctl_node *ctl_node;
119 const char *procname;
120 int cmp;
121
122 ctl_node = rb_entry(node, struct ctl_node, node);
123 head = ctl_node->header;
124 entry = &head->ctl_table[ctl_node - head->node];
125 procname = entry->procname;
126
127 cmp = namecmp(name, namelen, procname, strlen(procname));
128 if (cmp < 0)
129 node = node->rb_left;
130 else if (cmp > 0)
131 node = node->rb_right;
132 else {
133 *phead = head;
134 return entry;
135 }
136 }
137 return NULL;
138}
139
140static int insert_entry(struct ctl_table_header *head, struct ctl_table *entry)
141{
142 struct rb_node *node = &head->node[entry - head->ctl_table].node;
143 struct rb_node **p = &head->parent->root.rb_node;
144 struct rb_node *parent = NULL;
145 const char *name = entry->procname;
146 int namelen = strlen(name);
147
148 while (*p) {
149 struct ctl_table_header *parent_head;
150 struct ctl_table *parent_entry;
151 struct ctl_node *parent_node;
152 const char *parent_name;
153 int cmp;
154
155 parent = *p;
156 parent_node = rb_entry(parent, struct ctl_node, node);
157 parent_head = parent_node->header;
158 parent_entry = &parent_head->ctl_table[parent_node - parent_head->node];
159 parent_name = parent_entry->procname;
160
161 cmp = namecmp(name, namelen, parent_name, strlen(parent_name));
162 if (cmp < 0)
163 p = &(*p)->rb_left;
164 else if (cmp > 0)
165 p = &(*p)->rb_right;
166 else {
167 pr_err("sysctl duplicate entry: ");
168 sysctl_print_dir(head->parent);
169 pr_cont("/%s\n", entry->procname);
170 return -EEXIST;
171 }
172 }
173
174 rb_link_node(node, parent, p);
175 rb_insert_color(node, &head->parent->root);
176 return 0;
177}
178
179static void erase_entry(struct ctl_table_header *head, struct ctl_table *entry)
180{
181 struct rb_node *node = &head->node[entry - head->ctl_table].node;
182
183 rb_erase(node, &head->parent->root);
184}
185
186static void init_header(struct ctl_table_header *head,
187 struct ctl_table_root *root, struct ctl_table_set *set,
188 struct ctl_node *node, struct ctl_table *table)
189{
190 head->ctl_table = table;
191 head->ctl_table_arg = table;
192 head->used = 0;
193 head->count = 1;
194 head->nreg = 1;
195 head->unregistering = NULL;
196 head->root = root;
197 head->set = set;
198 head->parent = NULL;
199 head->node = node;
200 INIT_HLIST_HEAD(&head->inodes);
201 if (node) {
202 struct ctl_table *entry;
203 for (entry = table; entry->procname; entry++, node++)
204 node->header = head;
205 }
206}
207
208static void erase_header(struct ctl_table_header *head)
209{
210 struct ctl_table *entry;
211 for (entry = head->ctl_table; entry->procname; entry++)
212 erase_entry(head, entry);
213}
214
215static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
216{
217 struct ctl_table *entry;
218 int err;
219
220
221 if (is_empty_dir(&dir->header))
222 return -EROFS;
223
224
225 if (header->ctl_table == sysctl_mount_point) {
226 if (!RB_EMPTY_ROOT(&dir->root))
227 return -EINVAL;
228 set_empty_dir(dir);
229 }
230
231 dir->header.nreg++;
232 header->parent = dir;
233 err = insert_links(header);
234 if (err)
235 goto fail_links;
236 for (entry = header->ctl_table; entry->procname; entry++) {
237 err = insert_entry(header, entry);
238 if (err)
239 goto fail;
240 }
241 return 0;
242fail:
243 erase_header(header);
244 put_links(header);
245fail_links:
246 if (header->ctl_table == sysctl_mount_point)
247 clear_empty_dir(dir);
248 header->parent = NULL;
249 drop_sysctl_table(&dir->header);
250 return err;
251}
252
253
254static int use_table(struct ctl_table_header *p)
255{
256 if (unlikely(p->unregistering))
257 return 0;
258 p->used++;
259 return 1;
260}
261
262
263static void unuse_table(struct ctl_table_header *p)
264{
265 if (!--p->used)
266 if (unlikely(p->unregistering))
267 complete(p->unregistering);
268}
269
/*
 * Invalidate cached dentries for the inodes on @head's inode list by handing
 * the list and sysctl_lock to proc_invalidate_siblings_dcache().
 */
static void proc_sys_invalidate_dcache(struct ctl_table_header *head)
{
	proc_invalidate_siblings_dcache(&head->inodes, &sysctl_lock);
}
274
275
/*
 * Begin tearing down header @p.
 *
 * Entered with sysctl_lock held.  The lock is dropped while waiting for users
 * and while invalidating the dcache, and is held again on return.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * If there are still users, publish an on-stack completion and sleep
	 * until unuse_table() completes it when the last user goes away.
	 */
	if (unlikely(p->used)) {
		struct completion wait;
		init_completion(&wait);
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
	} else {
		/*
		 * No users: any non-NULL value is enough to make use_table()
		 * refuse new users; with used == 0 it is never completed.
		 */
		p->unregistering = ERR_PTR(-EINVAL);
		spin_unlock(&sysctl_lock);
	}

	/* Runs without sysctl_lock held; the helper is given the lock itself. */
	proc_sys_invalidate_dcache(p);

	/* Retake the lock and unlink every entry of @p from its parent's tree. */
	spin_lock(&sysctl_lock);
	erase_header(p);
}
305
306static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
307{
308 BUG_ON(!head);
309 spin_lock(&sysctl_lock);
310 if (!use_table(head))
311 head = ERR_PTR(-ENOENT);
312 spin_unlock(&sysctl_lock);
313 return head;
314}
315
316static void sysctl_head_finish(struct ctl_table_header *head)
317{
318 if (!head)
319 return;
320 spin_lock(&sysctl_lock);
321 unuse_table(head);
322 spin_unlock(&sysctl_lock);
323}
324
325static struct ctl_table_set *
326lookup_header_set(struct ctl_table_root *root)
327{
328 struct ctl_table_set *set = &root->default_set;
329 if (root->lookup)
330 set = root->lookup(root);
331 return set;
332}
333
334static struct ctl_table *lookup_entry(struct ctl_table_header **phead,
335 struct ctl_dir *dir,
336 const char *name, int namelen)
337{
338 struct ctl_table_header *head;
339 struct ctl_table *entry;
340
341 spin_lock(&sysctl_lock);
342 entry = find_entry(&head, dir, name, namelen);
343 if (entry && use_table(head))
344 *phead = head;
345 else
346 entry = NULL;
347 spin_unlock(&sysctl_lock);
348 return entry;
349}
350
351static struct ctl_node *first_usable_entry(struct rb_node *node)
352{
353 struct ctl_node *ctl_node;
354
355 for (;node; node = rb_next(node)) {
356 ctl_node = rb_entry(node, struct ctl_node, node);
357 if (use_table(ctl_node->header))
358 return ctl_node;
359 }
360 return NULL;
361}
362
363static void first_entry(struct ctl_dir *dir,
364 struct ctl_table_header **phead, struct ctl_table **pentry)
365{
366 struct ctl_table_header *head = NULL;
367 struct ctl_table *entry = NULL;
368 struct ctl_node *ctl_node;
369
370 spin_lock(&sysctl_lock);
371 ctl_node = first_usable_entry(rb_first(&dir->root));
372 spin_unlock(&sysctl_lock);
373 if (ctl_node) {
374 head = ctl_node->header;
375 entry = &head->ctl_table[ctl_node - head->node];
376 }
377 *phead = head;
378 *pentry = entry;
379}
380
381static void next_entry(struct ctl_table_header **phead, struct ctl_table **pentry)
382{
383 struct ctl_table_header *head = *phead;
384 struct ctl_table *entry = *pentry;
385 struct ctl_node *ctl_node = &head->node[entry - head->ctl_table];
386
387 spin_lock(&sysctl_lock);
388 unuse_table(head);
389
390 ctl_node = first_usable_entry(rb_next(&ctl_node->node));
391 spin_unlock(&sysctl_lock);
392 head = NULL;
393 if (ctl_node) {
394 head = ctl_node->header;
395 entry = &head->ctl_table[ctl_node - head->node];
396 }
397 *phead = head;
398 *pentry = entry;
399}
400
401
402
403
404
405
406static int test_perm(int mode, int op)
407{
408 if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
409 mode >>= 6;
410 else if (in_egroup_p(GLOBAL_ROOT_GID))
411 mode >>= 3;
412 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
413 return 0;
414 return -EACCES;
415}
416
417static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, int op)
418{
419 struct ctl_table_root *root = head->root;
420 int mode;
421
422 if (root->permissions)
423 mode = root->permissions(head, table);
424 else
425 mode = table->mode;
426
427 return test_perm(mode, op);
428}
429
430static struct inode *proc_sys_make_inode(struct super_block *sb,
431 struct ctl_table_header *head, struct ctl_table *table)
432{
433 struct ctl_table_root *root = head->root;
434 struct inode *inode;
435 struct proc_inode *ei;
436
437 inode = new_inode(sb);
438 if (!inode)
439 return ERR_PTR(-ENOMEM);
440
441 inode->i_ino = get_next_ino();
442
443 ei = PROC_I(inode);
444
445 spin_lock(&sysctl_lock);
446 if (unlikely(head->unregistering)) {
447 spin_unlock(&sysctl_lock);
448 iput(inode);
449 return ERR_PTR(-ENOENT);
450 }
451 ei->sysctl = head;
452 ei->sysctl_entry = table;
453 hlist_add_head_rcu(&ei->sibling_inodes, &head->inodes);
454 head->count++;
455 spin_unlock(&sysctl_lock);
456
457 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
458 inode->i_mode = table->mode;
459 if (!S_ISDIR(table->mode)) {
460 inode->i_mode |= S_IFREG;
461 inode->i_op = &proc_sys_inode_operations;
462 inode->i_fop = &proc_sys_file_operations;
463 } else {
464 inode->i_mode |= S_IFDIR;
465 inode->i_op = &proc_sys_dir_operations;
466 inode->i_fop = &proc_sys_dir_file_operations;
467 if (is_empty_dir(head))
468 make_empty_dir_inode(inode);
469 }
470
471 if (root->set_ownership)
472 root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
473 else {
474 inode->i_uid = GLOBAL_ROOT_UID;
475 inode->i_gid = GLOBAL_ROOT_GID;
476 }
477
478 return inode;
479}
480
481void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
482{
483 spin_lock(&sysctl_lock);
484 hlist_del_init_rcu(&PROC_I(inode)->sibling_inodes);
485 if (!--head->count)
486 kfree_rcu(head, rcu);
487 spin_unlock(&sysctl_lock);
488}
489
490static struct ctl_table_header *grab_header(struct inode *inode)
491{
492 struct ctl_table_header *head = PROC_I(inode)->sysctl;
493 if (!head)
494 head = &sysctl_table_root.default_set.dir.header;
495 return sysctl_head_grab(head);
496}
497
/*
 * ->lookup() for sysctl directories: resolve @dentry's name inside @dir.
 *
 * Returns the result of d_splice_alias() on success, ERR_PTR(-ENOENT) when the
 * name does not exist (or its header is going away), or another ERR_PTR
 * propagated from grab_header(), sysctl_follow_link() or
 * proc_sys_make_inode().
 */
static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
					unsigned int flags)
{
	struct ctl_table_header *head = grab_header(dir);
	struct ctl_table_header *h = NULL;
	const struct qstr *name = &dentry->d_name;
	struct ctl_table *p;
	struct inode *inode;
	struct dentry *err = ERR_PTR(-ENOENT);
	struct ctl_dir *ctl_dir;
	int ret;

	if (IS_ERR(head))
		return ERR_CAST(head);

	ctl_dir = container_of(head, struct ctl_dir, header);

	/* On success this leaves a usage reference on the entry's header in h. */
	p = lookup_entry(&h, ctl_dir, name->name, name->len);
	if (!p)
		goto out;

	/* Link entries are resolved to the entry (and header) they refer to. */
	if (S_ISLNK(p->mode)) {
		ret = sysctl_follow_link(&h, &p);
		err = ERR_PTR(ret);
		if (ret)
			goto out;
	}

	inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
	if (IS_ERR(inode)) {
		err = ERR_CAST(inode);
		goto out;
	}

	d_set_d_op(dentry, &proc_sys_dentry_operations);
	err = d_splice_alias(inode, dentry);

out:
	/* Drop the reference on the entry's header (if any), then on the directory. */
	if (h)
		sysctl_head_finish(h);
	sysctl_head_finish(head);
	return err;
}
541
/*
 * Common implementation of read() and write() on a sysctl file.
 *
 * @write selects the direction (non-zero for write).  Returns the number of
 * bytes reported by the handler through @count on success, or a negative
 * errno: the error from grab_header(), -EPERM if the permission check fails,
 * -EINVAL if the entry has no proc_handler, or whatever the BPF hook or the
 * handler itself returns.
 */
static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
		size_t count, loff_t *ppos, int write)
{
	struct inode *inode = file_inode(filp);
	struct ctl_table_header *head = grab_header(inode);
	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
	void *new_buf = NULL;
	ssize_t error;

	if (IS_ERR(head))
		return PTR_ERR(head);

	/*
	 * A usage reference on the header is held from here until the
	 * sysctl_head_finish() at "out", which holds off start_unregistering().
	 */
	error = -EPERM;
	if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
		goto out;

	/* Entries without a handler cannot be read or written. */
	error = -EINVAL;
	if (!table->proc_handler)
		goto out;

	/* NOTE(review): new_buf is presumably set by the BPF hook when it substitutes the data - confirm. */
	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
					   ppos, &new_buf);
	if (error)
		goto out;

	/*
	 * When a replacement buffer was supplied, pass it to the handler with
	 * the address limit temporarily set to KERNEL_DS, then free it.
	 */
	if (new_buf) {
		mm_segment_t old_fs;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		error = table->proc_handler(table, write, (void __user *)new_buf,
					    &count, ppos);
		set_fs(old_fs);
		kfree(new_buf);
	} else {
		error = table->proc_handler(table, write, buf, &count, ppos);
	}

	/* Handlers return 0 on success and update count; report the count. */
	if (!error)
		error = count;
out:
	sysctl_head_finish(head);

	return error;
}
593
/* ->read() for sysctl files: forward to proc_sys_call_handler() with write = 0. */
static ssize_t proc_sys_read(struct file *filp, char __user *buf,
				size_t count, loff_t *ppos)
{
	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
}
599
/* ->write() for sysctl files: forward to proc_sys_call_handler() with write = 1. */
static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
				size_t count, loff_t *ppos)
{
	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
}
605
606static int proc_sys_open(struct inode *inode, struct file *filp)
607{
608 struct ctl_table_header *head = grab_header(inode);
609 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
610
611
612 if (IS_ERR(head))
613 return PTR_ERR(head);
614
615 if (table->poll)
616 filp->private_data = proc_sys_poll_event(table->poll);
617
618 sysctl_head_finish(head);
619
620 return 0;
621}
622
623static __poll_t proc_sys_poll(struct file *filp, poll_table *wait)
624{
625 struct inode *inode = file_inode(filp);
626 struct ctl_table_header *head = grab_header(inode);
627 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
628 __poll_t ret = DEFAULT_POLLMASK;
629 unsigned long event;
630
631
632 if (IS_ERR(head))
633 return EPOLLERR | EPOLLHUP;
634
635 if (!table->proc_handler)
636 goto out;
637
638 if (!table->poll)
639 goto out;
640
641 event = (unsigned long)filp->private_data;
642 poll_wait(filp, &table->poll->wait, wait);
643
644 if (event != atomic_read(&table->poll->event)) {
645 filp->private_data = proc_sys_poll_event(table->poll);
646 ret = EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI;
647 }
648
649out:
650 sysctl_head_finish(head);
651
652 return ret;
653}
654
/*
 * Emit one directory entry for @table during readdir, instantiating a dentry
 * and inode for it first if the dcache does not have one yet.
 *
 * Returns the result of dir_emit(), or false if the dentry or inode could not
 * be set up.
 */
static bool proc_sys_fill_cache(struct file *file,
				struct dir_context *ctx,
				struct ctl_table_header *head,
				struct ctl_table *table)
{
	struct dentry *child, *dir = file->f_path.dentry;
	struct inode *inode;
	struct qstr qname;
	ino_t ino = 0;
	unsigned type = DT_UNKNOWN;

	qname.name = table->procname;
	qname.len  = strlen(table->procname);
	qname.hash = full_name_hash(dir, qname.name, qname.len);

	child = d_lookup(dir, &qname);
	if (!child) {
		/* Not cached: allocate a dentry, cooperating with concurrent lookups. */
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
		child = d_alloc_parallel(dir, &qname, &wq);
		if (IS_ERR(child))
			return false;
		/* Only the task that owns the in-lookup dentry instantiates it. */
		if (d_in_lookup(child)) {
			struct dentry *res;
			inode = proc_sys_make_inode(dir->d_sb, head, table);
			if (IS_ERR(inode)) {
				d_lookup_done(child);
				dput(child);
				return false;
			}
			d_set_d_op(child, &proc_sys_dentry_operations);
			res = d_splice_alias(inode, child);
			d_lookup_done(child);
			/* d_splice_alias() may hand back an error or a different dentry to use. */
			if (unlikely(res)) {
				if (IS_ERR(res)) {
					dput(child);
					return false;
				}
				dput(child);
				child = res;
			}
		}
	}
	inode = d_inode(child);
	ino  = inode->i_ino;
	/* NOTE(review): presumably converts the S_IFMT bits of i_mode to a DT_* value - confirm. */
	type = inode->i_mode >> 12;
	dput(child);
	return dir_emit(ctx, qname.name, qname.len, ino, type);
}
703
704static bool proc_sys_link_fill_cache(struct file *file,
705 struct dir_context *ctx,
706 struct ctl_table_header *head,
707 struct ctl_table *table)
708{
709 bool ret = true;
710
711 head = sysctl_head_grab(head);
712 if (IS_ERR(head))
713 return false;
714
715
716 if (sysctl_follow_link(&head, &table))
717 goto out;
718
719 ret = proc_sys_fill_cache(file, ctx, head, table);
720out:
721 sysctl_head_finish(head);
722 return ret;
723}
724
725static int scan(struct ctl_table_header *head, struct ctl_table *table,
726 unsigned long *pos, struct file *file,
727 struct dir_context *ctx)
728{
729 bool res;
730
731 if ((*pos)++ < ctx->pos)
732 return true;
733
734 if (unlikely(S_ISLNK(table->mode)))
735 res = proc_sys_link_fill_cache(file, ctx, head, table);
736 else
737 res = proc_sys_fill_cache(file, ctx, head, table);
738
739 if (res)
740 ctx->pos = *pos;
741
742 return res;
743}
744
745static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
746{
747 struct ctl_table_header *head = grab_header(file_inode(file));
748 struct ctl_table_header *h = NULL;
749 struct ctl_table *entry;
750 struct ctl_dir *ctl_dir;
751 unsigned long pos;
752
753 if (IS_ERR(head))
754 return PTR_ERR(head);
755
756 ctl_dir = container_of(head, struct ctl_dir, header);
757
758 if (!dir_emit_dots(file, ctx))
759 goto out;
760
761 pos = 2;
762
763 for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
764 if (!scan(h, entry, &pos, file, ctx)) {
765 sysctl_head_finish(h);
766 break;
767 }
768 }
769out:
770 sysctl_head_finish(head);
771 return 0;
772}
773
774static int proc_sys_permission(struct inode *inode, int mask)
775{
776
777
778
779
780 struct ctl_table_header *head;
781 struct ctl_table *table;
782 int error;
783
784
785 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
786 return -EACCES;
787
788 head = grab_header(inode);
789 if (IS_ERR(head))
790 return PTR_ERR(head);
791
792 table = PROC_I(inode)->sysctl_entry;
793 if (!table)
794 error = mask & MAY_WRITE ? -EACCES : 0;
795 else
796 error = sysctl_perm(head, table, mask & ~MAY_NOT_BLOCK);
797
798 sysctl_head_finish(head);
799 return error;
800}
801
802static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
803{
804 struct inode *inode = d_inode(dentry);
805 int error;
806
807 if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
808 return -EPERM;
809
810 error = setattr_prepare(dentry, attr);
811 if (error)
812 return error;
813
814 setattr_copy(inode, attr);
815 mark_inode_dirty(inode);
816 return 0;
817}
818
819static int proc_sys_getattr(const struct path *path, struct kstat *stat,
820 u32 request_mask, unsigned int query_flags)
821{
822 struct inode *inode = d_inode(path->dentry);
823 struct ctl_table_header *head = grab_header(inode);
824 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
825
826 if (IS_ERR(head))
827 return PTR_ERR(head);
828
829 generic_fillattr(inode, stat);
830 if (table)
831 stat->mode = (stat->mode & S_IFMT) | table->mode;
832
833 sysctl_head_finish(head);
834 return 0;
835}
836
/* File operations installed on regular sysctl files by proc_sys_make_inode(). */
static const struct file_operations proc_sys_file_operations = {
	.open		= proc_sys_open,
	.poll		= proc_sys_poll,
	.read		= proc_sys_read,
	.write		= proc_sys_write,
	.llseek		= default_llseek,
};
844
/* File operations installed on sysctl directories by proc_sys_make_inode(). */
static const struct file_operations proc_sys_dir_file_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= proc_sys_readdir,
	.llseek		= generic_file_llseek,
};
850
/* Inode operations installed on regular sysctl files by proc_sys_make_inode(). */
static const struct inode_operations proc_sys_inode_operations = {
	.permission	= proc_sys_permission,
	.setattr	= proc_sys_setattr,
	.getattr	= proc_sys_getattr,
};
856
/*
 * Inode operations installed on sysctl directories by proc_sys_make_inode();
 * identical to the file variant plus ->lookup().
 */
static const struct inode_operations proc_sys_dir_operations = {
	.lookup		= proc_sys_lookup,
	.permission	= proc_sys_permission,
	.setattr	= proc_sys_setattr,
	.getattr	= proc_sys_getattr,
};
863
864static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
865{
866 if (flags & LOOKUP_RCU)
867 return -ECHILD;
868 return !PROC_I(d_inode(dentry))->sysctl->unregistering;
869}
870
871static int proc_sys_delete(const struct dentry *dentry)
872{
873 return !!PROC_I(d_inode(dentry))->sysctl->unregistering;
874}
875
876static int sysctl_is_seen(struct ctl_table_header *p)
877{
878 struct ctl_table_set *set = p->set;
879 int res;
880 spin_lock(&sysctl_lock);
881 if (p->unregistering)
882 res = 0;
883 else if (!set->is_seen)
884 res = 1;
885 else
886 res = set->is_seen(set);
887 spin_unlock(&sysctl_lock);
888 return res;
889}
890
891static int proc_sys_compare(const struct dentry *dentry,
892 unsigned int len, const char *str, const struct qstr *name)
893{
894 struct ctl_table_header *head;
895 struct inode *inode;
896
897
898
899
900 inode = d_inode_rcu(dentry);
901 if (!inode)
902 return 1;
903 if (name->len != len)
904 return 1;
905 if (memcmp(name->name, str, len))
906 return 1;
907 head = rcu_dereference(PROC_I(inode)->sysctl);
908 return !head || !sysctl_is_seen(head);
909}
910
/* Dentry operations set on every sysctl dentry created by lookup or readdir. */
static const struct dentry_operations proc_sys_dentry_operations = {
	.d_revalidate	= proc_sys_revalidate,
	.d_delete	= proc_sys_delete,
	.d_compare	= proc_sys_compare,
};
916
917static struct ctl_dir *find_subdir(struct ctl_dir *dir,
918 const char *name, int namelen)
919{
920 struct ctl_table_header *head;
921 struct ctl_table *entry;
922
923 entry = find_entry(&head, dir, name, namelen);
924 if (!entry)
925 return ERR_PTR(-ENOENT);
926 if (!S_ISDIR(entry->mode))
927 return ERR_PTR(-ENOTDIR);
928 return container_of(head, struct ctl_dir, header);
929}
930
931static struct ctl_dir *new_dir(struct ctl_table_set *set,
932 const char *name, int namelen)
933{
934 struct ctl_table *table;
935 struct ctl_dir *new;
936 struct ctl_node *node;
937 char *new_name;
938
939 new = kzalloc(sizeof(*new) + sizeof(struct ctl_node) +
940 sizeof(struct ctl_table)*2 + namelen + 1,
941 GFP_KERNEL);
942 if (!new)
943 return NULL;
944
945 node = (struct ctl_node *)(new + 1);
946 table = (struct ctl_table *)(node + 1);
947 new_name = (char *)(table + 2);
948 memcpy(new_name, name, namelen);
949 new_name[namelen] = '\0';
950 table[0].procname = new_name;
951 table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
952 init_header(&new->header, set->dir.header.root, set, node, table);
953
954 return new;
955}
956
957
958
959
960
961
962
963
964
965
966
967
968
969static struct ctl_dir *get_subdir(struct ctl_dir *dir,
970 const char *name, int namelen)
971{
972 struct ctl_table_set *set = dir->header.set;
973 struct ctl_dir *subdir, *new = NULL;
974 int err;
975
976 spin_lock(&sysctl_lock);
977 subdir = find_subdir(dir, name, namelen);
978 if (!IS_ERR(subdir))
979 goto found;
980 if (PTR_ERR(subdir) != -ENOENT)
981 goto failed;
982
983 spin_unlock(&sysctl_lock);
984 new = new_dir(set, name, namelen);
985 spin_lock(&sysctl_lock);
986 subdir = ERR_PTR(-ENOMEM);
987 if (!new)
988 goto failed;
989
990
991 subdir = find_subdir(dir, name, namelen);
992 if (!IS_ERR(subdir))
993 goto found;
994 if (PTR_ERR(subdir) != -ENOENT)
995 goto failed;
996
997
998 err = insert_header(dir, &new->header);
999 subdir = ERR_PTR(err);
1000 if (err)
1001 goto failed;
1002 subdir = new;
1003found:
1004 subdir->header.nreg++;
1005failed:
1006 if (IS_ERR(subdir)) {
1007 pr_err("sysctl could not get directory: ");
1008 sysctl_print_dir(dir);
1009 pr_cont("/%*.*s %ld\n",
1010 namelen, namelen, name, PTR_ERR(subdir));
1011 }
1012 drop_sysctl_table(&dir->header);
1013 if (new)
1014 drop_sysctl_table(&new->header);
1015 spin_unlock(&sysctl_lock);
1016 return subdir;
1017}
1018
1019static struct ctl_dir *xlate_dir(struct ctl_table_set *set, struct ctl_dir *dir)
1020{
1021 struct ctl_dir *parent;
1022 const char *procname;
1023 if (!dir->header.parent)
1024 return &set->dir;
1025 parent = xlate_dir(set, dir->header.parent);
1026 if (IS_ERR(parent))
1027 return parent;
1028 procname = dir->header.ctl_table[0].procname;
1029 return find_subdir(parent, procname, strlen(procname));
1030}
1031
/*
 * Resolve a link entry to the entry it refers to.
 *
 * On entry *@phead / *@pentry describe the link and the caller holds a usage
 * reference on *@phead.  On success (return 0) that reference is released, a
 * usage reference is taken on the target's header, and both pointers are
 * updated to the target.  On failure the pointers and the caller's reference
 * are left as they were and a negative errno is returned: the error from
 * xlate_dir(), or -ENOENT if the target entry is missing or going away.
 */
static int sysctl_follow_link(struct ctl_table_header **phead,
	struct ctl_table **pentry)
{
	struct ctl_table_header *head;
	struct ctl_table_root *root;
	struct ctl_table_set *set;
	struct ctl_table *entry;
	struct ctl_dir *dir;
	int ret;

	ret = 0;
	spin_lock(&sysctl_lock);
	/* Link entries carry the root they point into in .data (see new_links()). */
	root = (*pentry)->data;
	set = lookup_header_set(root);
	/* Find the directory with the same path as the link's parent inside that set. */
	dir = xlate_dir(set, (*phead)->parent);
	if (IS_ERR(dir))
		ret = PTR_ERR(dir);
	else {
		const char *procname = (*pentry)->procname;
		head = NULL;
		entry = find_entry(&head, dir, procname, strlen(procname));
		ret = -ENOENT;
		if (entry && use_table(head)) {
			/* Swap the caller's reference from the link to the target. */
			unuse_table(*phead);
			*phead = head;
			*pentry = entry;
			ret = 0;
		}
	}

	spin_unlock(&sysctl_lock);
	return ret;
}
1065
1066static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...)
1067{
1068 struct va_format vaf;
1069 va_list args;
1070
1071 va_start(args, fmt);
1072 vaf.fmt = fmt;
1073 vaf.va = &args;
1074
1075 pr_err("sysctl table check failed: %s/%s %pV\n",
1076 path, table->procname, &vaf);
1077
1078 va_end(args);
1079 return -EINVAL;
1080}
1081
1082static int sysctl_check_table_array(const char *path, struct ctl_table *table)
1083{
1084 int err = 0;
1085
1086 if ((table->proc_handler == proc_douintvec) ||
1087 (table->proc_handler == proc_douintvec_minmax)) {
1088 if (table->maxlen != sizeof(unsigned int))
1089 err |= sysctl_err(path, table, "array not allowed");
1090 }
1091
1092 return err;
1093}
1094
1095static int sysctl_check_table(const char *path, struct ctl_table *table)
1096{
1097 int err = 0;
1098 for (; table->procname; table++) {
1099 if (table->child)
1100 err |= sysctl_err(path, table, "Not a file");
1101
1102 if ((table->proc_handler == proc_dostring) ||
1103 (table->proc_handler == proc_dointvec) ||
1104 (table->proc_handler == proc_douintvec) ||
1105 (table->proc_handler == proc_douintvec_minmax) ||
1106 (table->proc_handler == proc_dointvec_minmax) ||
1107 (table->proc_handler == proc_dointvec_jiffies) ||
1108 (table->proc_handler == proc_dointvec_userhz_jiffies) ||
1109 (table->proc_handler == proc_dointvec_ms_jiffies) ||
1110 (table->proc_handler == proc_doulongvec_minmax) ||
1111 (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
1112 if (!table->data)
1113 err |= sysctl_err(path, table, "No data");
1114 if (!table->maxlen)
1115 err |= sysctl_err(path, table, "No maxlen");
1116 else
1117 err |= sysctl_check_table_array(path, table);
1118 }
1119 if (!table->proc_handler)
1120 err |= sysctl_err(path, table, "No proc_handler");
1121
1122 if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
1123 err |= sysctl_err(path, table, "bogus .mode 0%o",
1124 table->mode);
1125 }
1126 return err;
1127}
1128
1129static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table *table,
1130 struct ctl_table_root *link_root)
1131{
1132 struct ctl_table *link_table, *entry, *link;
1133 struct ctl_table_header *links;
1134 struct ctl_node *node;
1135 char *link_name;
1136 int nr_entries, name_bytes;
1137
1138 name_bytes = 0;
1139 nr_entries = 0;
1140 for (entry = table; entry->procname; entry++) {
1141 nr_entries++;
1142 name_bytes += strlen(entry->procname) + 1;
1143 }
1144
1145 links = kzalloc(sizeof(struct ctl_table_header) +
1146 sizeof(struct ctl_node)*nr_entries +
1147 sizeof(struct ctl_table)*(nr_entries + 1) +
1148 name_bytes,
1149 GFP_KERNEL);
1150
1151 if (!links)
1152 return NULL;
1153
1154 node = (struct ctl_node *)(links + 1);
1155 link_table = (struct ctl_table *)(node + nr_entries);
1156 link_name = (char *)&link_table[nr_entries + 1];
1157
1158 for (link = link_table, entry = table; entry->procname; link++, entry++) {
1159 int len = strlen(entry->procname) + 1;
1160 memcpy(link_name, entry->procname, len);
1161 link->procname = link_name;
1162 link->mode = S_IFLNK|S_IRWXUGO;
1163 link->data = link_root;
1164 link_name += len;
1165 }
1166 init_header(links, dir->header.root, dir->header.set, node, link_table);
1167 links->nreg = nr_entries;
1168
1169 return links;
1170}
1171
1172static bool get_links(struct ctl_dir *dir,
1173 struct ctl_table *table, struct ctl_table_root *link_root)
1174{
1175 struct ctl_table_header *head;
1176 struct ctl_table *entry, *link;
1177
1178
1179 for (entry = table; entry->procname; entry++) {
1180 const char *procname = entry->procname;
1181 link = find_entry(&head, dir, procname, strlen(procname));
1182 if (!link)
1183 return false;
1184 if (S_ISDIR(link->mode) && S_ISDIR(entry->mode))
1185 continue;
1186 if (S_ISLNK(link->mode) && (link->data == link_root))
1187 continue;
1188 return false;
1189 }
1190
1191
1192 for (entry = table; entry->procname; entry++) {
1193 const char *procname = entry->procname;
1194 link = find_entry(&head, dir, procname, strlen(procname));
1195 head->nreg++;
1196 }
1197 return true;
1198}
1199
/*
 * Make the entries of @head visible as links in the default (root) set.
 *
 * Nothing is done for headers that already belong to the root set, or when
 * the corresponding directory does not exist in the root set.  Otherwise
 * existing links are reused via get_links(), or a new links header is
 * allocated and inserted.
 *
 * Entered with sysctl_lock held; the lock is dropped around the allocation
 * and held again on return.  Returns 0 on success, -ENOMEM if the links
 * header could not be allocated, or the error from insert_header().
 */
static int insert_links(struct ctl_table_header *head)
{
	struct ctl_table_set *root_set = &sysctl_table_root.default_set;
	struct ctl_dir *core_parent = NULL;
	struct ctl_table_header *links;
	int err;

	if (head->set == root_set)
		return 0;

	/* Directory in the root set with the same path as head's parent. */
	core_parent = xlate_dir(root_set, head->parent);
	if (IS_ERR(core_parent))
		return 0;

	if (get_links(core_parent, head->ctl_table, head->root))
		return 0;

	/* Pin core_parent across the unlocked allocation below. */
	core_parent->header.nreg++;
	spin_unlock(&sysctl_lock);

	links = new_links(core_parent, head->ctl_table, head->root);

	spin_lock(&sysctl_lock);
	err = -ENOMEM;
	if (!links)
		goto out;

	/* The lock was dropped: suitable links may have appeared meanwhile. */
	err = 0;
	if (get_links(core_parent, head->ctl_table, head->root)) {
		kfree(links);
		goto out;
	}

	err = insert_header(core_parent, links);
	if (err)
		kfree(links);
out:
	/* Balances the nreg++ taken on core_parent above. */
	drop_sysctl_table(&core_parent->header);
	return err;
}
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
/*
 * Register @table under the '/'-separated directory @path within @set.
 *
 * Allocates a header followed by one ctl_node per named table entry,
 * validates the table with sysctl_check_table(), walks @path one component at
 * a time through get_subdir() (which finds or creates each directory), and
 * finally inserts the header into the last directory.
 *
 * Returns the new header, or NULL if the allocation, the table check, a
 * directory lookup or the insertion fails.  Every failure except the
 * allocation failure also calls dump_stack().
 */
struct ctl_table_header *__register_sysctl_table(
	struct ctl_table_set *set,
	const char *path, struct ctl_table *table)
{
	struct ctl_table_root *root = set->dir.header.root;
	struct ctl_table_header *header;
	const char *name, *nextname;
	struct ctl_dir *dir;
	struct ctl_table *entry;
	struct ctl_node *node;
	int nr_entries = 0;

	for (entry = table; entry->procname; entry++)
		nr_entries++;

	header = kzalloc(sizeof(struct ctl_table_header) +
			 sizeof(struct ctl_node)*nr_entries, GFP_KERNEL);
	if (!header)
		return NULL;

	/* The ctl_node array lives directly behind the header. */
	node = (struct ctl_node *)(header + 1);
	init_header(header, root, set, node, table);
	if (sysctl_check_table(path, table))
		goto fail;

	spin_lock(&sysctl_lock);
	dir = &set->dir;
	/* Take a registration count on the starting directory for the walk below. */
	dir->header.nreg++;
	spin_unlock(&sysctl_lock);

	/* Find the directory for the ctl_table */
	for (name = path; name; name = nextname) {
		int namelen;
		nextname = strchr(name, '/');
		if (nextname) {
			namelen = nextname - name;
			nextname++;
		} else {
			namelen = strlen(name);
		}
		/* Skip empty components such as a leading or doubled '/'. */
		if (namelen == 0)
			continue;

		/* get_subdir() drops the count on dir and takes one on the result. */
		dir = get_subdir(dir, name, namelen);
		if (IS_ERR(dir))
			goto fail;
	}

	spin_lock(&sysctl_lock);
	if (insert_header(dir, header))
		goto fail_put_dir_locked;

	/* Drop the count held on the final directory during the walk. */
	drop_sysctl_table(&dir->header);
	spin_unlock(&sysctl_lock);

	return header;

fail_put_dir_locked:
	drop_sysctl_table(&dir->header);
	spin_unlock(&sysctl_lock);
fail:
	kfree(header);
	dump_stack();
	return NULL;
}
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *table)
1361{
1362 return __register_sysctl_table(&sysctl_table_root.default_set,
1363 path, table);
1364}
1365EXPORT_SYMBOL(register_sysctl);
1366
/*
 * append_path - append "<name>/" to a path being built in a buffer
 * @path: start of the buffer
 * @pos: current end of the string inside the buffer (where to write)
 * @name: component to append
 *
 * Writes @name followed by '/' and a terminating NUL at @pos.
 *
 * Returns a pointer to the new terminating NUL, or NULL (leaving the
 * buffer untouched) when the result would reach PATH_MAX.
 */
static char *append_path(const char *path, char *pos, const char *name)
{
	int len = strlen(name);
	char *tail;

	/* +2 accounts for the '/' separator and the terminating NUL. */
	if ((pos - path) + len + 2 >= PATH_MAX)
		return NULL;

	tail = pos + len;
	memcpy(pos, name, len);
	tail[0] = '/';
	tail[1] = '\0';

	return tail + 1;
}
1379
1380static int count_subheaders(struct ctl_table *table)
1381{
1382 int has_files = 0;
1383 int nr_subheaders = 0;
1384 struct ctl_table *entry;
1385
1386
1387 if (!table || !table->procname)
1388 return 1;
1389
1390 for (entry = table; entry->procname; entry++) {
1391 if (entry->child)
1392 nr_subheaders += count_subheaders(entry->child);
1393 else
1394 has_files = 1;
1395 }
1396 return nr_subheaders + has_files;
1397}
1398
1399static int register_leaf_sysctl_tables(const char *path, char *pos,
1400 struct ctl_table_header ***subheader, struct ctl_table_set *set,
1401 struct ctl_table *table)
1402{
1403 struct ctl_table *ctl_table_arg = NULL;
1404 struct ctl_table *entry, *files;
1405 int nr_files = 0;
1406 int nr_dirs = 0;
1407 int err = -ENOMEM;
1408
1409 for (entry = table; entry->procname; entry++) {
1410 if (entry->child)
1411 nr_dirs++;
1412 else
1413 nr_files++;
1414 }
1415
1416 files = table;
1417
1418 if (nr_dirs && nr_files) {
1419 struct ctl_table *new;
1420 files = kcalloc(nr_files + 1, sizeof(struct ctl_table),
1421 GFP_KERNEL);
1422 if (!files)
1423 goto out;
1424
1425 ctl_table_arg = files;
1426 for (new = files, entry = table; entry->procname; entry++) {
1427 if (entry->child)
1428 continue;
1429 *new = *entry;
1430 new++;
1431 }
1432 }
1433
1434
1435 if (nr_files || !nr_dirs) {
1436 struct ctl_table_header *header;
1437 header = __register_sysctl_table(set, path, files);
1438 if (!header) {
1439 kfree(ctl_table_arg);
1440 goto out;
1441 }
1442
1443
1444 header->ctl_table_arg = ctl_table_arg;
1445 **subheader = header;
1446 (*subheader)++;
1447 }
1448
1449
1450 for (entry = table; entry->procname; entry++) {
1451 char *child_pos;
1452
1453 if (!entry->child)
1454 continue;
1455
1456 err = -ENAMETOOLONG;
1457 child_pos = append_path(path, pos, entry->procname);
1458 if (!child_pos)
1459 goto out;
1460
1461 err = register_leaf_sysctl_tables(path, child_pos, subheader,
1462 set, entry->child);
1463 pos[0] = '\0';
1464 if (err)
1465 goto out;
1466 }
1467 err = 0;
1468out:
1469
1470 return err;
1471}
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484struct ctl_table_header *__register_sysctl_paths(
1485 struct ctl_table_set *set,
1486 const struct ctl_path *path, struct ctl_table *table)
1487{
1488 struct ctl_table *ctl_table_arg = table;
1489 int nr_subheaders = count_subheaders(table);
1490 struct ctl_table_header *header = NULL, **subheaders, **subheader;
1491 const struct ctl_path *component;
1492 char *new_path, *pos;
1493
1494 pos = new_path = kmalloc(PATH_MAX, GFP_KERNEL);
1495 if (!new_path)
1496 return NULL;
1497
1498 pos[0] = '\0';
1499 for (component = path; component->procname; component++) {
1500 pos = append_path(new_path, pos, component->procname);
1501 if (!pos)
1502 goto out;
1503 }
1504 while (table->procname && table->child && !table[1].procname) {
1505 pos = append_path(new_path, pos, table->procname);
1506 if (!pos)
1507 goto out;
1508 table = table->child;
1509 }
1510 if (nr_subheaders == 1) {
1511 header = __register_sysctl_table(set, new_path, table);
1512 if (header)
1513 header->ctl_table_arg = ctl_table_arg;
1514 } else {
1515 header = kzalloc(sizeof(*header) +
1516 sizeof(*subheaders)*nr_subheaders, GFP_KERNEL);
1517 if (!header)
1518 goto out;
1519
1520 subheaders = (struct ctl_table_header **) (header + 1);
1521 subheader = subheaders;
1522 header->ctl_table_arg = ctl_table_arg;
1523
1524 if (register_leaf_sysctl_tables(new_path, pos, &subheader,
1525 set, table))
1526 goto err_register_leaves;
1527 }
1528
1529out:
1530 kfree(new_path);
1531 return header;
1532
1533err_register_leaves:
1534 while (subheader > subheaders) {
1535 struct ctl_table_header *subh = *(--subheader);
1536 struct ctl_table *table = subh->ctl_table_arg;
1537 unregister_sysctl_table(subh);
1538 kfree(table);
1539 }
1540 kfree(header);
1541 header = NULL;
1542 goto out;
1543}
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1556 struct ctl_table *table)
1557{
1558 return __register_sysctl_paths(&sysctl_table_root.default_set,
1559 path, table);
1560}
1561EXPORT_SYMBOL(register_sysctl_paths);
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1573{
1574 static const struct ctl_path null_path[] = { {} };
1575
1576 return register_sysctl_paths(null_path, table);
1577}
1578EXPORT_SYMBOL(register_sysctl_table);
1579
1580static void put_links(struct ctl_table_header *header)
1581{
1582 struct ctl_table_set *root_set = &sysctl_table_root.default_set;
1583 struct ctl_table_root *root = header->root;
1584 struct ctl_dir *parent = header->parent;
1585 struct ctl_dir *core_parent;
1586 struct ctl_table *entry;
1587
1588 if (header->set == root_set)
1589 return;
1590
1591 core_parent = xlate_dir(root_set, parent);
1592 if (IS_ERR(core_parent))
1593 return;
1594
1595 for (entry = header->ctl_table; entry->procname; entry++) {
1596 struct ctl_table_header *link_head;
1597 struct ctl_table *link;
1598 const char *name = entry->procname;
1599
1600 link = find_entry(&link_head, core_parent, name, strlen(name));
1601 if (link &&
1602 ((S_ISDIR(link->mode) && S_ISDIR(entry->mode)) ||
1603 (S_ISLNK(link->mode) && (link->data == root)))) {
1604 drop_sysctl_table(link_head);
1605 }
1606 else {
1607 pr_err("sysctl link missing during unregister: ");
1608 sysctl_print_dir(parent);
1609 pr_cont("/%s\n", name);
1610 }
1611 }
1612}
1613
1614static void drop_sysctl_table(struct ctl_table_header *header)
1615{
1616 struct ctl_dir *parent = header->parent;
1617
1618 if (--header->nreg)
1619 return;
1620
1621 if (parent) {
1622 put_links(header);
1623 start_unregistering(header);
1624 }
1625
1626 if (!--header->count)
1627 kfree_rcu(header, rcu);
1628
1629 if (parent)
1630 drop_sysctl_table(&parent->header);
1631}
1632
1633
1634
1635
1636
1637
1638
1639
1640void unregister_sysctl_table(struct ctl_table_header * header)
1641{
1642 int nr_subheaders;
1643 might_sleep();
1644
1645 if (header == NULL)
1646 return;
1647
1648 nr_subheaders = count_subheaders(header->ctl_table_arg);
1649 if (unlikely(nr_subheaders > 1)) {
1650 struct ctl_table_header **subheaders;
1651 int i;
1652
1653 subheaders = (struct ctl_table_header **)(header + 1);
1654 for (i = nr_subheaders -1; i >= 0; i--) {
1655 struct ctl_table_header *subh = subheaders[i];
1656 struct ctl_table *table = subh->ctl_table_arg;
1657 unregister_sysctl_table(subh);
1658 kfree(table);
1659 }
1660 kfree(header);
1661 return;
1662 }
1663
1664 spin_lock(&sysctl_lock);
1665 drop_sysctl_table(header);
1666 spin_unlock(&sysctl_lock);
1667}
1668EXPORT_SYMBOL(unregister_sysctl_table);
1669
1670void setup_sysctl_set(struct ctl_table_set *set,
1671 struct ctl_table_root *root,
1672 int (*is_seen)(struct ctl_table_set *))
1673{
1674 memset(set, 0, sizeof(*set));
1675 set->is_seen = is_seen;
1676 init_header(&set->dir.header, root, set, NULL, root_table);
1677}
1678
1679void retire_sysctl_set(struct ctl_table_set *set)
1680{
1681 WARN_ON(!RB_EMPTY_ROOT(&set->dir.root));
1682}
1683
1684int __init proc_sys_init(void)
1685{
1686 struct proc_dir_entry *proc_sys_root;
1687
1688 proc_sys_root = proc_mkdir("sys", NULL);
1689 proc_sys_root->proc_iops = &proc_sys_dir_operations;
1690 proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations;
1691 proc_sys_root->nlink = 0;
1692
1693 return sysctl_init();
1694}
1695