// SPDX-License-Identifier: GPL-2.0
/*
 * linux/kernel/seccomp.c
 *
 * Copyright 2004-2005  Andrea Arcangeli <andrea@cpushare.com>
 *
 * Copyright (C) 2012 Google, Inc.
 * Will Drewry <wad@chromium.org>
 *
 * This defines a simple but solid secure-computing facility.
 *
 * Mode 1 uses a fixed list of allowed system calls.
 * Mode 2 allows user-defined system call filters in the form
 *        of Berkeley Packet Filters/Linux Socket Filters.
 */
#define pr_fmt(fmt) "seccomp: " fmt

#include <linux/refcount.h>
#include <linux/audit.h>
#include <linux/compat.h>
#include <linux/coredump.h>
#include <linux/kmemleak.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/seccomp.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>

#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
#include <asm/syscall.h>
#endif

#ifdef CONFIG_SECCOMP_FILTER
#include <linux/file.h>
#include <linux/filter.h>
#include <linux/pid.h>
#include <linux/ptrace.h>
#include <linux/capability.h>
#include <linux/tracehook.h>
#include <linux/uaccess.h>
#include <linux/anon_inodes.h>
#include <linux/lockdep.h>

/*
 * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the
 * wrong direction flag in the ioctl number. This is the broken one,
 * which the kernel needs to keep supporting until all userspaces stop
 * using the wrong command number.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)

enum notify_state {
	SECCOMP_NOTIFY_INIT,
	SECCOMP_NOTIFY_SENT,
	SECCOMP_NOTIFY_REPLIED,
};
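
/*
 * Synchronization overview for the user-notification machinery below:
 * filter->notify_lock guards all notification state; the semaphore in
 * struct notification counts unclaimed (INIT) requests so that
 * SECCOMP_IOCTL_NOTIF_RECV can block until one arrives; filter->wqh is
 * what poll() waits on; and each seccomp_knotif carries a completion
 * that wakes the blocked task once its request has been answered.
 */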

struct seccomp_knotif {
	/* The task whose filter triggered the notification */
	struct task_struct *task;

	/* The "cookie" for this request; this is unique for this filter. */
	u64 id;

	/*
	 * The seccomp data. This pointer is valid the entire time this
	 * notification is active, since it comes from the process that
	 * triggered the notification.
	 */
	const struct seccomp_data *data;

	/*
	 * Notification states. When SECCOMP_RET_USER_NOTIF is returned, a
	 * struct seccomp_knotif is created and starts out in INIT. Once the
	 * handler reads the notification off of an FD, it transitions to
	 * SENT. If a signal is received the state transitions back to INIT
	 * and another message is sent. When the userspace handler replies,
	 * state transitions to REPLIED.
	 */
	enum notify_state state;

	/* The return values, only valid when in SECCOMP_NOTIFY_REPLIED */
	int error;
	long val;
	u32 flags;

	/*
	 * Signals when this has changed states, such as the listener
	 * dying, a new seccomp addfd message, or changing to REPLIED
	 */
	struct completion ready;

	struct list_head list;

	/* outstanding addfd requests */
	struct list_head addfd;
};

/**
 * struct seccomp_kaddfd - container for seccomp_addfd ioctl messages
 *
 * @file: A reference to the file to install in the other task
 * @fd: The fd number to install it at, when @setfd is true
 * @flags: The flags for the new file descriptor. At the moment, only
 *         O_CLOEXEC is allowed.
 * @setfd: true if SECCOMP_ADDFD_FLAG_SETFD was given, i.e. the supervisor
 *         chose the fd number instead of letting the kernel allocate one
 * @ret: The return value for the installing process: the installed fd
 *       number on success (>= 0), negative errno on failure
 * @completion: Signals that the target task has handled (or discarded)
 *              this request
 * @list: list_head for chaining seccomp_kaddfd together
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;

	union {
		bool setfd;
		/* To only be set on reply */
		int ret;
	};
	struct completion completion;
	struct list_head list;
};

/**
 * struct notification - container for seccomp userspace notifications. Since
 * most seccomp filters will not have notification listeners attached and this
 * structure is fairly large, we store the notification-specific stuff in a
 * separate structure.
 *
 * @request: A semaphore that users of this notification can wait on for
 *           changes. Actual reads and writes are still controlled with
 *           filter->notify_lock.
 * @next_id: The id of the next request.
 * @notifications: A list of struct seccomp_knotif elements.
 */
struct notification {
	struct semaphore request;
	u64 next_id;
	struct list_head notifications;
};

#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter cache of seccomp actions per
 * arch/syscall pair
 *
 * @allow_native: A bitmap where each set bit indicates that the filter
 *		  will always allow the syscall, for the native architecture.
 * @allow_compat: A bitmap where each set bit indicates that the filter
 *		  will always allow the syscall, for the compat architecture.
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
struct action_cache { };

static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif /* SECCOMP_ARCH_NATIVE */
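
/*
 * Without SECCOMP_ARCH_NATIVE the stubs above make the action cache a
 * no-op: seccomp_cache_check_allow() never hits, so every syscall falls
 * through to the full BPF evaluation in seccomp_run_filters().
 */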

/**
 * struct seccomp_filter - container for seccomp BPF programs
 *
 * @refs: Reference count to manage the object lifetime.
 *	  A filter's reference count is incremented for each directly
 *	  attached task, once for the dependent filter, and if
 *	  requested for the user notifier. When @refs reaches zero,
 *	  the filter can be freed.
 * @users: A filter's @users count is incremented for each directly
 *	   attached task (filter installation, fork(), thread_sync),
 *	   and once for the dependent filter (tracked in filter->prev).
 *	   When it reaches zero it indicates that no direct or indirect
 *	   users of that filter exist. No direct callers to
 *	   seccomp_run_filters() exist either, so the filter is freeable.
 * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
 * @cache: per-filter cache of always-allowed syscalls (see action_cache)
 * @prev: points to a previously installed, or inherited, filter
 * @prog: the BPF program to evaluate
 * @notif: the notification mode state, if a listener has been attached
 * @notify_lock: A lock for all notification-related accesses.
 * @wqh: A wait queue for poll if a notifier is in use.
 *
 * seccomp_filter objects are organized in a tree linked via the @prev
 * pointer.  For any task, it appears to be a singly-linked list starting
 * with current->seccomp.filter, the most recently attached or inherited
 * filter.  However, multiple filters may share a @prev node, by way of
 * fork(), which results in a unidirectional tree existing in memory.
 * This is similar to how namespaces work.
 *
 * seccomp_filter objects should never be modified after being attached
 * to a task_struct (other than @refs).
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};

/* Limit any path through the tree to 256KB worth of instructions. */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))

/*
 * Endianness is explicitly ignored and left for BPF program authors to manage
 * as per the specific architecture.
 */
static void populate_seccomp_data(struct seccomp_data *sd)
{
	/*
	 * Instead of using current_pt_regs(), we're already doing the work
	 * to safely fetch "current", so just use "task" everywhere below.
	 */
	struct task_struct *task = current;
	struct pt_regs *regs = task_pt_regs(task);
	unsigned long args[6];

	sd->nr = syscall_get_nr(task, regs);
	sd->arch = syscall_get_arch(task);
	syscall_get_arguments(task, regs, args);
	sd->args[0] = args[0];
	sd->args[1] = args[1];
	sd->args[2] = args[2];
	sd->args[3] = args[3];
	sd->args[4] = args[4];
	sd->args[5] = args[5];
	sd->instruction_pointer = KSTK_EIP(task);
}

/**
 *	seccomp_check_filter - verify seccomp filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Takes a previously checked filter (by bpf_check_classic) and
 * redirects all filter code that loads struct sk_buff data
 * and related data through seccomp_bpf_load.  It also
 * enforces length and alignment checking of those loads.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;

	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
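
/*
 * For illustration (not part of the original source): a minimal classic-BPF
 * program that passes the checker above. It loads the syscall number from
 * seccomp_data and kills the process on __NR_ptrace, allowing all else:
 *
 *	struct sock_filter example[] = {
 *		BPF_STMT(BPF_LD  | BPF_W   | BPF_ABS,
 *			 offsetof(struct seccomp_data, nr)),
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_ptrace, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
 *		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
 *	};
 *
 * The BPF_LD|BPF_W|BPF_ABS load is rewritten to BPF_LDX|BPF_W|BPF_ABS by
 * seccomp_check_filter() after its offset and alignment are validated.
 */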

#ifdef SECCOMP_ARCH_NATIVE
static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
						    size_t bitmap_size,
						    int syscall_nr)
{
	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
		return false;
	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);

	return test_bit(syscall_nr, bitmap);
}

/**
 * seccomp_cache_check_allow - lookup seccomp cache
 * @sfilter: The seccomp filter
 * @sd: The seccomp data to lookup the cache with
 *
 * Returns true if the seccomp_data is cached and allowed.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A native-only architecture doesn't need to check sd->arch. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
							SECCOMP_ARCH_NATIVE_NR,
							syscall_nr);
	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
							SECCOMP_ARCH_COMPAT_NR,
							syscall_nr);
#endif /* SECCOMP_ARCH_COMPAT */

	WARN_ON_ONCE(true);
	return false;
}
#endif /* SECCOMP_ARCH_NATIVE */

/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: optional seccomp data to be passed to filters
 * @match: stores struct seccomp_filter that resulted in the return value,
 *         unless filter returned SECCOMP_RET_ALLOW, in which case it will
 *         be unchanged.
 *
 * Returns valid seccomp BPF response codes.
 */
#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
#endif /* CONFIG_SECCOMP_FILTER */

static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
{
	assert_spin_locked(&current->sighand->siglock);

	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
		return false;

	return true;
}

void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }

static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processing won't be needed. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_task_syscall_work(task, SECCOMP);
}

#ifdef CONFIG_SECCOMP_FILTER
/* Returns 1 if the parent is an ancestor of the child. */
static int is_ancestor(struct seccomp_filter *parent,
		       struct seccomp_filter *child)
{
	/* NULL is the root ancestor. */
	if (parent == NULL)
		return 1;
	for (; child; child = child->prev)
		if (child == parent)
			return 1;
	return 0;
}

/**
 * seccomp_can_sync_threads: checks if all threads can be synchronized
 *
 * Expects sighand and cred_guard_mutex locks to be held.
 *
 * Returns 0 on success, -ve on error, or the pid of a thread which was
 * either not in the correct seccomp mode or did not have an ancestral
 * seccomp filter.
 */
static inline pid_t seccomp_can_sync_threads(void)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Validate all threads being eligible for synchronization. */
	caller = current;
	for_each_thread(caller, thread) {
		pid_t failed;

		/* Skip current, since it is initiating the sync. */
		if (thread == caller)
			continue;

		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
		     is_ancestor(thread->seccomp.filter,
				 caller->seccomp.filter)))
			continue;

		/* Return the first thread that cannot be synchronized. */
		failed = task_pid_vnr(thread);
		/* If the pid cannot be resolved, then return -ESRCH */
		if (WARN_ON(failed == 0))
			failed = -ESRCH;
		return failed;
	}

	return 0;
}

static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (filter) {
		bpf_prog_destroy(filter->prog);
		kfree(filter);
	}
}

static void __seccomp_filter_orphan(struct seccomp_filter *orig)
{
	while (orig && refcount_dec_and_test(&orig->users)) {
		if (waitqueue_active(&orig->wqh))
			wake_up_poll(&orig->wqh, EPOLLHUP);
		orig = orig->prev;
	}
}

static void __put_seccomp_filter(struct seccomp_filter *orig)
{
	/* Clean up single-reference branches iteratively. */
	while (orig && refcount_dec_and_test(&orig->refs)) {
		struct seccomp_filter *freeme = orig;
		orig = orig->prev;
		seccomp_filter_free(freeme);
	}
}
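
/*
 * A filter carries two counters with different jobs: @users tracks tasks
 * (and dependent filters) that can still *trigger* the filter; when it
 * drops to zero, __seccomp_filter_orphan() wakes any attached listener
 * with EPOLLHUP so the supervisor knows no more requests can arrive.
 * @refs tracks raw object lifetime (including the listener fd itself);
 * only when it reaches zero is the memory actually freed.
 */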

static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);
	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}

/**
 * seccomp_filter_release - Detach the task from its filter tree,
 *			    drop its reference count, and notify
 *			    about unused filters
 *
 * This function should only be called when the task is exiting as
 * it detaches it from its filter tree. As such, READ_ONCE() and
 * barriers are not needed here, as would normally be needed.
 */
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	/* We are effectively holding the siglock by not having any sighand. */
	WARN_ON(tsk->sighand != NULL);

	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	__seccomp_filter_release(orig);
}

/**
 * seccomp_sync_threads: sets all threads to use current's filter
 *
 * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync
 *
 * Expects sighand and cred_guard_mutex locks to be held, and for
 * seccomp_can_sync_threads() to have returned success already
 * without dropping the locks.
 */
static inline void seccomp_sync_threads(unsigned long flags)
{
	struct task_struct *thread, *caller;

	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
	assert_spin_locked(&current->sighand->siglock);

	/* Synchronize all threads. */
	caller = current;
	for_each_thread(caller, thread) {
		/* Skip current, since it needs no changes. */
		if (thread == caller)
			continue;

		/* Get a task reference for the new leaf node. */
		get_seccomp_filter(caller);

		/*
		 * Drop the task reference to the shared ancestor since
		 * current's path will hold a reference.  (This also
		 * allows a put before the assignment.)
		 */
		__seccomp_filter_release(thread->seccomp.filter);

		/* Make our version visible. */
		smp_store_release(&thread->seccomp.filter,
				  caller->seccomp.filter);
		atomic_set(&thread->seccomp.filter_count,
			   atomic_read(&caller->seccomp.filter_count));

		/*
		 * Don't let an unprivileged task work around
		 * the no_new_privs restriction by creating
		 * a thread that sets it up, enters seccomp,
		 * then dies.
		 */
		if (task_no_new_privs(caller))
			task_set_no_new_privs(thread);

		/*
		 * Opt the other thread into seccomp if needed.
		 * As threads are considered to be trust-realm
		 * equivalent (see ptrace_may_access), it is safe to
		 * allow one thread to transition the other.
		 */
		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
					    flags);
	}
}

/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	const bool save_orig =
#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
		true;
#else
		false;
#endif

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	mutex_init(&sfilter->notify_lock);
	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	refcount_set(&sfilter->refs, 1);
	refcount_set(&sfilter->users, 1);
	init_waitqueue_head(&sfilter->wqh);

	return sfilter;
}

/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Returns either a pointer to the prepared filter or an ERR_PTR.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		struct compat_sock_fprog fprog32;
		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}

#ifdef SECCOMP_ARCH_NATIVE
/**
 * seccomp_is_const_allow - check if filter is constant allow with given data
 * @fprog: The BPF programs
 * @sd: The seccomp data to check against, only the syscall number and arch
 *      number are considered constant.
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{
	unsigned int reg_value = 0;
	unsigned int pc;
	bool op_res;

	if (WARN_ON_ONCE(!fprog))
		return false;

	for (pc = 0; pc < fprog->len; pc++) {
		struct sock_filter *insn = &fprog->filter[pc];
		u16 code = insn->code;
		u32 k = insn->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			switch (k) {
			case offsetof(struct seccomp_data, nr):
				reg_value = sd->nr;
				break;
			case offsetof(struct seccomp_data, arch):
				reg_value = sd->arch;
				break;
			default:
				/* can't optimize (non-constant value load) */
				return false;
			}
			break;
		case BPF_RET | BPF_K:
			/* reached return with constant values only, check allow */
			return k == SECCOMP_RET_ALLOW;
		case BPF_JMP | BPF_JA:
			pc += insn->k;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_K:
			switch (BPF_OP(code)) {
			case BPF_JEQ:
				op_res = reg_value == k;
				break;
			case BPF_JGE:
				op_res = reg_value >= k;
				break;
			case BPF_JGT:
				op_res = reg_value > k;
				break;
			case BPF_JSET:
				op_res = !!(reg_value & k);
				break;
			default:
				/* can't optimize (unknown comparison) */
				return false;
			}

			pc += op_res ? insn->jt : insn->jf;
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			reg_value &= k;
			break;
		default:
			/* can't optimize (unknown insn) */
			return false;
		}
	}

	/* ran off the end of the filter?! */
	WARN_ON(1);
	return false;
}
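
/*
 * Worked example (illustrative, not from the original source): for the
 * filter sketched after seccomp_check_filter() above, emulating with
 * sd->nr == __NR_read reaches "RET SECCOMP_RET_ALLOW" while touching only
 * the constant nr/arch loads, so seccomp_is_const_allow() returns true
 * and __NR_read can be cached; with sd->nr == __NR_ptrace it reaches
 * "RET SECCOMP_RET_KILL_PROCESS" and returns false. Any filter that
 * inspects sd->args[] hits the "non-constant value load" bailout and is
 * never cached.
 */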

static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{
	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
	struct seccomp_data sd;
	int nr;

	if (bitmap_prev) {
		/* The new filter must be as restrictive as the last. */
		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
	} else {
		/* Before any filters, all syscalls are always allowed. */
		bitmap_fill(bitmap, bitmap_size);
	}

	for (nr = 0; nr < bitmap_size; nr++) {
		/* No bitmap change: not a cacheable action. */
		if (!test_bit(nr, bitmap))
			continue;

		sd.nr = nr;
		sd.arch = arch;

		/* No bitmap change: continue to always allow. */
		if (seccomp_is_const_allow(fprog, &sd))
			continue;

		/*
		 * Not a cacheable action: always run filters.
		 * atomic clear_bit() not needed, filter not visible yet.
		 */
		__clear_bit(nr, bitmap);
	}
}

/**
 * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
 * @sfilter: The seccomp filter
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	struct action_cache *cache = &sfilter->cache;
	const struct action_cache *cache_prev =
		sfilter->prev ? &sfilter->prev->cache : NULL;

	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
				     cache_prev ? cache_prev->allow_native : NULL,
				     SECCOMP_ARCH_NATIVE_NR,
				     SECCOMP_ARCH_NATIVE);

#ifdef SECCOMP_ARCH_COMPAT
	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
				     cache_prev ? cache_prev->allow_compat : NULL,
				     SECCOMP_ARCH_COMPAT_NR,
				     SECCOMP_ARCH_COMPAT);
#endif /* SECCOMP_ARCH_COMPAT */
}
#endif /* SECCOMP_ARCH_NATIVE */
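
/*
 * Because each new bitmap starts as a copy of the previous filter's bitmap
 * and bits are only ever cleared, the cache is effectively the AND across
 * the whole filter chain: a syscall stays cached as "always allowed" only
 * if every attached filter constant-allows it, which is exactly when
 * seccomp_run_filters() could never return anything but SECCOMP_RET_ALLOW.
 */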

/**
 * seccomp_attach_filter: validate and attach filter
 * @flags:  flags to change filter behavior
 * @filter: seccomp filter to add to the current process
 *
 * Caller must be holding current->sighand->siglock lock.
 *
 * Returns 0 on success, -ve on error, or
 *   - in TSYNC mode: the pid of a thread which was either not in the correct
 *     seccomp mode or did not have an ancestral seccomp filter
 *   - in NEW_LISTENER mode: the fd of the new listener
 */
static long seccomp_attach_filter(unsigned int flags,
				  struct seccomp_filter *filter)
{
	unsigned long total_insns;
	struct seccomp_filter *walker;

	assert_spin_locked(&current->sighand->siglock);

	/* Validate resulting filter length. */
	total_insns = filter->prog->len;
	for (walker = current->seccomp.filter; walker; walker = walker->prev)
		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
	if (total_insns > MAX_INSNS_PER_PATH)
		return -ENOMEM;

	/* If thread sync has been requested: check that it is possible. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
		int ret;

		ret = seccomp_can_sync_threads();
		if (ret) {
			if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
				return -ESRCH;
			else
				return ret;
		}
	}

	/* Set log flag, if present. */
	if (flags & SECCOMP_FILTER_FLAG_LOG)
		filter->log = true;

	/*
	 * If there is an existing filter, make it the prev and don't drop its
	 * task reference.
	 */
	filter->prev = current->seccomp.filter;
	seccomp_cache_prepare(filter);
	current->seccomp.filter = filter;
	atomic_inc(&current->seccomp.filter_count);

	/* Now that the new filter is in place, synchronize to all threads. */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		seccomp_sync_threads(flags);

	return 0;
}

static void __get_seccomp_filter(struct seccomp_filter *filter)
{
	refcount_inc(&filter->refs);
}

/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	if (!orig)
		return;
	__get_seccomp_filter(orig);
	refcount_inc(&orig->users);
}

static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
{
	clear_siginfo(info);
	info->si_signo = SIGSYS;
	info->si_code = SYS_SECCOMP;
	info->si_call_addr = (void __user *)KSTK_EIP(current);
	info->si_errno = reason;
	info->si_arch = syscall_get_arch(current);
	info->si_syscall = syscall;
}

/**
 * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 * @syscall: syscall number to send to userland
 * @reason: filter-supplied reason code to send to userland (via si_errno)
 *
 * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 */
static void seccomp_send_sigsys(int syscall, int reason)
{
	struct kernel_siginfo info;

	seccomp_init_siginfo(&info, syscall, reason);
	force_sig_info(&info);
}
#endif	/* CONFIG_SECCOMP_FILTER */

/* For use with seccomp_actions_logged */
#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
#define SECCOMP_LOG_TRAP		(1 << 2)
#define SECCOMP_LOG_ERRNO		(1 << 3)
#define SECCOMP_LOG_TRACE		(1 << 4)
#define SECCOMP_LOG_LOG			(1 << 5)
#define SECCOMP_LOG_ALLOW		(1 << 6)
#define SECCOMP_LOG_USER_NOTIF		(1 << 7)

static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
				    SECCOMP_LOG_KILL_THREAD  |
				    SECCOMP_LOG_TRAP  |
				    SECCOMP_LOG_ERRNO |
				    SECCOMP_LOG_USER_NOTIF |
				    SECCOMP_LOG_TRACE |
				    SECCOMP_LOG_LOG;

static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{
	bool log = false;

	switch (action) {
	case SECCOMP_RET_ALLOW:
		break;
	case SECCOMP_RET_TRAP:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
		break;
	case SECCOMP_RET_ERRNO:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
		break;
	case SECCOMP_RET_TRACE:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
		break;
	case SECCOMP_RET_USER_NOTIF:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
		break;
	case SECCOMP_RET_LOG:
		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
		break;
	case SECCOMP_RET_KILL_THREAD:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
		break;
	case SECCOMP_RET_KILL_PROCESS:
	default:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
	}

	/*
	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
	 * any action from being logged by removing the action name from the
	 * seccomp_actions_logged sysctl.
	 */
	if (!log)
		return;

	audit_seccomp(syscall, signr, action);
}

/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1, /* negative terminated */
};

static void __secure_computing_strict(int this_syscall)
{
	const int *allowed_syscalls = mode1_syscalls;
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		allowed_syscalls = get_compat_mode1_syscalls();
#endif
	do {
		if (*allowed_syscalls == this_syscall)
			return;
	} while (*++allowed_syscalls != -1);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
	do_exit(SIGKILL);
}

#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
#else

#ifdef CONFIG_SECCOMP_FILTER
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{
	/*
	 * Note: overflow is ok here, the id just needs to be unique per
	 * filter.
	 */
	lockdep_assert_held(&filter->notify_lock);
	return filter->notif->next_id++;
}

static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
{
	/*
	 * Remove the notification, and reset the list pointers, indicating
	 * that it has been handled.
	 */
	list_del_init(&addfd->list);
	if (!addfd->setfd)
		addfd->ret = receive_fd(addfd->file, addfd->flags);
	else
		addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
						addfd->flags);
	complete(&addfd->completion);
}

static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{
	int err;
	u32 flags = 0;
	long ret = 0;
	struct seccomp_knotif n = {};
	struct seccomp_kaddfd *addfd, *tmp;

	mutex_lock(&match->notify_lock);
	err = -ENOSYS;
	if (!match->notif)
		goto out;

	n.task = current;
	n.state = SECCOMP_NOTIFY_INIT;
	n.data = sd;
	n.id = seccomp_next_notify_id(match);
	init_completion(&n.ready);
	list_add(&n.list, &match->notif->notifications);
	INIT_LIST_HEAD(&n.addfd);

	up(&match->notif->request);
	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

	/*
	 * This is where we wait for a reply from userspace.
	 */
	do {
		mutex_unlock(&match->notify_lock);
		err = wait_for_completion_interruptible(&n.ready);
		mutex_lock(&match->notify_lock);
		if (err != 0)
			goto interrupted;

		addfd = list_first_entry_or_null(&n.addfd,
						 struct seccomp_kaddfd, list);
		/* Check if we were woken up by a addfd message */
		if (addfd)
			seccomp_handle_addfd(addfd);

	} while (n.state != SECCOMP_NOTIFY_REPLIED);

	ret = n.val;
	err = n.error;
	flags = n.flags;

interrupted:
	/* If there were any pending addfd calls, clear them out */
	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
		/* The process went away before we got a chance to handle it */
		addfd->ret = -ESRCH;
		list_del_init(&addfd->list);
		complete(&addfd->completion);
	}

	/*
	 * Note that it's possible the listener died in between the time when
	 * we were notified of a response (or a signal) and when we were able
	 * to acquire the mutex.
	 *
	 * If the listener is gone, seccomp_notify_detach() has already freed
	 * match->notif and answered every outstanding notification, so only
	 * unlink our stack-allocated node while the notif state still exists.
	 */
	if (match->notif)
		list_del(&n.list);
out:
	mutex_unlock(&match->notify_lock);

	/* Userspace requests to continue the syscall. */
	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return 0;

	syscall_set_return_value(current, current_pt_regs(),
				 err, ret);
	return -1;
}
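
/*
 * Summary of the notification round-trip implemented above: the filtered
 * task queues a seccomp_knotif on its own stack, posts the semaphore, and
 * blocks on the completion; the supervisor claims the request with
 * SECCOMP_IOCTL_NOTIF_RECV and answers with SECCOMP_IOCTL_NOTIF_SEND,
 * which fires the completion. The reply either injects a return value
 * (err/val) or, with SECCOMP_USER_NOTIF_FLAG_CONTINUE, lets the original
 * syscall proceed as if the filter had allowed it.
 */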

static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	u32 filter_ret, action;
	struct seccomp_filter *match = NULL;
	int data;
	struct seccomp_data sd_local;

	/*
	 * Make sure that any changes to mode from another thread have
	 * been seen after SYSCALL_WORK_SECCOMP was seen.
	 */
	smp_rmb();

	if (!sd) {
		populate_seccomp_data(&sd_local);
		sd = &sd_local;
	}

	filter_ret = seccomp_run_filters(sd, &match);
	data = filter_ret & SECCOMP_RET_DATA;
	action = filter_ret & SECCOMP_RET_ACTION_FULL;

	switch (action) {
	case SECCOMP_RET_ERRNO:
		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
		if (data > MAX_ERRNO)
			data = MAX_ERRNO;
		syscall_set_return_value(current, current_pt_regs(),
					 -data, 0);
		goto skip;

	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		syscall_rollback(current, current_pt_regs());
		/* Let the filter pass back 16 bits of data. */
		seccomp_send_sigsys(this_syscall, data);
		goto skip;

	case SECCOMP_RET_TRACE:
		/* We've been put in this state by the ptracer already. */
		if (recheck_after_trace)
			return 0;

		/* ENOSYS these calls if there is no tracer attached. */
		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
			syscall_set_return_value(current,
						 current_pt_regs(),
						 -ENOSYS, 0);
			goto skip;
		}

		/* Allow the BPF to provide the event message */
		ptrace_event(PTRACE_EVENT_SECCOMP, data);
		/*
		 * The delivery of a fatal signal during event
		 * notification may silently skip tracer notification,
		 * which could leave us with a potentially unmodified
		 * syscall that the tracer would have liked to have
		 * changed. Since the process is about to die, we just
		 * force the syscall to be skipped and let the signal
		 * kill the process and correctly handle any tracer exit
		 * notifications.
		 */
		if (fatal_signal_pending(current))
			goto skip;
		/* Check if the tracer forced the syscall to be skipped. */
		this_syscall = syscall_get_nr(current, current_pt_regs());
		if (this_syscall < 0)
			goto skip;

		/*
		 * Recheck the syscall, since it may have changed. This
		 * intentionally uses a NULL struct seccomp_data to force
		 * a reload of all registers. This does not goto skip since
		 * that would incorrectly evaluate the below logic, so skip a
		 * return here.
		 */
		if (__seccomp_filter(this_syscall, NULL, true))
			return -1;

		return 0;

	case SECCOMP_RET_USER_NOTIF:
		if (seccomp_do_user_notification(this_syscall, match, sd))
			goto skip;

		return 0;

	case SECCOMP_RET_LOG:
		seccomp_log(this_syscall, 0, action, true);
		return 0;

	case SECCOMP_RET_ALLOW:
		/*
		 * Note that the "match" filter will always be NULL for
		 * this action since SECCOMP_RET_ALLOW is the starting
		 * state in seccomp_run_filters().
		 */
		return 0;

	case SECCOMP_RET_KILL_THREAD:
	case SECCOMP_RET_KILL_PROCESS:
	default:
		seccomp_log(this_syscall, SIGSYS, action, true);
		/* Dump core only if this is the last remaining thread. */
		if (action != SECCOMP_RET_KILL_THREAD ||
		    get_nr_threads(current) == 1) {
			kernel_siginfo_t info;

			/* Show the original registers in the dump. */
			syscall_rollback(current, current_pt_regs());
			/* Trigger a manual coredump since do_exit skips it. */
			seccomp_init_siginfo(&info, this_syscall, data);
			do_coredump(&info);
		}
		if (action == SECCOMP_RET_KILL_THREAD)
			do_exit(SIGSYS);
		else
			do_group_exit(SIGSYS);
	}

	unreachable();

skip:
	seccomp_log(this_syscall, 0, action, match ? match->log : false);
	return -1;
}
#else
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	BUG();

	return -1;
}
#endif

int __secure_computing(const struct seccomp_data *sd)
{
	int mode = current->seccomp.mode;
	int this_syscall;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return 0;

	this_syscall = sd ? sd->nr :
		syscall_get_nr(current, current_pt_regs());

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		__secure_computing_strict(this_syscall);
		return 0;
	case SECCOMP_MODE_FILTER:
		return __seccomp_filter(this_syscall, sd, false);
	default:
		BUG();
	}
}
#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */

long prctl_get_seccomp(void)
{
	return current->seccomp.mode;
}

/**
 * seccomp_set_mode_strict: internal function for setting strict seccomp
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_strict(void)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
	long ret = -EINVAL;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

#ifdef TIF_NOTSC
	disable_TSC();
#endif
	seccomp_assign_mode(current, seccomp_mode, 0);
	ret = 0;

out:
	spin_unlock_irq(&current->sighand->siglock);

	return ret;
}

#ifdef CONFIG_SECCOMP_FILTER
static void seccomp_notify_free(struct seccomp_filter *filter)
{
	kfree(filter->notif);
	filter->notif = NULL;
}

static void seccomp_notify_detach(struct seccomp_filter *filter)
{
	struct seccomp_knotif *knotif;

	if (!filter)
		return;

	mutex_lock(&filter->notify_lock);

	/*
	 * If this file is being closed because e.g. the task who owned it
	 * died, let's wake everyone up who was waiting on us.
	 */
	list_for_each_entry(knotif, &filter->notif->notifications, list) {
		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
			continue;

		knotif->state = SECCOMP_NOTIFY_REPLIED;
		knotif->error = -ENOSYS;
		knotif->val = 0;

		/*
		 * We do not need to wake up any pending addfd messages, as
		 * the notifier will do that for us, as this just looks like
		 * a standard reply.
		 */
		complete(&knotif->ready);
	}

	seccomp_notify_free(filter);
	mutex_unlock(&filter->notify_lock);
}

static int seccomp_notify_release(struct inode *inode, struct file *file)
{
	struct seccomp_filter *filter = file->private_data;

	seccomp_notify_detach(filter);
	__put_seccomp_filter(filter);
	return 0;
}

/* must be called with notif_lock held */
static inline struct seccomp_knotif *
find_notification(struct seccomp_filter *filter, u64 id)
{
	struct seccomp_knotif *cur;

	lockdep_assert_held(&filter->notify_lock);

	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->id == id)
			return cur;
	}

	return NULL;
}

static long seccomp_notify_recv(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_knotif *knotif = NULL, *cur;
	struct seccomp_notif unotif;
	ssize_t ret;

	/* Verify that we're not given garbage to keep struct extensible. */
	ret = check_zeroed_user(buf, sizeof(unotif));
	if (ret < 0)
		return ret;
	if (!ret)
		return -EINVAL;

	memset(&unotif, 0, sizeof(unotif));

	ret = down_interruptible(&filter->notif->request);
	if (ret < 0)
		return ret;

	mutex_lock(&filter->notify_lock);
	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT) {
			knotif = cur;
			break;
		}
	}

	/*
	 * If we didn't find a notification, it could be that the task was
	 * interrupted by a fatal signal between the time we were woken and
	 * when we were able to acquire the lock.
	 */
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	unotif.id = knotif->id;
	unotif.pid = task_pid_vnr(knotif->task);
	unotif.data = *(knotif->data);

	knotif->state = SECCOMP_NOTIFY_SENT;
	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
	ret = 0;
out:
	mutex_unlock(&filter->notify_lock);

	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
		ret = -EFAULT;

		/*
		 * Userspace screwed up. To make sure that we keep this
		 * notification alive, let's reset it back to INIT. It
		 * may have died when we released the lock, so we need to make
		 * sure it's still around.
		 */
		mutex_lock(&filter->notify_lock);
		knotif = find_notification(filter, unotif.id);
		if (knotif) {
			knotif->state = SECCOMP_NOTIFY_INIT;
			up(&filter->notif->request);
		}
		mutex_unlock(&filter->notify_lock);
	}

	return ret;
}

static long seccomp_notify_send(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_notif_resp resp = {};
	struct seccomp_knotif *knotif;
	long ret;

	if (copy_from_user(&resp, buf, sizeof(resp)))
		return -EFAULT;

	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return -EINVAL;

	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
	    (resp.error || resp.val))
		return -EINVAL;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, resp.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	/* Allow exactly one reply. */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out;
	}

	ret = 0;
	knotif->state = SECCOMP_NOTIFY_REPLIED;
	knotif->error = resp.error;
	knotif->val = resp.val;
	knotif->flags = resp.flags;
	complete(&knotif->ready);
out:
	mutex_unlock(&filter->notify_lock);
	return ret;
}

static long seccomp_notify_id_valid(struct seccomp_filter *filter,
				    void __user *buf)
{
	struct seccomp_knotif *knotif;
	u64 id;
	long ret;

	if (copy_from_user(&id, buf, sizeof(id)))
		return -EFAULT;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, id);
	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
		ret = 0;
	else
		ret = -ENOENT;

	mutex_unlock(&filter->notify_lock);
	return ret;
}

static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{
	struct seccomp_notif_addfd addfd;
	struct seccomp_knotif *knotif;
	struct seccomp_kaddfd kaddfd;
	int ret;

	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);

	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
		return -EINVAL;

	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
	if (ret)
		return ret;

	if (addfd.newfd_flags & ~O_CLOEXEC)
		return -EINVAL;

	if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
		return -EINVAL;

	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
		return -EINVAL;

	kaddfd.file = fget(addfd.srcfd);
	if (!kaddfd.file)
		return -EBADF;

	kaddfd.flags = addfd.newfd_flags;
	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
	kaddfd.fd = addfd.newfd;
	init_completion(&kaddfd.completion);

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		goto out;

	knotif = find_notification(filter, addfd.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/*
	 * We do not want to allow addfds to occur while a notification is
	 * being replied to: once userspace has replied, the task may already
	 * be acting on the syscall's return value.
	 */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out_unlock;
	}

	list_add(&kaddfd.list, &knotif->addfd);
	complete(&knotif->ready);
	mutex_unlock(&filter->notify_lock);

	/* Now we wait for it to be processed or be interrupted */
	ret = wait_for_completion_interruptible(&kaddfd.completion);
	if (ret == 0) {
		/*
		 * We had a successful completion. The other side has already
		 * removed us from the addfd queue, and
		 * wait_for_completion_interruptible has a memory barrier upon
		 * success that lets us read this value directly without
		 * locking.
		 */
		ret = kaddfd.ret;
		goto out;
	}

	mutex_lock(&filter->notify_lock);
	/*
	 * Even though we were woken up by a signal and not a successful
	 * completion, a completion may have happened in the mean time.
	 *
	 * We need to check again if the addfd request has been handled,
	 * and if not, we will remove it from the queue.
	 */
	if (list_empty(&kaddfd.list))
		ret = kaddfd.ret;
	else
		list_del(&kaddfd.list);

out_unlock:
	mutex_unlock(&filter->notify_lock);
out:
	fput(kaddfd.file);

	return ret;
}

static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	struct seccomp_filter *filter = file->private_data;
	void __user *buf = (void __user *)arg;

	/* Fixed-size ioctls */
	switch (cmd) {
	case SECCOMP_IOCTL_NOTIF_RECV:
		return seccomp_notify_recv(filter, buf);
	case SECCOMP_IOCTL_NOTIF_SEND:
		return seccomp_notify_send(filter, buf);
	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
	case SECCOMP_IOCTL_NOTIF_ID_VALID:
		return seccomp_notify_id_valid(filter, buf);
	}

	/* Extensible Argument ioctls */
#define EA_IOCTL(cmd)	((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
	switch (EA_IOCTL(cmd)) {
	case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
		return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
	default:
		return -EINVAL;
	}
}

static __poll_t seccomp_notify_poll(struct file *file,
				    struct poll_table_struct *poll_tab)
{
	struct seccomp_filter *filter = file->private_data;
	__poll_t ret = 0;
	struct seccomp_knotif *cur;

	poll_wait(file, &filter->wqh, poll_tab);

	if (mutex_lock_interruptible(&filter->notify_lock) < 0)
		return EPOLLERR;

	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT)
			ret |= EPOLLIN | EPOLLRDNORM;
		if (cur->state == SECCOMP_NOTIFY_SENT)
			ret |= EPOLLOUT | EPOLLWRNORM;
		if ((ret & EPOLLIN) && (ret & EPOLLOUT))
			break;
	}

	mutex_unlock(&filter->notify_lock);

	if (refcount_read(&filter->users) == 0)
		ret |= EPOLLHUP;

	return ret;
}

static const struct file_operations seccomp_notify_ops = {
	.poll = seccomp_notify_poll,
	.release = seccomp_notify_release,
	.unlocked_ioctl = seccomp_notify_ioctl,
	.compat_ioctl = seccomp_notify_ioctl,
};
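
/*
 * Illustrative supervisor loop (not part of the original source), run
 * against the listener fd returned by SECCOMP_FILTER_FLAG_NEW_LISTENER:
 *
 *	struct seccomp_notif req;
 *	struct seccomp_notif_resp resp;
 *
 *	for (;;) {
 *		memset(&req, 0, sizeof(req));
 *		if (ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_RECV, &req) < 0)
 *			continue;	// e.g. interrupted by a signal
 *		memset(&resp, 0, sizeof(resp));
 *		resp.id = req.id;	// must echo the request cookie
 *		resp.error = -EPERM;	// deny req.data.nr in this example
 *		ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp);
 *	}
 *
 * The RECV buffer must be zeroed first (see check_zeroed_user() above),
 * and SEND fails with -ENOENT if the target task was killed meanwhile.
 */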

static struct file *init_listener(struct seccomp_filter *filter)
{
	struct file *ret;

	ret = ERR_PTR(-ENOMEM);
	filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
	if (!filter->notif)
		goto out;

	sema_init(&filter->notif->request, 0);
	filter->notif->next_id = get_random_u64();
	INIT_LIST_HEAD(&filter->notif->notifications);

	ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
				 filter, O_RDWR);
	if (IS_ERR(ret))
		goto out_notif;

	/* The file has a reference to it now */
	__get_seccomp_filter(filter);

out_notif:
	if (IS_ERR(ret))
		seccomp_notify_free(filter);
out:
	return ret;
}

/*
 * Does @new_child have a listener while an ancestor also has a listener?
 * If so, we'll want to reject this filter.
 * This only has to be tested for the current process, even in the TSYNC case,
 * because TSYNC installs @child with the same parent on all threads.
 * Note that @new_child is not hooked up to its parent at this point yet, so
 * we use current->seccomp.filter.
 */
static bool has_duplicate_listener(struct seccomp_filter *new_child)
{
	struct seccomp_filter *cur;

	/* must be protected against concurrent TSYNC */
	lockdep_assert_held(&current->sighand->siglock);

	if (!new_child->notif)
		return false;
	for (cur = current->seccomp.filter; cur; cur = cur->prev) {
		if (cur->notif)
			return true;
	}

	return false;
}

/**
 * seccomp_set_mode_filter: internal function for setting seccomp filter
 * @flags:  flags to change filter behavior
 * @filter: struct sock_fprog containing filter
 *
 * This function may be called repeatedly to install additional filters.
 * Every filter successfully installed will be evaluated (in reverse order)
 * for each system call the task makes.
 *
 * Once current->seccomp.mode is non-zero, it may not be changed.
 *
 * Returns 0 on success or -EINVAL on failure.
 */
static long seccomp_set_mode_filter(unsigned int flags,
				    const char __user *filter)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
	struct seccomp_filter *prepared = NULL;
	long ret = -EINVAL;
	int listener = -1;
	struct file *listener_f = NULL;

	/* Validate flags. */
	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
		return -EINVAL;

	/*
	 * In the successful case, NEW_LISTENER returns the new listener fd.
	 * But in the failure case, TSYNC returns the thread that died. If you
	 * combine these two flags, there's no way to tell whether something
	 * succeeded or failed. So, let's disallow this combination if the user
	 * has not explicitly requested no errors from TSYNC.
	 */
	if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
	    (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
	    ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
		return -EINVAL;

	/* Prepare the new filter before holding any locks. */
	prepared = seccomp_prepare_user_filter(filter);
	if (IS_ERR(prepared))
		return PTR_ERR(prepared);

	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
		listener = get_unused_fd_flags(O_CLOEXEC);
		if (listener < 0) {
			ret = listener;
			goto out_free;
		}

		listener_f = init_listener(prepared);
		if (IS_ERR(listener_f)) {
			put_unused_fd(listener);
			ret = PTR_ERR(listener_f);
			goto out_free;
		}
	}

	/*
	 * Make sure we cannot change seccomp or nnp state via TSYNC
	 * while another thread is in the middle of calling exec.
	 */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
	    mutex_lock_killable(&current->signal->cred_guard_mutex))
		goto out_put_fd;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

	if (has_duplicate_listener(prepared)) {
		ret = -EBUSY;
		goto out;
	}

	ret = seccomp_attach_filter(flags, prepared);
	if (ret)
		goto out;
	/* Do not free the successfully attached filter. */
	prepared = NULL;

	seccomp_assign_mode(current, seccomp_mode, flags);
out:
	spin_unlock_irq(&current->sighand->siglock);
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		mutex_unlock(&current->signal->cred_guard_mutex);
out_put_fd:
	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
		if (ret) {
			listener_f->private_data = NULL;
			fput(listener_f);
			put_unused_fd(listener);
			seccomp_notify_detach(prepared);
		} else {
			fd_install(listener, listener_f);
			ret = listener;
		}
	}
out_free:
	seccomp_filter_free(prepared);
	return ret;
}
#else
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
#endif

static long seccomp_get_action_avail(const char __user *uaction)
{
	u32 action;

	if (copy_from_user(&action, uaction, sizeof(action)))
		return -EFAULT;

	switch (action) {
	case SECCOMP_RET_KILL_PROCESS:
	case SECCOMP_RET_KILL_THREAD:
	case SECCOMP_RET_TRAP:
	case SECCOMP_RET_ERRNO:
	case SECCOMP_RET_USER_NOTIF:
	case SECCOMP_RET_TRACE:
	case SECCOMP_RET_LOG:
	case SECCOMP_RET_ALLOW:
		break;
	default:
		return -EOPNOTSUPP;
	}

	return 0;
}

static long seccomp_get_notif_sizes(void __user *usizes)
{
	struct seccomp_notif_sizes sizes = {
		.seccomp_notif = sizeof(struct seccomp_notif),
		.seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
		.seccomp_data = sizeof(struct seccomp_data),
	};

	if (copy_to_user(usizes, &sizes, sizeof(sizes)))
		return -EFAULT;

	return 0;
}

/* Common entry point for both prctl and syscall. */
static long do_seccomp(unsigned int op, unsigned int flags,
		       void __user *uargs)
{
	switch (op) {
	case SECCOMP_SET_MODE_STRICT:
		if (flags != 0 || uargs != NULL)
			return -EINVAL;
		return seccomp_set_mode_strict();
	case SECCOMP_SET_MODE_FILTER:
		return seccomp_set_mode_filter(flags, uargs);
	case SECCOMP_GET_ACTION_AVAIL:
		if (flags != 0)
			return -EINVAL;

		return seccomp_get_action_avail(uargs);
	case SECCOMP_GET_NOTIF_SIZES:
		if (flags != 0)
			return -EINVAL;

		return seccomp_get_notif_sizes(uargs);
	default:
		return -EINVAL;
	}
}

SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
			 void __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
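
/*
 * Illustrative userspace call (not part of the original source), installing
 * a filter like the classic-BPF example sketched earlier in this file:
 *
 *	struct sock_fprog prog = {
 *		.len = sizeof(example) / sizeof(example[0]),
 *		.filter = example,
 *	};
 *
 *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 *	syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog);
 *
 * Without no_new_privs (or CAP_SYS_ADMIN in the user namespace), the call
 * fails with -EACCES; see seccomp_prepare_filter() above.
 */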

/**
 * prctl_set_seccomp: configures current->seccomp.mode
 * @seccomp_mode: requested mode to use
 * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
 *
 * Returns 0 on success or -EINVAL on failure.
 */
long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
{
	unsigned int op;
	void __user *uargs;

	switch (seccomp_mode) {
	case SECCOMP_MODE_STRICT:
		op = SECCOMP_SET_MODE_STRICT;
		/*
		 * Setting strict mode through prctl always ignored filter,
		 * so make sure it is always NULL here to pass the internal
		 * check in do_seccomp().
		 */
		uargs = NULL;
		break;
	case SECCOMP_MODE_FILTER:
		op = SECCOMP_SET_MODE_FILTER;
		uargs = filter;
		break;
	default:
		return -EINVAL;
	}

	/* prctl interface doesn't have flags, so they are always zero. */
	return do_seccomp(op, 0, uargs);
}

#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
static struct seccomp_filter *get_nth_filter(struct task_struct *task,
					     unsigned long filter_off)
{
	struct seccomp_filter *orig, *filter;
	unsigned long count;

	/*
	 * Note: this is only correct because the caller should be the (ptrace)
	 * tracer of the task, otherwise lock_task_sighand is needed.
	 */
	spin_lock_irq(&task->sighand->siglock);

	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
		spin_unlock_irq(&task->sighand->siglock);
		return ERR_PTR(-EINVAL);
	}

	orig = task->seccomp.filter;
	__get_seccomp_filter(orig);
	spin_unlock_irq(&task->sighand->siglock);

	count = 0;
	for (filter = orig; filter; filter = filter->prev)
		count++;

	if (filter_off >= count) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	count -= filter_off;
	for (filter = orig; filter && count > 1; filter = filter->prev)
		count--;

	if (WARN_ON(count != 1 || !filter)) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	__get_seccomp_filter(filter);

out:
	__put_seccomp_filter(orig);
	return filter;
}

long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
			void __user *data)
{
	struct seccomp_filter *filter;
	struct sock_fprog_kern *fprog;
	long ret;

	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	filter = get_nth_filter(task, filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	fprog = filter->prog->orig_prog;
	if (!fprog) {
		/* This must be a new non-cBPF filter, since we save
		 * every cBPF filter's orig_prog above when
		 * CONFIG_CHECKPOINT_RESTORE is enabled.
		 */
		ret = -EMEDIUMTYPE;
		goto out;
	}

	ret = fprog->len;
	if (!data)
		goto out;

	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
		ret = -EFAULT;

out:
	__put_seccomp_filter(filter);
	return ret;
}

long seccomp_get_metadata(struct task_struct *task,
			  unsigned long size, void __user *data)
{
	long ret;
	struct seccomp_filter *filter;
	struct seccomp_metadata kmd = {};

	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	size = min_t(unsigned long, size, sizeof(kmd));

	if (size < sizeof(kmd.filter_off))
		return -EINVAL;

	if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
		return -EFAULT;

	filter = get_nth_filter(task, kmd.filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	if (filter->log)
		kmd.flags |= SECCOMP_FILTER_FLAG_LOG;

	ret = size;
	if (copy_to_user(data, &kmd, size))
		ret = -EFAULT;

	__put_seccomp_filter(filter);
	return ret;
}
#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */

#ifdef CONFIG_SYSCTL

/* Human readable action names for friendly sysctl interaction */
#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
#define SECCOMP_RET_TRAP_NAME		"trap"
#define SECCOMP_RET_ERRNO_NAME		"errno"
#define SECCOMP_RET_USER_NOTIF_NAME	"user_notif"
#define SECCOMP_RET_TRACE_NAME		"trace"
#define SECCOMP_RET_LOG_NAME		"log"
#define SECCOMP_RET_ALLOW_NAME		"allow"

static const char seccomp_actions_avail[] =
				SECCOMP_RET_KILL_PROCESS_NAME	" "
				SECCOMP_RET_KILL_THREAD_NAME	" "
				SECCOMP_RET_TRAP_NAME		" "
				SECCOMP_RET_ERRNO_NAME		" "
				SECCOMP_RET_USER_NOTIF_NAME	" "
				SECCOMP_RET_TRACE_NAME		" "
				SECCOMP_RET_LOG_NAME		" "
				SECCOMP_RET_ALLOW_NAME;

struct seccomp_log_name {
	u32		log;
	const char	*name;
};

static const struct seccomp_log_name seccomp_log_names[] = {
	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
	{ SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
	{ }
};

static bool seccomp_names_from_actions_logged(char *names, size_t size,
					      u32 actions_logged,
					      const char *sep)
{
	const struct seccomp_log_name *cur;
	bool append_sep = false;

	for (cur = seccomp_log_names; cur->name && size; cur++) {
		ssize_t ret;

		if (!(actions_logged & cur->log))
			continue;

		if (append_sep) {
			ret = strscpy(names, sep, size);
			if (ret < 0)
				return false;

			names += ret;
			size -= ret;
		} else
			append_sep = true;

		ret = strscpy(names, cur->name, size);
		if (ret < 0)
			return false;

		names += ret;
		size -= ret;
	}

	return true;
}

static bool seccomp_action_logged_from_name(u32 *action_logged,
					    const char *name)
{
	const struct seccomp_log_name *cur;

	for (cur = seccomp_log_names; cur->name; cur++) {
		if (!strcmp(cur->name, name)) {
			*action_logged = cur->log;
			return true;
		}
	}

	return false;
}

static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
{
	char *name;

	*actions_logged = 0;
	while ((name = strsep(&names, " ")) && *name) {
		u32 action_logged = 0;

		if (!seccomp_action_logged_from_name(&action_logged, name))
			return false;

		*actions_logged |= action_logged;
	}

	return true;
}

static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
			       size_t *lenp, loff_t *ppos)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;

	memset(names, 0, sizeof(names));

	if (!seccomp_names_from_actions_logged(names, sizeof(names),
					       seccomp_actions_logged, " "))
		return -EINVAL;

	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	return proc_dostring(&table, 0, buffer, lenp, ppos);
}

static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
				size_t *lenp, loff_t *ppos, u32 *actions_logged)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	memset(names, 0, sizeof(names));

	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
	if (ret)
		return ret;

	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
		return -EINVAL;

	if (*actions_logged & SECCOMP_LOG_ALLOW)
		return -EINVAL;

	seccomp_actions_logged = *actions_logged;
	return 0;
}

static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
				 int ret)
{
	char names[sizeof(seccomp_actions_avail)];
	char old_names[sizeof(seccomp_actions_avail)];
	const char *new = names;
	const char *old = old_names;

	if (!audit_enabled)
		return;

	memset(names, 0, sizeof(names));
	memset(old_names, 0, sizeof(old_names));

	if (ret)
		new = "?";
	else if (!actions_logged)
		new = "(none)";
	else if (!seccomp_names_from_actions_logged(names, sizeof(names),
						    actions_logged, ","))
		new = "?";

	if (!old_actions_logged)
		old = "(none)";
	else if (!seccomp_names_from_actions_logged(old_names,
						    sizeof(old_names),
						    old_actions_logged, ","))
		old = "?";

	return audit_seccomp_actions_logged(new, old, !ret);
}

static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret;

	if (write) {
		u32 actions_logged = 0;
		u32 old_actions_logged = seccomp_actions_logged;

		ret = write_actions_logged(ro_table, buffer, lenp, ppos,
					   &actions_logged);
		audit_actions_logged(actions_logged, old_actions_logged, ret);
	} else
		ret = read_actions_logged(ro_table, buffer, lenp, ppos);

	return ret;
}

static struct ctl_path seccomp_sysctl_path[] = {
	{ .procname = "kernel", },
	{ .procname = "seccomp", },
	{ }
};

static struct ctl_table seccomp_sysctl_table[] = {
	{
		.procname	= "actions_avail",
		.data		= (void *) &seccomp_actions_avail,
		.maxlen		= sizeof(seccomp_actions_avail),
		.mode		= 0444,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "actions_logged",
		.mode		= 0644,
		.proc_handler	= seccomp_actions_logged_handler,
	},
	{ }
};
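
/*
 * Example interaction with the knobs registered below (illustrative, not
 * part of the original source), exposed under /proc/sys/kernel/seccomp/:
 *
 *	$ cat /proc/sys/kernel/seccomp/actions_avail
 *	kill_process kill_thread trap errno user_notif trace log allow
 *	# echo "kill_process errno" > /proc/sys/kernel/seccomp/actions_logged
 *
 * Writing "allow" to actions_logged is rejected with -EINVAL (see
 * write_actions_logged() above), and the write requires CAP_SYS_ADMIN.
 */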

static int __init seccomp_sysctl_init(void)
{
	struct ctl_table_header *hdr;

	hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
	if (!hdr)
		pr_warn("sysctl registration failed\n");
	else
		kmemleak_not_leak(hdr);

	return 0;
}

device_initcall(seccomp_sysctl_init)

#endif /* CONFIG_SYSCTL */

#ifdef CONFIG_SECCOMP_CACHE_DEBUG
/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */
static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
					const void *bitmap, size_t bitmap_size)
{
	int nr;

	for (nr = 0; nr < bitmap_size; nr++) {
		bool cached = test_bit(nr, bitmap);
		char *status = cached ? "ALLOW" : "FILTER";

		seq_printf(m, "%s %d %s\n", name, nr, status);
	}
}

int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	struct seccomp_filter *f;
	unsigned long flags;

	/*
	 * We don't want some sandboxed process to know what their seccomp
	 * filters look like.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	if (!lock_task_sighand(task, &flags))
		return -ESRCH;

	f = READ_ONCE(task->seccomp.filter);
	if (!f) {
		unlock_task_sighand(task, &flags);
		return 0;
	}

	/* prevent filter from being freed while we are printing it */
	__get_seccomp_filter(f);
	unlock_task_sighand(task, &flags);

	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
				    f->cache.allow_native,
				    SECCOMP_ARCH_NATIVE_NR);

#ifdef SECCOMP_ARCH_COMPAT
	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
				    f->cache.allow_compat,
				    SECCOMP_ARCH_COMPAT_NR);
#endif /* SECCOMP_ARCH_COMPAT */

	__put_seccomp_filter(f);
	return 0;
}
#endif /* CONFIG_SECCOMP_CACHE_DEBUG */