1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#define pr_fmt(fmt) "seccomp: " fmt
17
18#include <linux/refcount.h>
19#include <linux/audit.h>
20#include <linux/compat.h>
21#include <linux/coredump.h>
22#include <linux/kmemleak.h>
23#include <linux/nospec.h>
24#include <linux/prctl.h>
25#include <linux/sched.h>
26#include <linux/sched/task_stack.h>
27#include <linux/seccomp.h>
28#include <linux/slab.h>
29#include <linux/syscalls.h>
30#include <linux/sysctl.h>
31
32
/* Internal-only mode: task took a fatal seccomp action and must not return. */
#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1)
34
35#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
36#include <asm/syscall.h>
37#endif
38
39#ifdef CONFIG_SECCOMP_FILTER
40#include <linux/file.h>
41#include <linux/filter.h>
42#include <linux/pid.h>
43#include <linux/ptrace.h>
44#include <linux/capability.h>
45#include <linux/uaccess.h>
46#include <linux/anon_inodes.h>
47#include <linux/lockdep.h>
48
49
50
51
52
53
54
/*
 * Accepted alongside SECCOMP_IOCTL_NOTIF_ID_VALID (see seccomp_notify_ioctl);
 * presumably an old ioctl number with the wrong direction bits, kept for
 * userspace binary compatibility — confirm against uapi history.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
56
/*
 * Lifecycle of a user-notification: created (INIT), handed to the
 * listener (SENT), then answered (REPLIED).
 */
enum notify_state {
	SECCOMP_NOTIFY_INIT,
	SECCOMP_NOTIFY_SENT,
	SECCOMP_NOTIFY_REPLIED,
};
62
/**
 * struct seccomp_knotif - kernel-side state for one user notification
 * @task:  the task that triggered the notification (blocked in
 *         seccomp_do_user_notification())
 * @id:    cookie identifying this notification to the listener
 * @data:  seccomp_data of the intercepted syscall; only valid while @task
 *         is still waiting on @ready
 * @state: SECCOMP_NOTIFY_INIT -> SENT -> REPLIED progression
 * @error: errno the filtered syscall should return (from the reply)
 * @val:   return value the filtered syscall should use (from the reply)
 * @flags: SECCOMP_USER_NOTIF_FLAG_* from the reply
 * @ready: completed once the listener replied (or went away)
 * @list:  entry in the owning filter's notif->notifications list
 * @addfd: queue of pending struct seccomp_kaddfd requests for this
 *         notification
 */
struct seccomp_knotif {
	struct task_struct *task;

	u64 id;

	const struct seccomp_data *data;

	enum notify_state state;

	/* Reply payload, valid once @state == SECCOMP_NOTIFY_REPLIED. */
	int error;
	long val;
	u32 flags;

	struct completion ready;

	struct list_head list;

	struct list_head addfd;
};
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/**
 * struct seccomp_kaddfd - kernel-side state for one ADDFD request
 * @file:        file to install into the notifying task
 * @fd:          target fd number when @setfd is true
 * @flags:       fd flags (only O_CLOEXEC is permitted by the ioctl)
 * @ioctl_flags: SECCOMP_ADDFD_FLAG_* copied from userspace
 * @setfd:       request input: install at @fd instead of lowest free fd
 * @ret:         request output: installed fd number or negative errno
 * @completion:  completed once the fd has been installed (or cancelled)
 * @list:        entry in the target knotif's addfd queue
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;
	__u32 ioctl_flags;

	union {
		bool setfd;

		int ret;
	};
	struct completion completion;
	struct list_head list;
};
134
135
136
137
138
139
140
141
142
143
144
145
146
/**
 * struct notification - per-filter user-notification bookkeeping
 * @request:       semaphore counting notifications awaiting a RECV
 * @next_id:       next notification id to hand out (under notify_lock)
 * @notifications: list of outstanding struct seccomp_knotif
 */
struct notification {
	struct semaphore request;
	u64 next_id;
	struct list_head notifications;
};
152
#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter bitmap of constant-allow syscalls
 * @allow_native: bit N set means native syscall N is always allowed by
 *                this filter (and all of its ancestors)
 * @allow_compat: same, for the compat architecture when one exists
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
struct action_cache { };

/* Without arch bitmap support the cache can never report "allowed". */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

/* No-op: there is no cache to populate without SECCOMP_ARCH_NATIVE. */
static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/**
 * struct seccomp_filter - one node in a task's filter chain
 * @refs:  memory lifetime refcount (filter may be freed at zero)
 * @users: count of tasks/listeners still using the filter; dropping to
 *         zero triggers EPOLLHUP on @wqh (see __seccomp_filter_orphan())
 * @log:   whether SECCOMP_RET_LOG-style actions should be audited
 * @cache: constant-allow bitmap, prepared at attach time
 * @prev:  next-older filter in the chain (shared across threads)
 * @prog:  the attached classic-BPF program
 * @notif: user-notification state; non-NULL only while a listener exists
 * @notify_lock: serializes all notification state changes
 * @wqh:   waitqueue for listener poll/epoll
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};
230
231
/* Limit total BPF instructions along one filter chain (256 KiB of filters). */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
233
234
235
236
237
238static void populate_seccomp_data(struct seccomp_data *sd)
239{
240
241
242
243
244 struct task_struct *task = current;
245 struct pt_regs *regs = task_pt_regs(task);
246 unsigned long args[6];
247
248 sd->nr = syscall_get_nr(task, regs);
249 sd->arch = syscall_get_arch(task);
250 syscall_get_arguments(task, regs, args);
251 sd->args[0] = args[0];
252 sd->args[1] = args[1];
253 sd->args[2] = args[2];
254 sd->args[3] = args[3];
255 sd->args[4] = args[4];
256 sd->args[5] = args[5];
257 sd->instruction_pointer = KSTK_EIP(task);
258}
259
260
261
262
263
264
265
266
267
268
269
270
271
/*
 * Validate a classic-BPF program for seccomp use and rewrite loads.
 *
 * Absolute loads are redirected to read struct seccomp_data (aligned,
 * in-bounds only), length loads become immediates of sizeof(seccomp_data),
 * and only a side-effect-free whitelist of ALU/JMP/misc opcodes is
 * accepted.  Returns 0 on success, -EINVAL on any disallowed instruction.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			/* Redirect to seccomp_data access (private mode). */
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and in-bounds within seccomp_data. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			/* "Packet length" is always sizeof(seccomp_data). */
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			/* Anything else (packet loads, scratch abuse) is rejected. */
			return -EINVAL;
		}
	}
	return 0;
}
341
342#ifdef SECCOMP_ARCH_NATIVE
/*
 * Test whether @syscall_nr is marked always-allowed in @bitmap.
 * Bounds-checks and clamps the index under speculation
 * (array_index_nospec) before the bit test.
 */
static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
						    size_t bitmap_size,
						    int syscall_nr)
{
	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
		return false;
	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);

	return test_bit(syscall_nr, bitmap);
}
353
354
355
356
357
358
359
360
/*
 * Fast path: return true if @sfilter's cache proves this syscall is
 * unconditionally allowed, so the BPF programs need not run at all.
 * Picks the native or compat bitmap by sd->arch.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A single-arch kernel only ever sees the native bitmap. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
							SECCOMP_ARCH_NATIVE_NR,
							syscall_nr);
	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
							SECCOMP_ARCH_COMPAT_NR,
							syscall_nr);
#endif

	/* Unknown arch value — should be unreachable; fail closed. */
	WARN_ON_ONCE(true);
	return false;
}
386#endif
387
388
389
390
391
392
393
394
395
396
/* Strip the data bits, keeping only the (signed-compared) action value. */
#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
/*
 * Run every filter in the current task's chain against @sd and return
 * the most restrictive result (lowest ACTION_ONLY value wins); *match
 * is set to the filter that produced it.  The cache short-circuits to
 * SECCOMP_RET_ALLOW when possible, leaving *match NULL.
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* READ_ONCE: another thread may concurrently attach a new filter. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated; the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
427#endif
428
429static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
430{
431 assert_spin_locked(¤t->sighand->siglock);
432
433 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
434 return false;
435
436 return true;
437}
438
/* Weak default: arches override to apply speculation mitigations on entry. */
void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
440
/*
 * Commit @seccomp_mode on @task and enable the SECCOMP syscall-entry
 * work.  Caller must hold task->sighand->siglock.
 */
static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processes want spec flaw mitigation. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_task_syscall_work(task, SECCOMP);
}
458
459#ifdef CONFIG_SECCOMP_FILTER
460
461static int is_ancestor(struct seccomp_filter *parent,
462 struct seccomp_filter *child)
463{
464
465 if (parent == NULL)
466 return 1;
467 for (; child; child = child->prev)
468 if (child == parent)
469 return 1;
470 return 0;
471}
472
473
474
475
476
477
478
479
480
481
482static inline pid_t seccomp_can_sync_threads(void)
483{
484 struct task_struct *thread, *caller;
485
486 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
487 assert_spin_locked(¤t->sighand->siglock);
488
489
490 caller = current;
491 for_each_thread(caller, thread) {
492 pid_t failed;
493
494
495 if (thread == caller)
496 continue;
497
498 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
499 (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
500 is_ancestor(thread->seccomp.filter,
501 caller->seccomp.filter)))
502 continue;
503
504
505 failed = task_pid_vnr(thread);
506
507 if (WARN_ON(failed == 0))
508 failed = -ESRCH;
509 return failed;
510 }
511
512 return 0;
513}
514
515static inline void seccomp_filter_free(struct seccomp_filter *filter)
516{
517 if (filter) {
518 bpf_prog_destroy(filter->prog);
519 kfree(filter);
520 }
521}
522
523static void __seccomp_filter_orphan(struct seccomp_filter *orig)
524{
525 while (orig && refcount_dec_and_test(&orig->users)) {
526 if (waitqueue_active(&orig->wqh))
527 wake_up_poll(&orig->wqh, EPOLLHUP);
528 orig = orig->prev;
529 }
530}
531
532static void __put_seccomp_filter(struct seccomp_filter *orig)
533{
534
535 while (orig && refcount_dec_and_test(&orig->refs)) {
536 struct seccomp_filter *freeme = orig;
537 orig = orig->prev;
538 seccomp_filter_free(freeme);
539 }
540}
541
/*
 * Fully release a task's reference on a filter chain: first the
 * "users" count (waking listeners), then the memory refcount.
 */
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);
	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}
549
550
551
552
553
554
555
556
557
558
/*
 * Detach and release @tsk's filter chain at task exit.  The sighand
 * check guards against being called while the task is still live
 * (siglock would then be required to touch ->seccomp.filter).
 */
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	/* We are effectively holding the siglock by not having any sighand. */
	WARN_ON(tsk->sighand != NULL);

	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	__seccomp_filter_release(orig);
}
570
571
572
573
574
575
576
577
578
579static inline void seccomp_sync_threads(unsigned long flags)
580{
581 struct task_struct *thread, *caller;
582
583 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
584 assert_spin_locked(¤t->sighand->siglock);
585
586
587 caller = current;
588 for_each_thread(caller, thread) {
589
590 if (thread == caller)
591 continue;
592
593
594 get_seccomp_filter(caller);
595
596
597
598
599
600
601 __seccomp_filter_release(thread->seccomp.filter);
602
603
604 smp_store_release(&thread->seccomp.filter,
605 caller->seccomp.filter);
606 atomic_set(&thread->seccomp.filter_count,
607 atomic_read(&caller->seccomp.filter_count));
608
609
610
611
612
613
614
615 if (task_no_new_privs(caller))
616 task_set_no_new_privs(thread);
617
618
619
620
621
622
623
624 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
625 seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
626 flags);
627 }
628}
629
630
631
632
633
634
635
/*
 * Build a seccomp_filter from a (kernel-space) sock_fprog: validates
 * permissions (no_new_privs or CAP_SYS_ADMIN in the user-ns), allocates
 * the node, and attaches the verified BPF program.  Returns the new
 * filter or an ERR_PTR.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	/* The original program is kept when CRIU or the bitmap cache needs it. */
	const bool save_orig =
#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
		true;
#else
		false;
#endif

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
	    !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	mutex_init(&sfilter->notify_lock);
	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	refcount_set(&sfilter->refs, 1);
	refcount_set(&sfilter->users, 1);
	init_waitqueue_head(&sfilter->wqh);

	return sfilter;
}
681
682
683
684
685
686
687
/*
 * Copy a user-supplied sock_fprog (handling the compat layout) and
 * build a filter from it.  Returns the filter or ERR_PTR(-EFAULT) on a
 * bad user pointer.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		/* 32-bit callers use a narrower struct with a compat pointer. */
		struct compat_sock_fprog fprog32;
		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else /* falls through to the if below. */
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}
709
710#ifdef SECCOMP_ARCH_NATIVE
711
712
713
714
715
716
/*
 * Statically emulate @fprog for the fixed (nr, arch) in @sd and decide
 * whether it provably returns SECCOMP_RET_ALLOW regardless of the
 * syscall arguments.  Any instruction that depends on non-constant
 * input (argument loads, X-register ops, etc.) makes the answer false.
 * Used only to populate the constant-allow bitmap cache.
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{
	/* A (accumulator) register of the emulated classic-BPF machine. */
	unsigned int reg_value = 0;
	unsigned int pc;
	bool op_res;

	if (WARN_ON_ONCE(!fprog))
		return false;

	for (pc = 0; pc < fprog->len; pc++) {
		struct sock_filter *insn = &fprog->filter[pc];
		u16 code = insn->code;
		u32 k = insn->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			switch (k) {
			case offsetof(struct seccomp_data, nr):
				reg_value = sd->nr;
				break;
			case offsetof(struct seccomp_data, arch):
				reg_value = sd->arch;
				break;
			default:
				/* Reads of args/IP are not constant. */
				return false;
			}
			break;
		case BPF_RET | BPF_K:
			/* Reached a return: constant-allow iff it allows. */
			return k == SECCOMP_RET_ALLOW;
		case BPF_JMP | BPF_JA:
			pc += insn->k;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_K:
			switch (BPF_OP(code)) {
			case BPF_JEQ:
				op_res = reg_value == k;
				break;
			case BPF_JGE:
				op_res = reg_value >= k;
				break;
			case BPF_JGT:
				op_res = reg_value > k;
				break;
			case BPF_JSET:
				op_res = !!(reg_value & k);
				break;
			default:
				/* Can't happen given the case labels above. */
				return false;
			}

			pc += op_res ? insn->jt : insn->jf;
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			reg_value &= k;
			break;
		default:
			/* Any other opcode is not emulated: not constant. */
			return false;
		}
	}

	/* Ran off the end of the program — the verifier should prevent this. */
	WARN_ON(1);
	return false;
}
789
/*
 * Populate one arch bitmap for @sfilter: start from the parent's bitmap
 * (or all-ones for the first filter) and clear every syscall that this
 * filter does not constantly allow.  A set bit therefore means the
 * whole chain allows the syscall unconditionally.
 */
static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{
	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
	struct seccomp_data sd;
	int nr;

	if (bitmap_prev) {
		/* The new filter can only further restrict the parent's set. */
		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
	} else {
		/* Before any filter, every syscall was allowed. */
		bitmap_fill(bitmap, bitmap_size);
	}

	for (nr = 0; nr < bitmap_size; nr++) {
		/* Already cleared by an ancestor: nothing to re-check. */
		if (!test_bit(nr, bitmap))
			continue;

		sd.nr = nr;
		sd.arch = arch;

		/* Filter provably allows this nr for any arguments. */
		if (seccomp_is_const_allow(fprog, &sd))
			continue;

		/*
		 * Not a cacheable action.  Always run filters.
		 */
		__clear_bit(nr, bitmap);
	}
}
825
826
827
828
829
830
831
/*
 * Prepare @sfilter's constant-allow cache from its (not yet linked)
 * parent's cache, for the native and, when present, compat arch.
 * Must run before the filter becomes visible to other threads.
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	struct action_cache *cache = &sfilter->cache;
	const struct action_cache *cache_prev =
		sfilter->prev ? &sfilter->prev->cache : NULL;

	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
				     cache_prev ? cache_prev->allow_native : NULL,
				     SECCOMP_ARCH_NATIVE_NR,
				     SECCOMP_ARCH_NATIVE);

#ifdef SECCOMP_ARCH_COMPAT
	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
				     cache_prev ? cache_prev->allow_compat : NULL,
				     SECCOMP_ARCH_COMPAT_NR,
				     SECCOMP_ARCH_COMPAT);
#endif
}
850#endif
851
852
853
854
855
856
857
858
859
860
861
862
863
864static long seccomp_attach_filter(unsigned int flags,
865 struct seccomp_filter *filter)
866{
867 unsigned long total_insns;
868 struct seccomp_filter *walker;
869
870 assert_spin_locked(¤t->sighand->siglock);
871
872
873 total_insns = filter->prog->len;
874 for (walker = current->seccomp.filter; walker; walker = walker->prev)
875 total_insns += walker->prog->len + 4;
876 if (total_insns > MAX_INSNS_PER_PATH)
877 return -ENOMEM;
878
879
880 if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
881 int ret;
882
883 ret = seccomp_can_sync_threads();
884 if (ret) {
885 if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
886 return -ESRCH;
887 else
888 return ret;
889 }
890 }
891
892
893 if (flags & SECCOMP_FILTER_FLAG_LOG)
894 filter->log = true;
895
896
897
898
899
900 filter->prev = current->seccomp.filter;
901 seccomp_cache_prepare(filter);
902 current->seccomp.filter = filter;
903 atomic_inc(¤t->seccomp.filter_count);
904
905
906 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
907 seccomp_sync_threads(flags);
908
909 return 0;
910}
911
/* Take one memory ("refs") reference on @filter. */
static void __get_seccomp_filter(struct seccomp_filter *filter)
{
	refcount_inc(&filter->refs);
}
916
917
/*
 * Take a full task-level reference (both refs and users) on @tsk's
 * current filter, if any.  Pairs with __seccomp_filter_release().
 */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;
	if (!orig)
		return;
	__get_seccomp_filter(orig);
	refcount_inc(&orig->users);
}
926
927#endif
928
929
/* Bitmask values selecting which seccomp actions get audit-logged. */
#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
#define SECCOMP_LOG_KILL_THREAD (1 << 1)
#define SECCOMP_LOG_TRAP (1 << 2)
#define SECCOMP_LOG_ERRNO (1 << 3)
#define SECCOMP_LOG_TRACE (1 << 4)
#define SECCOMP_LOG_LOG (1 << 5)
#define SECCOMP_LOG_ALLOW (1 << 6)
#define SECCOMP_LOG_USER_NOTIF (1 << 7)

/* Default: log everything except ALLOW (tunable via sysctl elsewhere). */
static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
				    SECCOMP_LOG_KILL_THREAD |
				    SECCOMP_LOG_TRAP |
				    SECCOMP_LOG_ERRNO |
				    SECCOMP_LOG_USER_NOTIF |
				    SECCOMP_LOG_TRACE |
				    SECCOMP_LOG_LOG;
946
/*
 * Emit an audit record for a seccomp decision when the action is
 * enabled in seccomp_actions_logged.  For non-fatal actions logging
 * additionally requires @requested (the filter's log flag); kill
 * actions and RET_LOG are gated only by the sysctl mask.
 */
static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{
	bool log = false;

	switch (action) {
	case SECCOMP_RET_ALLOW:
		/* ALLOW is never logged from here. */
		break;
	case SECCOMP_RET_TRAP:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
		break;
	case SECCOMP_RET_ERRNO:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
		break;
	case SECCOMP_RET_TRACE:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
		break;
	case SECCOMP_RET_USER_NOTIF:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
		break;
	case SECCOMP_RET_LOG:
		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
		break;
	case SECCOMP_RET_KILL_THREAD:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
		break;
	case SECCOMP_RET_KILL_PROCESS:
	default:
		/* Unknown actions are treated like KILL_PROCESS. */
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
	}

	/*
	 * Emit an audit message when the action is RET_KILL_*, RET_TRAP,
	 * or the FILTER_FLAG_LOG bit was set and the action is allowed to
	 * be logged by the admin.
	 */
	if (!log)
		return;

	audit_seccomp(syscall, signr, action);
}
989
990
991
992
993
994
/*
 * Secure computing mode 1 allows only read, write, exit, and sigreturn.
 * To be fully secure this must be combined with rlimit to limit the fd
 * usage.  The list is -1 terminated.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1, /* negative terminated */
};
999
/*
 * Enforce strict (mode 1) seccomp: permit only the syscalls in the
 * mode-1 whitelist; anything else marks the task dead, logs, and
 * SIGKILLs it.  Never returns for a disallowed syscall.
 */
static void __secure_computing_strict(int this_syscall)
{
	const int *allowed_syscalls = mode1_syscalls;
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		allowed_syscalls = get_compat_mode1_syscalls();
#endif
	do {
		if (*allowed_syscalls == this_syscall)
			return;
	} while (*++allowed_syscalls != -1);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	/* Mark dead so a later re-entry is caught as a bug. */
	current->seccomp.mode = SECCOMP_MODE_DEAD;
	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
	do_exit(SIGKILL);
}
1019
1020#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
/*
 * Syscall-entry hook for kernels without arch filter support: only
 * strict mode can be active.  CRIU's PT_SUSPEND_SECCOMP temporarily
 * bypasses enforcement.
 */
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
1036#else
1037
1038#ifdef CONFIG_SECCOMP_FILTER
/*
 * Hand out the next notification id for @filter.
 *
 * Note: overflow deliberately ignored — a u64 counter cannot
 * realistically wrap within a filter's lifetime.
 */
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{
	/* notify_lock serializes the counter. */
	lockdep_assert_held(&filter->notify_lock);
	return filter->notif->next_id++;
}
1048
/*
 * Install the listener-supplied fd into the notifying task (which is
 * running this in its own context while blocked on the notification).
 * With ADDFD_FLAG_SEND the installed fd also becomes the syscall's
 * reply value (or, on failure, the notification is re-armed as SENT so
 * a normal reply can still arrive).  Runs under match->notify_lock.
 */
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
	int fd;

	/* Remove the request before completing so the waiter knows it ran. */
	list_del_init(&addfd->list);
	if (!addfd->setfd)
		fd = receive_fd(addfd->file, addfd->flags);
	else
		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
	addfd->ret = fd;

	if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
		/* If the fd install failed, fall back to waiting for a reply. */
		if (fd < 0) {
			n->state = SECCOMP_NOTIFY_SENT;
		} else {
			/* Atomic reply: the syscall returns the new fd. */
			n->flags = 0;
			n->error = 0;
			n->val = fd;
		}
	}

	/*
	 * Wake the ADDFD ioctl caller; it reads addfd->ret after this
	 * completion fires.
	 */
	complete(&addfd->completion);
}
1082
/*
 * Block the current task on a SECCOMP_RET_USER_NOTIF action: queue a
 * knotif on @match, wake the listener, and wait (interruptibly) for a
 * reply, servicing ADDFD requests in between.  Returns 0 to let the
 * syscall continue (FLAG_CONTINUE) or -1 after setting the reply's
 * error/val as the syscall result.
 */
static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{
	int err;
	u32 flags = 0;
	long ret = 0;
	struct seccomp_knotif n = {};
	struct seccomp_kaddfd *addfd, *tmp;

	mutex_lock(&match->notify_lock);
	err = -ENOSYS;
	/* No listener attached (or it already went away). */
	if (!match->notif)
		goto out;

	n.task = current;
	n.state = SECCOMP_NOTIFY_INIT;
	n.data = sd;
	n.id = seccomp_next_notify_id(match);
	init_completion(&n.ready);
	list_add(&n.list, &match->notif->notifications);
	INIT_LIST_HEAD(&n.addfd);

	/* Signal availability to RECV waiters and pollers. */
	up(&match->notif->request);
	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

	/*
	 * This is where we wait for a reply from userspace.
	 */
	do {
		mutex_unlock(&match->notify_lock);
		err = wait_for_completion_interruptible(&n.ready);
		mutex_lock(&match->notify_lock);
		if (err != 0)
			goto interrupted;

		/* Completion may also mean a pending ADDFD, not a reply. */
		addfd = list_first_entry_or_null(&n.addfd,
						 struct seccomp_kaddfd, list);
		/* Check if we were woken up by a addfd message */
		if (addfd)
			seccomp_handle_addfd(addfd, &n);

	} while (n.state != SECCOMP_NOTIFY_REPLIED);

	ret = n.val;
	err = n.error;
	flags = n.flags;

interrupted:
	/* If there were any pending addfd calls, clear them out */
	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
		/* The process went away before we got a chance to handle it. */
		addfd->ret = -ESRCH;
		list_del_init(&addfd->list);
		complete(&addfd->completion);
	}

	/*
	 * Note that it is possible the listener died in between the time when
	 * we were notified of a response (or a signal) and when we were able to
	 * re-acquire the lock, so only delete from the list if the
	 * notification actually exists.
	 *
	 * Also note that this test is only valid because there's no way to
	 * *reattach* to a notifier right now. If one is added, we'll need to
	 * keep track of the notif itself and make sure they match here.
	 */
	if (match->notif)
		list_del(&n.list);
out:
	mutex_unlock(&match->notify_lock);

	/* Userspace requests to continue the syscall. */
	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return 0;

	syscall_set_return_value(current, current_pt_regs(),
				 err, ret);
	return -1;
}
1163
1164static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1165 const bool recheck_after_trace)
1166{
1167 u32 filter_ret, action;
1168 struct seccomp_filter *match = NULL;
1169 int data;
1170 struct seccomp_data sd_local;
1171
1172
1173
1174
1175
1176 smp_rmb();
1177
1178 if (!sd) {
1179 populate_seccomp_data(&sd_local);
1180 sd = &sd_local;
1181 }
1182
1183 filter_ret = seccomp_run_filters(sd, &match);
1184 data = filter_ret & SECCOMP_RET_DATA;
1185 action = filter_ret & SECCOMP_RET_ACTION_FULL;
1186
1187 switch (action) {
1188 case SECCOMP_RET_ERRNO:
1189
1190 if (data > MAX_ERRNO)
1191 data = MAX_ERRNO;
1192 syscall_set_return_value(current, current_pt_regs(),
1193 -data, 0);
1194 goto skip;
1195
1196 case SECCOMP_RET_TRAP:
1197
1198 syscall_rollback(current, current_pt_regs());
1199
1200 force_sig_seccomp(this_syscall, data, false);
1201 goto skip;
1202
1203 case SECCOMP_RET_TRACE:
1204
1205 if (recheck_after_trace)
1206 return 0;
1207
1208
1209 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
1210 syscall_set_return_value(current,
1211 current_pt_regs(),
1212 -ENOSYS, 0);
1213 goto skip;
1214 }
1215
1216
1217 ptrace_event(PTRACE_EVENT_SECCOMP, data);
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228 if (fatal_signal_pending(current))
1229 goto skip;
1230
1231 this_syscall = syscall_get_nr(current, current_pt_regs());
1232 if (this_syscall < 0)
1233 goto skip;
1234
1235
1236
1237
1238
1239
1240
1241 if (__seccomp_filter(this_syscall, NULL, true))
1242 return -1;
1243
1244 return 0;
1245
1246 case SECCOMP_RET_USER_NOTIF:
1247 if (seccomp_do_user_notification(this_syscall, match, sd))
1248 goto skip;
1249
1250 return 0;
1251
1252 case SECCOMP_RET_LOG:
1253 seccomp_log(this_syscall, 0, action, true);
1254 return 0;
1255
1256 case SECCOMP_RET_ALLOW:
1257
1258
1259
1260
1261
1262 return 0;
1263
1264 case SECCOMP_RET_KILL_THREAD:
1265 case SECCOMP_RET_KILL_PROCESS:
1266 default:
1267 current->seccomp.mode = SECCOMP_MODE_DEAD;
1268 seccomp_log(this_syscall, SIGSYS, action, true);
1269
1270 if (action != SECCOMP_RET_KILL_THREAD ||
1271 (atomic_read(¤t->signal->live) == 1)) {
1272
1273 syscall_rollback(current, current_pt_regs());
1274
1275 force_sig_seccomp(this_syscall, data, true);
1276 } else {
1277 do_exit(SIGSYS);
1278 }
1279 return -1;
1280 }
1281
1282 unreachable();
1283
1284skip:
1285 seccomp_log(this_syscall, 0, action, match ? match->log : false);
1286 return -1;
1287}
1288#else
/* Filter mode cannot be reached without CONFIG_SECCOMP_FILTER. */
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	BUG();

	return -1;
}
1296#endif
1297
/*
 * Main syscall-entry seccomp hook (arch filter support enabled).
 * Dispatches on the task's mode; returns 0 to run the syscall, -1 to
 * skip it.  CRIU's PT_SUSPEND_SECCOMP temporarily bypasses enforcement.
 */
int __secure_computing(const struct seccomp_data *sd)
{
	int mode = current->seccomp.mode;
	int this_syscall;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return 0;

	this_syscall = sd ? sd->nr :
		syscall_get_nr(current, current_pt_regs());

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		__secure_computing_strict(this_syscall);  /* may call do_exit */
		return 0;
	case SECCOMP_MODE_FILTER:
		return __seccomp_filter(this_syscall, sd, false);
	/* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
	case SECCOMP_MODE_DEAD:
		WARN_ON_ONCE(1);
		do_exit(SIGKILL);
		return -1;
	default:
		BUG();
	}
}
1325#endif
1326
/* prctl(PR_GET_SECCOMP) backend: report the current task's seccomp mode. */
long prctl_get_seccomp(void)
{
	return current->seccomp.mode;
}
1331
1332
1333
1334
1335
1336
1337
1338
1339static long seccomp_set_mode_strict(void)
1340{
1341 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
1342 long ret = -EINVAL;
1343
1344 spin_lock_irq(¤t->sighand->siglock);
1345
1346 if (!seccomp_may_assign_mode(seccomp_mode))
1347 goto out;
1348
1349#ifdef TIF_NOTSC
1350 disable_TSC();
1351#endif
1352 seccomp_assign_mode(current, seccomp_mode, 0);
1353 ret = 0;
1354
1355out:
1356 spin_unlock_irq(¤t->sighand->siglock);
1357
1358 return ret;
1359}
1360
1361#ifdef CONFIG_SECCOMP_FILTER
/* Free the notification state; NULLing ->notif marks "no listener". */
static void seccomp_notify_free(struct seccomp_filter *filter)
{
	kfree(filter->notif);
	filter->notif = NULL;
}
1367
/*
 * Tear down a listener: answer every outstanding notification with
 * -ENOSYS so blocked tasks wake up, then free the notification state.
 */
static void seccomp_notify_detach(struct seccomp_filter *filter)
{
	struct seccomp_knotif *knotif;

	if (!filter)
		return;

	mutex_lock(&filter->notify_lock);

	/*
	 * If this file is being closed because e.g. the task who owned it
	 * died, let's wake everyone up who was waiting on us.
	 */
	list_for_each_entry(knotif, &filter->notif->notifications, list) {
		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
			continue;

		knotif->state = SECCOMP_NOTIFY_REPLIED;
		knotif->error = -ENOSYS;
		knotif->val = 0;

		/*
		 * We do not need to wake up any pending addfd messages, as
		 * the notifier will do that for us, as this just looks
		 * like a standard reply.
		 */
		complete(&knotif->ready);
	}

	seccomp_notify_free(filter);
	mutex_unlock(&filter->notify_lock);
}
1400
/*
 * release() for the listener fd: detach the notifier and drop the
 * fd's reference on the filter.
 */
static int seccomp_notify_release(struct inode *inode, struct file *file)
{
	struct seccomp_filter *filter = file->private_data;

	seccomp_notify_detach(filter);
	__put_seccomp_filter(filter);
	return 0;
}
1409
1410
1411static inline struct seccomp_knotif *
1412find_notification(struct seccomp_filter *filter, u64 id)
1413{
1414 struct seccomp_knotif *cur;
1415
1416 lockdep_assert_held(&filter->notify_lock);
1417
1418 list_for_each_entry(cur, &filter->notif->notifications, list) {
1419 if (cur->id == id)
1420 return cur;
1421 }
1422
1423 return NULL;
1424}
1425
1426
/*
 * SECCOMP_IOCTL_NOTIF_RECV: hand the oldest un-sent notification to
 * userspace.  Blocks (interruptibly) on the request semaphore until
 * one is available.  If the final copy_to_user fails, the notification
 * is re-armed so a retry can pick it up again.
 */
static long seccomp_notify_recv(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_knotif *knotif = NULL, *cur;
	struct seccomp_notif unotif;
	ssize_t ret;

	/* Verify that we're not given garbage to keep struct extensible. */
	ret = check_zeroed_user(buf, sizeof(unotif));
	if (ret < 0)
		return ret;
	if (!ret)
		return -EINVAL;

	memset(&unotif, 0, sizeof(unotif));

	/* Wait for a notification to become available. */
	ret = down_interruptible(&filter->notif->request);
	if (ret < 0)
		return ret;

	mutex_lock(&filter->notify_lock);
	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT) {
			knotif = cur;
			break;
		}
	}

	/*
	 * If we didn't find a notification, it could be that the task was
	 * interrupted by a fatal signal between the time we were woken and
	 * when we were able to acquire the rw lock.
	 */
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	unotif.id = knotif->id;
	unotif.pid = task_pid_vnr(knotif->task);
	unotif.data = *(knotif->data);

	knotif->state = SECCOMP_NOTIFY_SENT;
	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
	ret = 0;
out:
	mutex_unlock(&filter->notify_lock);

	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
		ret = -EFAULT;

		/*
		 * Userspace screwed up. To make sure that we keep this
		 * notification alive, let's reset it back to INIT. It
		 * may have died when we released the lock, so we need to make
		 * sure it's still around.
		 */
		mutex_lock(&filter->notify_lock);
		knotif = find_notification(filter, unotif.id);
		if (knotif) {
			knotif->state = SECCOMP_NOTIFY_INIT;
			/* Re-post the semaphore so a retry can down() it. */
			up(&filter->notif->request);
		}
		mutex_unlock(&filter->notify_lock);
	}

	return ret;
}
1495
/*
 * SECCOMP_IOCTL_NOTIF_SEND: deliver userspace's reply for a previously
 * received notification, waking the blocked task.  FLAG_CONTINUE may
 * not carry an error/val payload.
 */
static long seccomp_notify_send(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_notif_resp resp = {};
	struct seccomp_knotif *knotif;
	long ret;

	if (copy_from_user(&resp, buf, sizeof(resp)))
		return -EFAULT;

	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return -EINVAL;

	/* CONTINUE means "run the real syscall"; a payload is contradictory. */
	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
	    (resp.error || resp.val))
		return -EINVAL;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, resp.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	/* Allow exactly one reply, and only after a RECV. */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out;
	}

	ret = 0;
	knotif->state = SECCOMP_NOTIFY_REPLIED;
	knotif->error = resp.error;
	knotif->val = resp.val;
	knotif->flags = resp.flags;
	complete(&knotif->ready);
out:
	mutex_unlock(&filter->notify_lock);
	return ret;
}
1539
/*
 * SECCOMP_IOCTL_NOTIF_ID_VALID: report whether a notification id still
 * refers to a live, un-replied (SENT) notification — i.e. the blocked
 * task has not died or been answered.  0 = valid, -ENOENT otherwise.
 */
static long seccomp_notify_id_valid(struct seccomp_filter *filter,
				    void __user *buf)
{
	struct seccomp_knotif *knotif;
	u64 id;
	long ret;

	if (copy_from_user(&id, buf, sizeof(id)))
		return -EFAULT;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, id);
	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
		ret = 0;
	else
		ret = -ENOENT;

	mutex_unlock(&filter->notify_lock);
	return ret;
}
1563
/*
 * SECCOMP_IOCTL_NOTIF_ADDFD: queue a request to install a file into
 * the notifying task, then wait for that task to perform the install
 * (see seccomp_handle_addfd()).  @size allows forward-compatible
 * struct growth via copy_struct_from_user().  Returns the fd number
 * installed in the target, or a negative errno.
 */
static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{
	struct seccomp_notif_addfd addfd;
	struct seccomp_knotif *knotif;
	struct seccomp_kaddfd kaddfd;
	int ret;

	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);

	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
		return -EINVAL;

	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
	if (ret)
		return ret;

	/* Only O_CLOEXEC may be set on the new fd. */
	if (addfd.newfd_flags & ~O_CLOEXEC)
		return -EINVAL;

	if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
		return -EINVAL;

	/* A target fd number only makes sense with SETFD. */
	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
		return -EINVAL;

	kaddfd.file = fget(addfd.srcfd);
	if (!kaddfd.file)
		return -EBADF;

	kaddfd.ioctl_flags = addfd.flags;
	kaddfd.flags = addfd.newfd_flags;
	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
	kaddfd.fd = addfd.newfd;
	init_completion(&kaddfd.completion);

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		goto out;

	knotif = find_notification(filter, addfd.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/*
	 * ADDFD is only meaningful between RECV and SEND: the target
	 * task must still be blocked and un-replied.
	 */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out_unlock;
	}

	if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
		/*
		 * Disallow queuing an atomic addfd + send reply while there are
		 * some addfd requests still to process.
		 *
		 * There is no clear reason to support it and allows us to keep
		 * the loop on the other side straight-forward.
		 */
		if (!list_empty(&knotif->addfd)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		/* Allow exactly only one reply */
		knotif->state = SECCOMP_NOTIFY_REPLIED;
	}

	list_add(&kaddfd.list, &knotif->addfd);
	complete(&knotif->ready);
	mutex_unlock(&filter->notify_lock);

	/* Now we wait for it to be processed or be interrupted */
	ret = wait_for_completion_interruptible(&kaddfd.completion);
	if (ret == 0) {
		/*
		 * We had a successful completion. The other side has already
		 * removed us from the addfd queue, and
		 * wait_for_completion_interruptible has a memory barrier upon
		 * success that lets us read this value directly without
		 * locking.
		 */
		ret = kaddfd.ret;
		goto out;
	}

	mutex_lock(&filter->notify_lock);
	/*
	 * Even though we were woken up by a signal and not a successful
	 * completion, a completion may have happened in the mean time.
	 *
	 * We need to check again if the addfd request has been handled,
	 * and if not, we will remove it from the queue.
	 */
	if (list_empty(&kaddfd.list))
		ret = kaddfd.ret;
	else
		list_del(&kaddfd.list);

out_unlock:
	mutex_unlock(&filter->notify_lock);
out:
	fput(kaddfd.file);

	return ret;
}
1677
1678static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1679 unsigned long arg)
1680{
1681 struct seccomp_filter *filter = file->private_data;
1682 void __user *buf = (void __user *)arg;
1683
1684
1685 switch (cmd) {
1686 case SECCOMP_IOCTL_NOTIF_RECV:
1687 return seccomp_notify_recv(filter, buf);
1688 case SECCOMP_IOCTL_NOTIF_SEND:
1689 return seccomp_notify_send(filter, buf);
1690 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1691 case SECCOMP_IOCTL_NOTIF_ID_VALID:
1692 return seccomp_notify_id_valid(filter, buf);
1693 }
1694
1695
1696#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1697 switch (EA_IOCTL(cmd)) {
1698 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1699 return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1700 default:
1701 return -EINVAL;
1702 }
1703}
1704
/*
 * poll() on the listener fd: readable while any notification awaits pickup,
 * writable while any picked-up notification awaits a reply, and HUP once no
 * task uses the filter any more.
 */
static __poll_t seccomp_notify_poll(struct file *file,
				    struct poll_table_struct *poll_tab)
{
	struct seccomp_filter *filter = file->private_data;
	__poll_t ret = 0;
	struct seccomp_knotif *cur;

	poll_wait(file, &filter->wqh, poll_tab);

	/* poll has no way to report -EINTR; signal a generic error instead. */
	if (mutex_lock_interruptible(&filter->notify_lock) < 0)
		return EPOLLERR;

	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT)
			ret |= EPOLLIN | EPOLLRDNORM;
		if (cur->state == SECCOMP_NOTIFY_SENT)
			ret |= EPOLLOUT | EPOLLWRNORM;
		/* Both directions set: nothing more can be learned. */
		if ((ret & EPOLLIN) && (ret & EPOLLOUT))
			break;
	}

	mutex_unlock(&filter->notify_lock);

	/* No remaining users means no further events will ever arrive. */
	if (refcount_read(&filter->users) == 0)
		ret |= EPOLLHUP;

	return ret;
}
1733
/*
 * File operations for the anonymous listener fd returned by
 * SECCOMP_FILTER_FLAG_NEW_LISTENER.  The ioctl handler is layout-independent,
 * so it doubles as the compat handler.
 */
static const struct file_operations seccomp_notify_ops = {
	.poll = seccomp_notify_poll,
	.release = seccomp_notify_release,
	.unlocked_ioctl = seccomp_notify_ioctl,
	.compat_ioctl = seccomp_notify_ioctl,
};
1740
/*
 * Allocate @filter's notification state and wrap it in an anonymous file
 * usable as the user-notification listener.  On success the returned file
 * holds its own reference on @filter; on failure the notification state is
 * freed and an ERR_PTR is returned.
 */
static struct file *init_listener(struct seccomp_filter *filter)
{
	struct file *ret;

	ret = ERR_PTR(-ENOMEM);
	filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
	if (!filter->notif)
		goto out;

	sema_init(&filter->notif->request, 0);
	/* Randomize the id space so cookies are not guessable. */
	filter->notif->next_id = get_random_u64();
	INIT_LIST_HEAD(&filter->notif->notifications);

	ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
				 filter, O_RDWR);
	if (IS_ERR(ret))
		goto out_notif;

	/* The file holds a reference to the filter. */
	__get_seccomp_filter(filter);

out_notif:
	if (IS_ERR(ret))
		seccomp_notify_free(filter);
out:
	return ret;
}
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777static bool has_duplicate_listener(struct seccomp_filter *new_child)
1778{
1779 struct seccomp_filter *cur;
1780
1781
1782 lockdep_assert_held(¤t->sighand->siglock);
1783
1784 if (!new_child->notif)
1785 return false;
1786 for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1787 if (cur->notif)
1788 return true;
1789 }
1790
1791 return false;
1792}
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
/**
 * seccomp_set_mode_filter - internal function for setting seccomp filter
 * @flags:  flags to change filter behavior
 * @filter: struct sock_fprog containing filter
 *
 * This function may be called repeatedly to install additional filters.
 * Every filter successfully installed will be evaluated (and kept) for every
 * system call.
 *
 * Returns 0 on success (or the new listener fd when
 * SECCOMP_FILTER_FLAG_NEW_LISTENER is set), or a negative errno on failure.
 */
static long seccomp_set_mode_filter(unsigned int flags,
				    const char __user *filter)
{
	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
	struct seccomp_filter *prepared = NULL;
	long ret = -EINVAL;
	int listener = -1;
	struct file *listener_f = NULL;

	/* Validate flags. */
	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
		return -EINVAL;

	/*
	 * In the successful case, NEW_LISTENER returns the new listener fd.
	 * But in the failure case, TSYNC returns the thread that died. If you
	 * combine these two flags, there's no way to tell whether something
	 * succeeded or failed. So, let's disallow this combination unless the
	 * user has explicitly opted out of TSYNC's error reporting via
	 * TSYNC_ESRCH.
	 */
	if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
	    (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
	    ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
		return -EINVAL;

	/* Prepare the new filter before holding any locks. */
	prepared = seccomp_prepare_user_filter(filter);
	if (IS_ERR(prepared))
		return PTR_ERR(prepared);

	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
		listener = get_unused_fd_flags(O_CLOEXEC);
		if (listener < 0) {
			ret = listener;
			goto out_free;
		}

		listener_f = init_listener(prepared);
		if (IS_ERR(listener_f)) {
			put_unused_fd(listener);
			ret = PTR_ERR(listener_f);
			goto out_free;
		}
	}

	/*
	 * Make sure we cannot change seccomp or nnp state via TSYNC
	 * while another thread is in the middle of calling exec.
	 */
	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
	    mutex_lock_killable(&current->signal->cred_guard_mutex))
		goto out_put_fd;

	spin_lock_irq(&current->sighand->siglock);

	if (!seccomp_may_assign_mode(seccomp_mode))
		goto out;

	/* Only one listener is permitted per filter chain. */
	if (has_duplicate_listener(prepared)) {
		ret = -EBUSY;
		goto out;
	}

	ret = seccomp_attach_filter(flags, prepared);
	if (ret)
		goto out;

	/* Do not free the successfully attached filter. */
	prepared = NULL;

	seccomp_assign_mode(current, seccomp_mode, flags);
out:
	spin_unlock_irq(&current->sighand->siglock);
	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
		mutex_unlock(&current->signal->cred_guard_mutex);
out_put_fd:
	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
		if (ret) {
			/* Detach the file from the filter before releasing. */
			listener_f->private_data = NULL;
			fput(listener_f);
			put_unused_fd(listener);
			seccomp_notify_detach(prepared);
		} else {
			fd_install(listener, listener_f);
			ret = listener;
		}
	}
out_free:
	seccomp_filter_free(prepared);
	return ret;
}
1897#else
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	/* Filter mode is unavailable without CONFIG_SECCOMP_FILTER. */
	return -EINVAL;
}
1903#endif
1904
1905static long seccomp_get_action_avail(const char __user *uaction)
1906{
1907 u32 action;
1908
1909 if (copy_from_user(&action, uaction, sizeof(action)))
1910 return -EFAULT;
1911
1912 switch (action) {
1913 case SECCOMP_RET_KILL_PROCESS:
1914 case SECCOMP_RET_KILL_THREAD:
1915 case SECCOMP_RET_TRAP:
1916 case SECCOMP_RET_ERRNO:
1917 case SECCOMP_RET_USER_NOTIF:
1918 case SECCOMP_RET_TRACE:
1919 case SECCOMP_RET_LOG:
1920 case SECCOMP_RET_ALLOW:
1921 break;
1922 default:
1923 return -EOPNOTSUPP;
1924 }
1925
1926 return 0;
1927}
1928
1929static long seccomp_get_notif_sizes(void __user *usizes)
1930{
1931 struct seccomp_notif_sizes sizes = {
1932 .seccomp_notif = sizeof(struct seccomp_notif),
1933 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1934 .seccomp_data = sizeof(struct seccomp_data),
1935 };
1936
1937 if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1938 return -EFAULT;
1939
1940 return 0;
1941}
1942
1943
1944static long do_seccomp(unsigned int op, unsigned int flags,
1945 void __user *uargs)
1946{
1947 switch (op) {
1948 case SECCOMP_SET_MODE_STRICT:
1949 if (flags != 0 || uargs != NULL)
1950 return -EINVAL;
1951 return seccomp_set_mode_strict();
1952 case SECCOMP_SET_MODE_FILTER:
1953 return seccomp_set_mode_filter(flags, uargs);
1954 case SECCOMP_GET_ACTION_AVAIL:
1955 if (flags != 0)
1956 return -EINVAL;
1957
1958 return seccomp_get_action_avail(uargs);
1959 case SECCOMP_GET_NOTIF_SIZES:
1960 if (flags != 0)
1961 return -EINVAL;
1962
1963 return seccomp_get_notif_sizes(uargs);
1964 default:
1965 return -EINVAL;
1966 }
1967}
1968
/* seccomp(2) syscall entry point; all work happens in do_seccomp(). */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
			 void __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
1974
1975
1976
1977
1978
1979
1980
1981
1982long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
1983{
1984 unsigned int op;
1985 void __user *uargs;
1986
1987 switch (seccomp_mode) {
1988 case SECCOMP_MODE_STRICT:
1989 op = SECCOMP_SET_MODE_STRICT;
1990
1991
1992
1993
1994
1995 uargs = NULL;
1996 break;
1997 case SECCOMP_MODE_FILTER:
1998 op = SECCOMP_SET_MODE_FILTER;
1999 uargs = filter;
2000 break;
2001 default:
2002 return -EINVAL;
2003 }
2004
2005
2006 return do_seccomp(op, 0, uargs);
2007}
2008
2009#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
/*
 * Fetch @task's @filter_off-th filter, counting from the most recently
 * attached one (offset 0).  On success a reference is taken on the returned
 * filter; the caller must drop it with __put_seccomp_filter().  Returns an
 * ERR_PTR on failure.
 */
static struct seccomp_filter *get_nth_filter(struct task_struct *task,
					     unsigned long filter_off)
{
	struct seccomp_filter *orig, *filter;
	unsigned long count;

	/*
	 * Take the siglock so task->seccomp.filter cannot change under us
	 * while we pin the head of the chain.
	 */
	spin_lock_irq(&task->sighand->siglock);

	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
		spin_unlock_irq(&task->sighand->siglock);
		return ERR_PTR(-EINVAL);
	}

	/* Hold a reference on the head so the chain survives the unlock. */
	orig = task->seccomp.filter;
	__get_seccomp_filter(orig);
	spin_unlock_irq(&task->sighand->siglock);

	/* Count the filters in the chain. */
	count = 0;
	for (filter = orig; filter; filter = filter->prev)
		count++;

	if (filter_off >= count) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	/* Walk from the newest filter down to the requested offset. */
	count -= filter_off;
	for (filter = orig; filter && count > 1; filter = filter->prev)
		count--;

	/* The two walks above must agree; anything else is a logic bug. */
	if (WARN_ON(count != 1 || !filter)) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	__get_seccomp_filter(filter);

out:
	/* Drop the temporary reference on the chain head. */
	__put_seccomp_filter(orig);
	return filter;
}
2055
/*
 * Checkpoint/restore helper: copy @task's @filter_off-th classic-BPF filter
 * program to @data (or just return its length when @data is NULL).  The
 * caller must be CAP_SYS_ADMIN and must not itself be confined by seccomp,
 * to prevent a sandboxed process from inspecting its own filters.
 */
long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
			void __user *data)
{
	struct seccomp_filter *filter;
	struct sock_fprog_kern *fprog;
	long ret;

	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	filter = get_nth_filter(task, filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	fprog = filter->prog->orig_prog;
	if (!fprog) {
		/*
		 * No saved classic-BPF program: the filter was loaded in a
		 * configuration that did not retain orig_prog, so there is
		 * nothing in the expected format to hand back.
		 */
		ret = -EMEDIUMTYPE;
		goto out;
	}

	/* Length-only query when no output buffer is supplied. */
	ret = fprog->len;
	if (!data)
		goto out;

	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
		ret = -EFAULT;

out:
	__put_seccomp_filter(filter);
	return ret;
}
2093
/*
 * Checkpoint/restore helper: report metadata (currently only the LOG flag)
 * for @task's filter selected by the filter_off field read from @data.
 * Returns the number of bytes written on success.  Same access policy as
 * seccomp_get_filter(): CAP_SYS_ADMIN and an unconfined caller.
 */
long seccomp_get_metadata(struct task_struct *task,
			  unsigned long size, void __user *data)
{
	long ret;
	struct seccomp_filter *filter;
	struct seccomp_metadata kmd = {};

	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	/* Never copy more than the kernel's notion of the struct. */
	size = min_t(unsigned long, size, sizeof(kmd));

	if (size < sizeof(kmd.filter_off))
		return -EINVAL;

	if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
		return -EFAULT;

	filter = get_nth_filter(task, kmd.filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	if (filter->log)
		kmd.flags |= SECCOMP_FILTER_FLAG_LOG;

	ret = size;
	if (copy_to_user(data, &kmd, size))
		ret = -EFAULT;

	__put_seccomp_filter(filter);
	return ret;
}
2128#endif
2129
2130#ifdef CONFIG_SYSCTL
2131
2132
/* Human-readable action names used by the seccomp sysctl interface. */
#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process"
#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread"
#define SECCOMP_RET_TRAP_NAME "trap"
#define SECCOMP_RET_ERRNO_NAME "errno"
#define SECCOMP_RET_USER_NOTIF_NAME "user_notif"
#define SECCOMP_RET_TRACE_NAME "trace"
#define SECCOMP_RET_LOG_NAME "log"
#define SECCOMP_RET_ALLOW_NAME "allow"

/*
 * Space-separated list of every supported action name; exposed read-only
 * through the kernel.seccomp.actions_avail sysctl and used to size the
 * scratch buffers for actions_logged parsing.
 */
static const char seccomp_actions_avail[] =
 SECCOMP_RET_KILL_PROCESS_NAME " "
 SECCOMP_RET_KILL_THREAD_NAME " "
 SECCOMP_RET_TRAP_NAME " "
 SECCOMP_RET_ERRNO_NAME " "
 SECCOMP_RET_USER_NOTIF_NAME " "
 SECCOMP_RET_TRACE_NAME " "
 SECCOMP_RET_LOG_NAME " "
 SECCOMP_RET_ALLOW_NAME;
2151
/* Pairs a SECCOMP_LOG_* bit with its sysctl-visible name. */
struct seccomp_log_name {
	u32 log;
	const char *name;
};

/* Bit <-> name table for actions_logged; terminated by an empty entry. */
static const struct seccomp_log_name seccomp_log_names[] = {
	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
	{ SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
	{ }
};
2168
2169static bool seccomp_names_from_actions_logged(char *names, size_t size,
2170 u32 actions_logged,
2171 const char *sep)
2172{
2173 const struct seccomp_log_name *cur;
2174 bool append_sep = false;
2175
2176 for (cur = seccomp_log_names; cur->name && size; cur++) {
2177 ssize_t ret;
2178
2179 if (!(actions_logged & cur->log))
2180 continue;
2181
2182 if (append_sep) {
2183 ret = strscpy(names, sep, size);
2184 if (ret < 0)
2185 return false;
2186
2187 names += ret;
2188 size -= ret;
2189 } else
2190 append_sep = true;
2191
2192 ret = strscpy(names, cur->name, size);
2193 if (ret < 0)
2194 return false;
2195
2196 names += ret;
2197 size -= ret;
2198 }
2199
2200 return true;
2201}
2202
2203static bool seccomp_action_logged_from_name(u32 *action_logged,
2204 const char *name)
2205{
2206 const struct seccomp_log_name *cur;
2207
2208 for (cur = seccomp_log_names; cur->name; cur++) {
2209 if (!strcmp(cur->name, name)) {
2210 *action_logged = cur->log;
2211 return true;
2212 }
2213 }
2214
2215 return false;
2216}
2217
2218static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2219{
2220 char *name;
2221
2222 *actions_logged = 0;
2223 while ((name = strsep(&names, " ")) && *name) {
2224 u32 action_logged = 0;
2225
2226 if (!seccomp_action_logged_from_name(&action_logged, name))
2227 return false;
2228
2229 *actions_logged |= action_logged;
2230 }
2231
2232 return true;
2233}
2234
2235static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
2236 size_t *lenp, loff_t *ppos)
2237{
2238 char names[sizeof(seccomp_actions_avail)];
2239 struct ctl_table table;
2240
2241 memset(names, 0, sizeof(names));
2242
2243 if (!seccomp_names_from_actions_logged(names, sizeof(names),
2244 seccomp_actions_logged, " "))
2245 return -EINVAL;
2246
2247 table = *ro_table;
2248 table.data = names;
2249 table.maxlen = sizeof(names);
2250 return proc_dostring(&table, 0, buffer, lenp, ppos);
2251}
2252
/*
 * sysctl write path for kernel.seccomp.actions_logged: parse the supplied
 * name list and install the resulting bitmask.  The parsed value is also
 * returned through @actions_logged so the caller can audit the attempt.
 */
static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
				size_t *lenp, loff_t *ppos, u32 *actions_logged)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	memset(names, 0, sizeof(names));

	/* Copy the table so we can swap in a writable data buffer. */
	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
	if (ret)
		return ret;

	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
		return -EINVAL;

	/* "allow" must never be logged; reject attempts to enable it. */
	if (*actions_logged & SECCOMP_LOG_ALLOW)
		return -EINVAL;

	seccomp_actions_logged = *actions_logged;
	return 0;
}
2281
/*
 * Emit an audit record for an attempted change of actions_logged.  @ret is
 * the result of the write; on failure the new value is reported as "?".
 * An empty set is reported as "(none)".
 */
static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
				 int ret)
{
	char names[sizeof(seccomp_actions_avail)];
	char old_names[sizeof(seccomp_actions_avail)];
	const char *new = names;
	const char *old = old_names;

	if (!audit_enabled)
		return;

	memset(names, 0, sizeof(names));
	memset(old_names, 0, sizeof(old_names));

	if (ret)
		new = "?";
	else if (!actions_logged)
		new = "(none)";
	else if (!seccomp_names_from_actions_logged(names, sizeof(names),
						    actions_logged, ","))
		new = "?";

	if (!old_actions_logged)
		old = "(none)";
	else if (!seccomp_names_from_actions_logged(old_names,
						    sizeof(old_names),
						    old_actions_logged, ","))
		old = "?";

	/* Returning a void expression from a void function is valid C. */
	return audit_seccomp_actions_logged(new, old, !ret);
}
2313
2314static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2315 void *buffer, size_t *lenp,
2316 loff_t *ppos)
2317{
2318 int ret;
2319
2320 if (write) {
2321 u32 actions_logged = 0;
2322 u32 old_actions_logged = seccomp_actions_logged;
2323
2324 ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2325 &actions_logged);
2326 audit_actions_logged(actions_logged, old_actions_logged, ret);
2327 } else
2328 ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2329
2330 return ret;
2331}
2332
/* Registration path: the table below lands under kernel.seccomp.*. */
static struct ctl_path seccomp_sysctl_path[] = {
	{ .procname = "kernel", },
	{ .procname = "seccomp", },
	{ }
};
2338
static struct ctl_table seccomp_sysctl_table[] = {
	{
		/* Read-only list of every action this kernel supports. */
		.procname = "actions_avail",
		.data = (void *) &seccomp_actions_avail,
		.maxlen = sizeof(seccomp_actions_avail),
		.mode = 0444,
		.proc_handler = proc_dostring,
	},
	{
		/* Read/write set of actions that generate audit records. */
		.procname = "actions_logged",
		.mode = 0644,
		.proc_handler = seccomp_actions_logged_handler,
	},
	{ }
};
2354
2355static int __init seccomp_sysctl_init(void)
2356{
2357 struct ctl_table_header *hdr;
2358
2359 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2360 if (!hdr)
2361 pr_warn("sysctl registration failed\n");
2362 else
2363 kmemleak_not_leak(hdr);
2364
2365 return 0;
2366}
2367
2368device_initcall(seccomp_sysctl_init)
2369
2370#endif
2371
2372#ifdef CONFIG_SECCOMP_CACHE_DEBUG
2373
2374static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2375 const void *bitmap, size_t bitmap_size)
2376{
2377 int nr;
2378
2379 for (nr = 0; nr < bitmap_size; nr++) {
2380 bool cached = test_bit(nr, bitmap);
2381 char *status = cached ? "ALLOW" : "FILTER";
2382
2383 seq_printf(m, "%s %d %s\n", name, nr, status);
2384 }
2385}
2386
/*
 * /proc/<pid>/seccomp_cache: dump the action-cache bitmaps of @task's
 * current filter for the native (and, if built, compat) architecture.
 */
int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	struct seccomp_filter *f;
	unsigned long flags;

	/*
	 * Restrict to CAP_SYS_ADMIN in the initial namespace: a confined
	 * process should not be able to learn the shape of its own (or
	 * anyone else's) seccomp filters from the cache.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	if (!lock_task_sighand(task, &flags))
		return -ESRCH;

	f = READ_ONCE(task->seccomp.filter);
	if (!f) {
		unlock_task_sighand(task, &flags);
		return 0;
	}

	/* Pin the filter so it survives after dropping the sighand lock. */
	__get_seccomp_filter(f);
	unlock_task_sighand(task, &flags);

	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
				    f->cache.allow_native,
				    SECCOMP_ARCH_NATIVE_NR);

#ifdef SECCOMP_ARCH_COMPAT
	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
				    f->cache.allow_compat,
				    SECCOMP_ARCH_COMPAT_NR);
#endif

	__put_seccomp_filter(f);
	return 0;
}
2426#endif
2427