1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#define pr_fmt(fmt) "seccomp: " fmt
17
18#include <linux/refcount.h>
19#include <linux/audit.h>
20#include <linux/compat.h>
21#include <linux/coredump.h>
22#include <linux/kmemleak.h>
23#include <linux/nospec.h>
24#include <linux/prctl.h>
25#include <linux/sched.h>
26#include <linux/sched/task_stack.h>
27#include <linux/seccomp.h>
28#include <linux/slab.h>
29#include <linux/syscalls.h>
30#include <linux/sysctl.h>
31
32#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
33#include <asm/syscall.h>
34#endif
35
36#ifdef CONFIG_SECCOMP_FILTER
37#include <linux/file.h>
38#include <linux/filter.h>
39#include <linux/pid.h>
40#include <linux/ptrace.h>
41#include <linux/capability.h>
42#include <linux/tracehook.h>
43#include <linux/uaccess.h>
44#include <linux/anon_inodes.h>
45#include <linux/lockdep.h>
46
47
48
49
50
51
52
/*
 * Mis-directed ioctl number for NOTIF_ID_VALID (_IOR instead of _IOW).
 * NOTE(review): presumably kept so userspace built against the old,
 * wrong direction bits keeps working — confirm against uapi history.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR	SECCOMP_IOR(2, __u64)

/* Lifecycle of a user notification: queued -> read by listener -> answered. */
enum notify_state {
	SECCOMP_NOTIFY_INIT,
	SECCOMP_NOTIFY_SENT,
	SECCOMP_NOTIFY_REPLIED,
};
60
/**
 * struct seccomp_knotif - kernel-side bookkeeping for one user notification
 * @task: task that triggered the notification
 * @id: cookie (unique per filter) identifying this notification
 * @data: seccomp_data of the syscall being notified about
 * @state: current notify_state of this notification
 * @error: errno to hand back to @task (valid once state is REPLIED)
 * @val: syscall return value for @task (valid once state is REPLIED)
 * @flags: SECCOMP_USER_NOTIF_FLAG_* bits from the listener's reply
 * @ready: completion @task waits on for a reply or pending addfd work
 * @list: linkage in the owning filter's notif->notifications list
 * @addfd: queue of struct seccomp_kaddfd requests targeting this syscall
 */
struct seccomp_knotif {
	struct task_struct *task;

	u64 id;

	const struct seccomp_data *data;

	enum notify_state state;

	/* Reply payload; meaningful once state == SECCOMP_NOTIFY_REPLIED. */
	int error;
	long val;
	u32 flags;

	struct completion ready;

	struct list_head list;

	struct list_head addfd;
};
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/**
 * struct seccomp_kaddfd - kernel-side state for one ADDFD request
 * @file: file to install into the notifying task
 * @fd: requested fd number (used when @setfd is true)
 * @flags: O_* flags for the new fd (code elsewhere accepts only O_CLOEXEC)
 * @ioctl_flags: SECCOMP_ADDFD_FLAG_* bits copied from userspace
 * @setfd: true => replace at @fd; false => allocate a new fd
 * @ret: installed fd number or negative errno, written by the target task
 * @completion: signalled once the target task has processed the request
 * @list: linkage in the owning seccomp_knotif's addfd queue
 */
struct seccomp_kaddfd {
	struct file *file;
	int fd;
	unsigned int flags;
	__u32 ioctl_flags;

	union {
		bool setfd;
		/* @ret reuses the slot once the request has been handled. */
		int ret;
	};
	struct completion completion;
	struct list_head list;
};
132
133
134
135
136
137
138
139
140
141
142
143
144
/**
 * struct notification - container for seccomp userspace notifications
 * @request: semaphore counting notifications waiting to be received
 * @next_id: id to hand out to the next queued notification
 * @notifications: list of outstanding seccomp_knotif structs
 *
 * Fields are protected by the owning filter's notify_lock.
 */
struct notification {
	struct semaphore request;
	u64 next_id;
	struct list_head notifications;
};
150
#ifdef SECCOMP_ARCH_NATIVE
/**
 * struct action_cache - per-filter bitmap of unconditionally-allowed syscalls
 * @allow_native: bit N set => native syscall N is always allowed by this
 *                filter (and, by construction, by all its ancestors)
 * @allow_compat: same, for the compat syscall table (when one exists)
 *
 * Lets the hot path skip running the BPF program entirely for syscalls
 * every filter in the chain provably allows.
 */
struct action_cache {
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
struct action_cache { };

/* No bitmap support on this arch: never short-circuit the filters. */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	return false;
}

/* No bitmap support: nothing to precompute at attach time. */
static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/**
 * struct seccomp_filter - one node in a task's filter chain
 * @refs: reference count keeping this object alive
 * @users: references from live tasks/threads (drives EPOLLHUP on orphan)
 * @log: true if this filter asked for logging of "requested" actions
 * @cache: per-arch bitmap of always-allowed syscalls (see action_cache)
 * @prev: next-older filter in the chain; filters are evaluated newest-first
 * @prog: the classic-BPF-derived program to run against seccomp_data
 * @notif: user-notification state, non-NULL only while a listener exists
 * @notify_lock: protects @notif and all knotif/kaddfd state under it
 * @wqh: wait queue for poll/select on the notification fd
 */
struct seccomp_filter {
	refcount_t refs;
	refcount_t users;
	bool log;
	struct action_cache cache;
	struct seccomp_filter *prev;
	struct bpf_prog *prog;
	struct notification *notif;
	struct mutex notify_lock;
	wait_queue_head_t wqh;
};
228
229
230#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
231
232
233
234
235
236static void populate_seccomp_data(struct seccomp_data *sd)
237{
238
239
240
241
242 struct task_struct *task = current;
243 struct pt_regs *regs = task_pt_regs(task);
244 unsigned long args[6];
245
246 sd->nr = syscall_get_nr(task, regs);
247 sd->arch = syscall_get_arch(task);
248 syscall_get_arguments(task, regs, args);
249 sd->args[0] = args[0];
250 sd->args[1] = args[1];
251 sd->args[2] = args[2];
252 sd->args[3] = args[3];
253 sd->args[4] = args[4];
254 sd->args[5] = args[5];
255 sd->instruction_pointer = KSTK_EIP(task);
256}
257
258
259
260
261
262
263
264
265
266
267
268
269
/**
 * seccomp_check_filter - verify and rewrite classic BPF for seccomp use
 * @filter: array of sock_filter instructions to verify (modified in place)
 * @flen: number of instructions in @filter
 *
 * Rewrites absolute loads into the seccomp-private BPF_LDX|BPF_W|BPF_ABS
 * encoding while bounds- and alignment-checking their offsets against
 * struct seccomp_data, fixes up LEN loads to a constant
 * sizeof(struct seccomp_data), and rejects any instruction that is not
 * on the explicit allow-list below.
 *
 * Returns 0 if the rule set is legal, -EINVAL otherwise.
 */
static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
{
	int pc;

	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;
		u32 k = ftest->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
			/* 32-bit aligned and not out of bounds. */
			if (k >= sizeof(struct seccomp_data) || k & 3)
				return -EINVAL;
			continue;
		case BPF_LD | BPF_W | BPF_LEN:
			ftest->code = BPF_LD | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		case BPF_LDX | BPF_W | BPF_LEN:
			ftest->code = BPF_LDX | BPF_IMM;
			ftest->k = sizeof(struct seccomp_data);
			continue;
		/* Explicitly include allowed calls. */
		case BPF_RET | BPF_K:
		case BPF_RET | BPF_A:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
		case BPF_MISC | BPF_TAX:
		case BPF_MISC | BPF_TXA:
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
		case BPF_JMP | BPF_JA:
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			continue;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
339
340#ifdef SECCOMP_ARCH_NATIVE
/*
 * Test whether @syscall_nr is marked always-allowed in @bitmap.
 * Bounds-checks first and uses array_index_nospec() to clamp the index
 * under speculation before the bit test.
 */
static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
						    size_t bitmap_size,
						    int syscall_nr)
{
	if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
		return false;
	syscall_nr = array_index_nospec(syscall_nr, bitmap_size);

	return test_bit(syscall_nr, bitmap);
}
351
352
353
354
355
356
357
358
/**
 * seccomp_cache_check_allow - check the fast-path allow cache
 * @sfilter: newest filter in the task's chain
 * @sd: seccomp data of the current syscall
 *
 * Returns true if the syscall is provably allowed by every filter in
 * the chain (so the BPF programs need not run); picks the native or
 * compat bitmap based on @sd->arch when both exist.
 */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
					     const struct seccomp_data *sd)
{
	int syscall_nr = sd->nr;
	const struct action_cache *cache = &sfilter->cache;

#ifndef SECCOMP_ARCH_COMPAT
	/* A native-only architecture doesn't need to check sd->arch. */
	return seccomp_cache_check_allow_bitmap(cache->allow_native,
						SECCOMP_ARCH_NATIVE_NR,
						syscall_nr);
#else
	if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
		return seccomp_cache_check_allow_bitmap(cache->allow_native,
							SECCOMP_ARCH_NATIVE_NR,
							syscall_nr);
	if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
		return seccomp_cache_check_allow_bitmap(cache->allow_compat,
							SECCOMP_ARCH_COMPAT_NR,
							syscall_nr);
#endif

	/* Neither table matched sd->arch: should be unreachable. */
	WARN_ON_ONCE(true);
	return false;
}
384#endif
385
386
387
388
389
390
391
392
393
394
/* Strip the data bits, keeping only the (signed, ordered) action value. */
#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
/**
 * seccomp_run_filters - evaluates all seccomp filters against @sd
 * @sd: seccomp data of the current syscall
 * @match: stores the filter that gave the most-restrictive verdict
 *
 * Runs every filter in the chain (newest first) and keeps the
 * lowest-valued — i.e. most restrictive — action, after first trying
 * the always-allow bitmap cache. Returns valid seccomp BPF response
 * values (SECCOMP_RET_KILL_PROCESS if no filter is attached, which
 * should never happen here).
 */
static u32 seccomp_run_filters(const struct seccomp_data *sd,
			       struct seccomp_filter **match)
{
	u32 ret = SECCOMP_RET_ALLOW;
	/* Make sure cross-thread synced filter points somewhere sane. */
	struct seccomp_filter *f =
			READ_ONCE(current->seccomp.filter);

	/* Ensure unexpected behavior doesn't result in failing open. */
	if (WARN_ON(f == NULL))
		return SECCOMP_RET_KILL_PROCESS;

	if (seccomp_cache_check_allow(f, sd))
		return SECCOMP_RET_ALLOW;

	/*
	 * All filters in the list are evaluated and the lowest BPF return
	 * value always takes priority (ignoring the DATA).
	 */
	for (; f; f = f->prev) {
		u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);

		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
			ret = cur_ret;
			*match = f;
		}
	}
	return ret;
}
425#endif
426
427static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
428{
429 assert_spin_locked(¤t->sighand->siglock);
430
431 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
432 return false;
433
434 return true;
435}
436
/* Weak hook: arch code may override to apply speculation mitigations. */
void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
438
/*
 * seccomp_assign_mode - switch @task into @seccomp_mode
 * @task: task to modify (caller holds task->sighand->siglock)
 * @seccomp_mode: SECCOMP_MODE_* value to install
 * @flags: SECCOMP_FILTER_FLAG_* bits from the caller
 *
 * Sets the mode, optionally applies the arch speculation mitigation
 * (unless SPEC_ALLOW was requested), then flips the SECCOMP syscall-work
 * bit so the slow path starts running for @task.
 */
static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure SYSCALL_WORK_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processes want spec flaw mitigation. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_task_syscall_work(task, SECCOMP);
}
456
457#ifdef CONFIG_SECCOMP_FILTER
458
459static int is_ancestor(struct seccomp_filter *parent,
460 struct seccomp_filter *child)
461{
462
463 if (parent == NULL)
464 return 1;
465 for (; child; child = child->prev)
466 if (child == parent)
467 return 1;
468 return 0;
469}
470
471
472
473
474
475
476
477
478
479
480static inline pid_t seccomp_can_sync_threads(void)
481{
482 struct task_struct *thread, *caller;
483
484 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
485 assert_spin_locked(¤t->sighand->siglock);
486
487
488 caller = current;
489 for_each_thread(caller, thread) {
490 pid_t failed;
491
492
493 if (thread == caller)
494 continue;
495
496 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
497 (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
498 is_ancestor(thread->seccomp.filter,
499 caller->seccomp.filter)))
500 continue;
501
502
503 failed = task_pid_vnr(thread);
504
505 if (WARN_ON(failed == 0))
506 failed = -ESRCH;
507 return failed;
508 }
509
510 return 0;
511}
512
/* Destroy one filter node: free its BPF program, then the node itself. */
static inline void seccomp_filter_free(struct seccomp_filter *filter)
{
	if (filter) {
		bpf_prog_destroy(filter->prog);
		kfree(filter);
	}
}
520
/*
 * Drop one "users" reference along the chain starting at @orig; any node
 * whose user count hits zero has its notification pollers woken with
 * EPOLLHUP so listeners learn the filter is orphaned.
 */
static void __seccomp_filter_orphan(struct seccomp_filter *orig)
{
	while (orig && refcount_dec_and_test(&orig->users)) {
		if (waitqueue_active(&orig->wqh))
			wake_up_poll(&orig->wqh, EPOLLHUP);
		orig = orig->prev;
	}
}
529
/* Drop one "refs" reference along the chain, freeing nodes that hit zero. */
static void __put_seccomp_filter(struct seccomp_filter *orig)
{
	/* Clean up single-reference branches iteratively. */
	while (orig && refcount_dec_and_test(&orig->refs)) {
		struct seccomp_filter *freeme = orig;

		orig = orig->prev;
		seccomp_filter_free(freeme);
	}
}
539
/* Release a task's hold on a filter chain: orphan notifiers, then unref. */
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Notify about any unused filters in the task's former filter tree. */
	__seccomp_filter_orphan(orig);
	/* Finally drop all references to the task's former tree. */
	__put_seccomp_filter(orig);
}
547
548
549
550
551
552
553
554
555
556
/**
 * seccomp_filter_release - detach and release @tsk's filter tree
 * @tsk: task being torn down
 *
 * Expected to be called on a dead task: the WARN_ON asserts sighand is
 * already gone, so no locking is needed when clearing the pointer.
 */
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	/* We are effectively holding the siglock by not having any sighand. */
	WARN_ON(tsk->sighand != NULL);

	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	__seccomp_filter_release(orig);
}
568
569
570
571
572
573
574
575
576
577static inline void seccomp_sync_threads(unsigned long flags)
578{
579 struct task_struct *thread, *caller;
580
581 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
582 assert_spin_locked(¤t->sighand->siglock);
583
584
585 caller = current;
586 for_each_thread(caller, thread) {
587
588 if (thread == caller)
589 continue;
590
591
592 get_seccomp_filter(caller);
593
594
595
596
597
598
599 __seccomp_filter_release(thread->seccomp.filter);
600
601
602 smp_store_release(&thread->seccomp.filter,
603 caller->seccomp.filter);
604 atomic_set(&thread->seccomp.filter_count,
605 atomic_read(&caller->seccomp.filter_count));
606
607
608
609
610
611
612
613 if (task_no_new_privs(caller))
614 task_set_no_new_privs(thread);
615
616
617
618
619
620
621
622 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
623 seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
624 flags);
625 }
626}
627
628
629
630
631
632
633
/**
 * seccomp_prepare_filter: Prepares a seccomp filter for use.
 * @fprog: BPF program to install
 *
 * Validates length, requires no_new_privs or CAP_SYS_ADMIN in the
 * current user namespace, and compiles the classic-BPF program via
 * bpf_prog_create_from_user() (with seccomp_check_filter() as the
 * verifier/rewriter). The original program is retained when either
 * checkpoint/restore or the allow-bitmap cache needs it.
 *
 * Returns filter on success or an ERR_PTR on failure.
 */
static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
{
	struct seccomp_filter *sfilter;
	int ret;
	const bool save_orig =
#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
		true;
#else
		false;
#endif

	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
		return ERR_PTR(-EINVAL);

	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));

	/*
	 * Installing a seccomp filter requires that the task has
	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
	 * This avoids scenarios where unprivileged tasks can affect the
	 * behavior of privileged children.
	 */
	if (!task_no_new_privs(current) &&
			!ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
		return ERR_PTR(-EACCES);

	/* Allocate a new seccomp_filter */
	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
	if (!sfilter)
		return ERR_PTR(-ENOMEM);

	mutex_init(&sfilter->notify_lock);
	ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
					seccomp_check_filter, save_orig);
	if (ret < 0) {
		kfree(sfilter);
		return ERR_PTR(ret);
	}

	refcount_set(&sfilter->refs, 1);
	refcount_set(&sfilter->users, 1);
	init_waitqueue_head(&sfilter->wqh);

	return sfilter;
}
679
680
681
682
683
684
685
/**
 * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
 * @user_filter: pointer to the user data containing a sock_fprog.
 *
 * Copies the fprog header from userspace (handling the 32-bit compat
 * layout when called from a compat syscall) and hands it to
 * seccomp_prepare_filter().
 *
 * Returns 0 on success and non-zero otherwise.
 */
static struct seccomp_filter *
seccomp_prepare_user_filter(const char __user *user_filter)
{
	struct sock_fprog fprog;
	struct seccomp_filter *filter = ERR_PTR(-EFAULT);

#ifdef CONFIG_COMPAT
	if (in_compat_syscall()) {
		struct compat_sock_fprog fprog32;

		if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
			goto out;
		fprog.len = fprog32.len;
		fprog.filter = compat_ptr(fprog32.filter);
	} else /* falls through to the if below. */
#endif
	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
		goto out;
	filter = seccomp_prepare_filter(&fprog);
out:
	return filter;
}
707
708#ifdef SECCOMP_ARCH_NATIVE
709
710
711
712
713
714
/**
 * seccomp_is_const_allow - check if filter is constant allow with given data
 * @fprog: The BPF program
 * @sd: The seccomp data to check against, only the syscall number and arch
 *      number are considered constant.
 *
 * Emulates the (already-verified) classic BPF program over a restricted
 * instruction subset where only loads of @sd->nr and @sd->arch, constant
 * jumps/compares, AND-with-constant and RET are interpreted; anything
 * else makes the result non-constant and returns false.
 */
static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
				   struct seccomp_data *sd)
{
	unsigned int reg_value = 0;
	unsigned int pc;
	bool op_res;

	if (WARN_ON_ONCE(!fprog))
		return false;

	for (pc = 0; pc < fprog->len; pc++) {
		struct sock_filter *insn = &fprog->filter[pc];
		u16 code = insn->code;
		u32 k = insn->k;

		switch (code) {
		case BPF_LD | BPF_W | BPF_ABS:
			switch (k) {
			case offsetof(struct seccomp_data, nr):
				reg_value = sd->nr;
				break;
			case offsetof(struct seccomp_data, arch):
				reg_value = sd->arch;
				break;
			default:
				/* can't optimize (non-constant value load) */
				return false;
			}
			break;
		case BPF_RET | BPF_K:
			/* reached return with constant values only, check allow */
			return k == SECCOMP_RET_ALLOW;
		case BPF_JMP | BPF_JA:
			pc += insn->k;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_K:
			switch (BPF_OP(code)) {
			case BPF_JEQ:
				op_res = reg_value == k;
				break;
			case BPF_JGE:
				op_res = reg_value >= k;
				break;
			case BPF_JGT:
				op_res = reg_value > k;
				break;
			case BPF_JSET:
				op_res = !!(reg_value & k);
				break;
			default:
				/* can't optimize (unknown jump) */
				return false;
			}

			pc += op_res ? insn->jt : insn->jf;
			break;
		case BPF_ALU | BPF_AND | BPF_K:
			reg_value &= k;
			break;
		default:
			/* can't optimize (unknown insn) */
			return false;
		}
	}

	/* ran off the end of the filter?! */
	WARN_ON(1);
	return false;
}
787
/*
 * Precompute one arch's allow bitmap for @sfilter.
 * @bitmap_prev: the parent filter's bitmap, or NULL for the first filter.
 *
 * Starts from the parent's bitmap (a syscall can only stay cached-allowed
 * if every ancestor allowed it) or all-ones for a first filter, then
 * clears every syscall the new program does not constantly allow.
 */
static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
					 void *bitmap, const void *bitmap_prev,
					 size_t bitmap_size, int arch)
{
	struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
	struct seccomp_data sd;
	int nr;

	if (bitmap_prev) {
		/* The new filter must be as restrictive as the last. */
		bitmap_copy(bitmap, bitmap_prev, bitmap_size);
	} else {
		/* Before any filters, all syscalls are always allowed. */
		bitmap_fill(bitmap, bitmap_size);
	}

	for (nr = 0; nr < bitmap_size; nr++) {
		/* No bitmap change: not a cacheable action. */
		if (!test_bit(nr, bitmap))
			continue;

		sd.nr = nr;
		sd.arch = arch;

		/* No bitmap change: continue to always allow. */
		if (seccomp_is_const_allow(fprog, &sd))
			continue;

		/*
		 * Not a cacheable action: always run filters.
		 * atomic clear_bit() not needed, filter not visible yet.
		 */
		__clear_bit(nr, bitmap);
	}
}
823
824
825
826
827
828
829
/**
 * seccomp_cache_prepare - emulate the filter to find cacheable syscalls
 * @sfilter: The seccomp filter
 *
 * Returns 0 if successful or -errno if error occurred.
 */
static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
	struct action_cache *cache = &sfilter->cache;
	const struct action_cache *cache_prev =
		sfilter->prev ? &sfilter->prev->cache : NULL;

	seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
				     cache_prev ? cache_prev->allow_native : NULL,
				     SECCOMP_ARCH_NATIVE_NR,
				     SECCOMP_ARCH_NATIVE);

#ifdef SECCOMP_ARCH_COMPAT
	seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
				     cache_prev ? cache_prev->allow_compat : NULL,
				     SECCOMP_ARCH_COMPAT_NR,
				     SECCOMP_ARCH_COMPAT);
#endif
}
848#endif
849
850
851
852
853
854
855
856
857
858
859
860
861
862static long seccomp_attach_filter(unsigned int flags,
863 struct seccomp_filter *filter)
864{
865 unsigned long total_insns;
866 struct seccomp_filter *walker;
867
868 assert_spin_locked(¤t->sighand->siglock);
869
870
871 total_insns = filter->prog->len;
872 for (walker = current->seccomp.filter; walker; walker = walker->prev)
873 total_insns += walker->prog->len + 4;
874 if (total_insns > MAX_INSNS_PER_PATH)
875 return -ENOMEM;
876
877
878 if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
879 int ret;
880
881 ret = seccomp_can_sync_threads();
882 if (ret) {
883 if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
884 return -ESRCH;
885 else
886 return ret;
887 }
888 }
889
890
891 if (flags & SECCOMP_FILTER_FLAG_LOG)
892 filter->log = true;
893
894
895
896
897
898 filter->prev = current->seccomp.filter;
899 seccomp_cache_prepare(filter);
900 current->seccomp.filter = filter;
901 atomic_inc(¤t->seccomp.filter_count);
902
903
904 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
905 seccomp_sync_threads(flags);
906
907 return 0;
908}
909
/* Take one "refs" reference on a single filter node. */
static void __get_seccomp_filter(struct seccomp_filter *filter)
{
	refcount_inc(&filter->refs);
}
914
915
/* get_seccomp_filter - increments the reference count of the filter on @tsk */
void get_seccomp_filter(struct task_struct *tsk)
{
	struct seccomp_filter *orig = tsk->seccomp.filter;

	if (!orig)
		return;
	__get_seccomp_filter(orig);
	/* A new task reference also keeps the chain "live" for notifiers. */
	refcount_inc(&orig->users);
}
924
/* Fill @info for a SIGSYS/SYS_SECCOMP signal about @syscall with @reason. */
static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
{
	clear_siginfo(info);
	info->si_signo = SIGSYS;
	info->si_code = SYS_SECCOMP;
	info->si_call_addr = (void __user *)KSTK_EIP(current);
	info->si_errno = reason;
	info->si_arch = syscall_get_arch(current);
	info->si_syscall = syscall;
}
935
936
937
938
939
940
941
942
/**
 * seccomp_send_sigsys - signals the task to allow in-process syscall emulation
 * @syscall: syscall number to send to userland
 * @reason: filter-supplied reason code to send to userland (via si_errno)
 *
 * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
 */
static void seccomp_send_sigsys(int syscall, int reason)
{
	struct kernel_siginfo info;

	seccomp_init_siginfo(&info, syscall, reason);
	force_sig_info(&info);
}
949#endif
950
951
/* For use with seccomp_actions_logged: one bit per loggable action. */
#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
#define SECCOMP_LOG_TRAP		(1 << 2)
#define SECCOMP_LOG_ERRNO		(1 << 3)
#define SECCOMP_LOG_TRACE		(1 << 4)
#define SECCOMP_LOG_LOG			(1 << 5)
#define SECCOMP_LOG_ALLOW		(1 << 6)
#define SECCOMP_LOG_USER_NOTIF		(1 << 7)

/* Default: log every action except ALLOW. */
static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
				    SECCOMP_LOG_KILL_THREAD  |
				    SECCOMP_LOG_TRAP  |
				    SECCOMP_LOG_ERRNO |
				    SECCOMP_LOG_USER_NOTIF |
				    SECCOMP_LOG_TRACE |
				    SECCOMP_LOG_LOG;
968
/*
 * seccomp_log - maybe emit an audit record for a seccomp decision
 * @syscall: syscall number the decision applies to
 * @signr: signal delivered as a result (0 if none)
 * @action: the SECCOMP_RET_* action taken
 * @requested: whether the matched filter asked for logging
 *             (SECCOMP_FILTER_FLAG_LOG); only consulted for actions
 *             that are logged on request rather than unconditionally
 *
 * Kill actions and RET_LOG are logged whenever enabled in
 * seccomp_actions_logged; TRAP/ERRNO/TRACE/USER_NOTIF additionally
 * require @requested; ALLOW is never logged.
 */
static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
			       bool requested)
{
	bool log = false;

	switch (action) {
	case SECCOMP_RET_ALLOW:
		break;
	case SECCOMP_RET_TRAP:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
		break;
	case SECCOMP_RET_ERRNO:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
		break;
	case SECCOMP_RET_TRACE:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
		break;
	case SECCOMP_RET_USER_NOTIF:
		log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
		break;
	case SECCOMP_RET_LOG:
		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
		break;
	case SECCOMP_RET_KILL_THREAD:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
		break;
	case SECCOMP_RET_KILL_PROCESS:
	default:
		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
	}

	/*
	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
	 * any action from being logged by removing the action name from the
	 * seccomp_actions_logged sysctl.
	 */
	if (!log)
		return;

	audit_seccomp(syscall, signr, action);
}
1011
1012
1013
1014
1015
1016
/*
 * Secure computing mode 1 allows only read/write/exit/sigreturn.
 * To be fully secure this must be combined with rlimit
 * to limit the stack allocations too.
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1, /* negative terminated */
};
1021
/*
 * Enforce strict (mode 1) seccomp: if @this_syscall is not on the
 * -1-terminated allow-list (native or compat), log and kill the thread.
 */
static void __secure_computing_strict(int this_syscall)
{
	const int *allowed_syscalls = mode1_syscalls;

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		allowed_syscalls = get_compat_mode1_syscalls();
#endif
	do {
		if (*allowed_syscalls == this_syscall)
			return;
	} while (*++allowed_syscalls != -1);

#ifdef SECCOMP_DEBUG
	dump_stack();
#endif
	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
	do_exit(SIGKILL);
}
1040
1041#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
/*
 * Strict-mode entry point for architectures without filter support.
 * No-op while ptrace has suspended seccomp (checkpoint/restore), or when
 * seccomp is disabled; BUG on any mode other than strict.
 */
void secure_computing_strict(int this_syscall)
{
	int mode = current->seccomp.mode;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return;

	if (mode == SECCOMP_MODE_DISABLED)
		return;
	else if (mode == SECCOMP_MODE_STRICT)
		__secure_computing_strict(this_syscall);
	else
		BUG();
}
1057#else
1058
1059#ifdef CONFIG_SECCOMP_FILTER
/* Hand out the next notification id for @filter. */
static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
{
	/*
	 * Note: overflow is ok here, the id just needs to be unique per
	 * filter.
	 */
	lockdep_assert_held(&filter->notify_lock);
	return filter->notif->next_id++;
}
1069
/*
 * Install @addfd's file into the current (notifying) task and record
 * the result. With the SEND flag, also turn the result into the
 * notification's reply: a failed install re-arms the notification
 * (state back to SENT) so the listener can respond by other means.
 * Called with the matching filter's notify_lock held.
 */
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
	int fd;

	/*
	 * Remove the notification, and reset the list pointers, indicating
	 * that it has been handled.
	 */
	list_del_init(&addfd->list);
	if (!addfd->setfd)
		fd = receive_fd(addfd->file, addfd->flags);
	else
		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
	addfd->ret = fd;

	if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
		/* If we fail reset and return an error to the notifier */
		if (fd < 0) {
			n->state = SECCOMP_NOTIFY_SENT;
		} else {
			/* Return the FD we just added */
			n->flags = 0;
			n->error = 0;
			n->val = fd;
		}
	}

	/*
	 * Mark the notification as completed. From this point, addfd mem
	 * might be invalidated; we can't safely read it anymore.
	 */
	complete(&addfd->completion);
}
1103
/*
 * Queue a user notification for @this_syscall on @match and block until
 * the listener replies (or the wait is interrupted).
 *
 * Returns 0 when the syscall should proceed (listener replied with
 * SECCOMP_USER_NOTIF_FLAG_CONTINUE), otherwise -1 after installing the
 * listener-chosen (or -ENOSYS) return value into the task's registers.
 */
static int seccomp_do_user_notification(int this_syscall,
					struct seccomp_filter *match,
					const struct seccomp_data *sd)
{
	int err;
	u32 flags = 0;
	long ret = 0;
	struct seccomp_knotif n = {};
	struct seccomp_kaddfd *addfd, *tmp;

	mutex_lock(&match->notify_lock);
	err = -ENOSYS;
	if (!match->notif)
		goto out;

	n.task = current;
	n.state = SECCOMP_NOTIFY_INIT;
	n.data = sd;
	n.id = seccomp_next_notify_id(match);
	init_completion(&n.ready);
	list_add(&n.list, &match->notif->notifications);
	INIT_LIST_HEAD(&n.addfd);

	up(&match->notif->request);
	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);

	/*
	 * This is where we wait for a reply from userspace.
	 */
	do {
		mutex_unlock(&match->notify_lock);
		err = wait_for_completion_interruptible(&n.ready);
		mutex_lock(&match->notify_lock);
		if (err != 0)
			goto interrupted;

		addfd = list_first_entry_or_null(&n.addfd,
						 struct seccomp_kaddfd, list);
		/* Check if we were woken up by a addfd message */
		if (addfd)
			seccomp_handle_addfd(addfd, &n);

	} while (n.state != SECCOMP_NOTIFY_REPLIED);

	ret = n.val;
	err = n.error;
	flags = n.flags;

interrupted:
	/* If there were any pending addfd calls, clear them out */
	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
		/* The process went away before we got a chance to handle it */
		addfd->ret = -ESRCH;
		list_del_init(&addfd->list);
		complete(&addfd->completion);
	}

	/*
	 * Note that it's possible the listener died in between the time when
	 * we were notified of a response (or a signal) and when we were able
	 * to re-acquire the lock, so only delete from the list if the
	 * notification actually exists.
	 */
	if (match->notif)
		list_del(&n.list);
out:
	mutex_unlock(&match->notify_lock);

	/* Userspace requests to continue the syscall. */
	if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return 0;

	syscall_set_return_value(current, current_pt_regs(),
				 err, ret);
	return -1;
}
1184
/*
 * Run the task's filter chain against @this_syscall and carry out the
 * resulting action.
 *
 * @sd may be NULL, in which case the seccomp_data is populated from the
 * current registers. @recheck_after_trace marks the re-evaluation done
 * after a RET_TRACE detour, where a second RET_TRACE must not recurse.
 *
 * Returns 0 when the syscall should proceed, -1 when it must be skipped
 * (return value already set), and does not return at all for the kill
 * actions.
 */
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	u32 filter_ret, action;
	struct seccomp_filter *match = NULL;
	int data;
	struct seccomp_data sd_local;

	/*
	 * Make sure that any changes to mode from another thread have
	 * been seen after SYSCALL_WORK_SECCOMP was seen.
	 */
	smp_rmb();

	if (!sd) {
		populate_seccomp_data(&sd_local);
		sd = &sd_local;
	}

	filter_ret = seccomp_run_filters(sd, &match);
	data = filter_ret & SECCOMP_RET_DATA;
	action = filter_ret & SECCOMP_RET_ACTION_FULL;

	switch (action) {
	case SECCOMP_RET_ERRNO:
		/* Set low-order bits as an errno, capped at MAX_ERRNO. */
		if (data > MAX_ERRNO)
			data = MAX_ERRNO;
		syscall_set_return_value(current, current_pt_regs(),
					 -data, 0);
		goto skip;

	case SECCOMP_RET_TRAP:
		/* Show the handler the original registers. */
		syscall_rollback(current, current_pt_regs());
		/* Let the filter pass back 16 bits of data. */
		seccomp_send_sigsys(this_syscall, data);
		goto skip;

	case SECCOMP_RET_TRACE:
		/* We've been put in this state by the ptracer already. */
		if (recheck_after_trace)
			return 0;

		/* ENOSYS these calls if there is no tracer attached. */
		if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
			syscall_set_return_value(current,
						 current_pt_regs(),
						 -ENOSYS, 0);
			goto skip;
		}

		/* Allow the BPF to provide the event message */
		ptrace_event(PTRACE_EVENT_SECCOMP, data);
		/*
		 * The delivery of a fatal signal during event
		 * notification may silently skip tracer notification,
		 * which could leave us with a potentially unmodified
		 * syscall that the tracer would have liked to have
		 * changed. Since the process is about to die, we just
		 * force the syscall to be skipped and let the signal
		 * kill the process and correctly handle any tracer exit
		 * notifications.
		 */
		if (fatal_signal_pending(current))
			goto skip;
		/* Check if the tracer forced the syscall to be skipped. */
		this_syscall = syscall_get_nr(current, current_pt_regs());
		if (this_syscall < 0)
			goto skip;

		/*
		 * Recheck the syscall, since it may have changed. This
		 * intentionally uses a NULL struct seccomp_data to force
		 * a reload of all registers. This does not goto skip since
		 * a skip would have already been reported.
		 */
		if (__seccomp_filter(this_syscall, NULL, true))
			return -1;

		return 0;

	case SECCOMP_RET_USER_NOTIF:
		if (seccomp_do_user_notification(this_syscall, match, sd))
			goto skip;

		return 0;

	case SECCOMP_RET_LOG:
		seccomp_log(this_syscall, 0, action, true);
		return 0;

	case SECCOMP_RET_ALLOW:
		/*
		 * Note that the "match" filter will always be NULL for
		 * this action since SECCOMP_RET_ALLOW is the starting
		 * state in seccomp_run_filters().
		 */
		return 0;

	case SECCOMP_RET_KILL_THREAD:
	case SECCOMP_RET_KILL_PROCESS:
	default:
		seccomp_log(this_syscall, SIGSYS, action, true);
		/* Dump core only if this is the last remaining thread. */
		if (action != SECCOMP_RET_KILL_THREAD ||
		    get_nr_threads(current) == 1) {
			kernel_siginfo_t info;

			/* Show the original registers in the dump. */
			syscall_rollback(current, current_pt_regs());
			/* Trigger a manual coredump since do_exit skips it. */
			seccomp_init_siginfo(&info, this_syscall, data);
			do_coredump(&info);
		}
		if (action == SECCOMP_RET_KILL_THREAD)
			do_exit(SIGSYS);
		else
			do_group_exit(SIGSYS);
	}

	unreachable();

skip:
	seccomp_log(this_syscall, 0, action, match ? match->log : false);
	return -1;
}
1312#else
/* Without CONFIG_SECCOMP_FILTER, mode FILTER must never be reachable. */
static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
			    const bool recheck_after_trace)
{
	BUG();

	return -1;
}
1320#endif
1321
/*
 * Main seccomp syscall-entry hook. Returns 0 if the syscall may
 * proceed, -1 if it must be skipped. Bypassed entirely while ptrace
 * has suspended seccomp (checkpoint/restore).
 */
int __secure_computing(const struct seccomp_data *sd)
{
	int mode = current->seccomp.mode;
	int this_syscall;

	if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
	    unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
		return 0;

	this_syscall = sd ? sd->nr :
		syscall_get_nr(current, current_pt_regs());

	switch (mode) {
	case SECCOMP_MODE_STRICT:
		__secure_computing_strict(this_syscall);  /* may call do_exit */
		return 0;
	case SECCOMP_MODE_FILTER:
		return __seccomp_filter(this_syscall, sd, false);
	default:
		BUG();
	}
}
1344#endif
1345
/* prctl(PR_GET_SECCOMP) handler: report the caller's seccomp mode. */
long prctl_get_seccomp(void)
{
	return current->seccomp.mode;
}
1350
1351
1352
1353
1354
1355
1356
1357
1358static long seccomp_set_mode_strict(void)
1359{
1360 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
1361 long ret = -EINVAL;
1362
1363 spin_lock_irq(¤t->sighand->siglock);
1364
1365 if (!seccomp_may_assign_mode(seccomp_mode))
1366 goto out;
1367
1368#ifdef TIF_NOTSC
1369 disable_TSC();
1370#endif
1371 seccomp_assign_mode(current, seccomp_mode, 0);
1372 ret = 0;
1373
1374out:
1375 spin_unlock_irq(¤t->sighand->siglock);
1376
1377 return ret;
1378}
1379
1380#ifdef CONFIG_SECCOMP_FILTER
/* Free @filter's notification container and clear the pointer. */
static void seccomp_notify_free(struct seccomp_filter *filter)
{
	kfree(filter->notif);
	filter->notif = NULL;
}
1386
/*
 * Tear down @filter's notification state when the listener goes away:
 * fail every un-replied notification with -ENOSYS, then free the
 * container so new notifications get -ENOSYS immediately.
 */
static void seccomp_notify_detach(struct seccomp_filter *filter)
{
	struct seccomp_knotif *knotif;

	if (!filter)
		return;

	mutex_lock(&filter->notify_lock);

	/*
	 * If this file is being closed because e.g. the task who owned it
	 * died, let's wake everyone up who was waiting on us.
	 */
	list_for_each_entry(knotif, &filter->notif->notifications, list) {
		if (knotif->state == SECCOMP_NOTIFY_REPLIED)
			continue;

		knotif->state = SECCOMP_NOTIFY_REPLIED;
		knotif->error = -ENOSYS;
		knotif->val = 0;

		/*
		 * We do not need to wake up any pending addfd messages, as
		 * the notifier will do that for us, as this just looks
		 * like a standard reply.
		 */
		complete(&knotif->ready);
	}

	seccomp_notify_free(filter);
	mutex_unlock(&filter->notify_lock);
}
1419
/* fops release for the notification fd: detach and drop our filter ref. */
static int seccomp_notify_release(struct inode *inode, struct file *file)
{
	struct seccomp_filter *filter = file->private_data;

	seccomp_notify_detach(filter);
	__put_seccomp_filter(filter);
	return 0;
}
1428
1429
1430static inline struct seccomp_knotif *
1431find_notification(struct seccomp_filter *filter, u64 id)
1432{
1433 struct seccomp_knotif *cur;
1434
1435 lockdep_assert_held(&filter->notify_lock);
1436
1437 list_for_each_entry(cur, &filter->notif->notifications, list) {
1438 if (cur->id == id)
1439 return cur;
1440 }
1441
1442 return NULL;
1443}
1444
1445
/*
 * SECCOMP_IOCTL_NOTIF_RECV: hand the oldest un-received notification
 * to userspace via @buf.
 *
 * Requires @buf to be zeroed by the caller (forward-compat check).
 * Blocks on the request semaphore until a notification is queued;
 * marks the chosen notification SENT. If the final copy_to_user()
 * fails, the notification is rolled back to INIT so it can be
 * received again.
 */
static long seccomp_notify_recv(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_knotif *knotif = NULL, *cur;
	struct seccomp_notif unotif;
	ssize_t ret;

	/* Verify that we're not given garbage to keep struct extensible. */
	ret = check_zeroed_user(buf, sizeof(unotif));
	if (ret < 0)
		return ret;
	if (!ret)
		return -EINVAL;

	memset(&unotif, 0, sizeof(unotif));

	ret = down_interruptible(&filter->notif->request);
	if (ret < 0)
		return ret;

	mutex_lock(&filter->notify_lock);
	list_for_each_entry(cur, &filter->notif->notifications, list) {
		if (cur->state == SECCOMP_NOTIFY_INIT) {
			knotif = cur;
			break;
		}
	}

	/*
	 * If we didn't find a notification, it could be that the task was
	 * interrupted by a fatal signal between the time we were woken and
	 * when we were able to acquire the rw lock.
	 */
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	unotif.id = knotif->id;
	unotif.pid = task_pid_vnr(knotif->task);
	unotif.data = *(knotif->data);

	knotif->state = SECCOMP_NOTIFY_SENT;
	wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
	ret = 0;
out:
	mutex_unlock(&filter->notify_lock);

	if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
		ret = -EFAULT;

		/*
		 * Userspace screwed up. To make sure that we keep this
		 * notification alive, let's reset it back to INIT. It
		 * may have died when we released the lock, so we need to make
		 * sure it's still around.
		 */
		mutex_lock(&filter->notify_lock);
		knotif = find_notification(filter, unotif.id);
		if (knotif) {
			knotif->state = SECCOMP_NOTIFY_INIT;
			up(&filter->notif->request);
		}
		mutex_unlock(&filter->notify_lock);
	}

	return ret;
}
1514
/*
 * SECCOMP_IOCTL_NOTIF_SEND: deliver userspace's reply for a SENT
 * notification. CONTINUE replies must not also carry an error/value.
 * On success the notification moves to REPLIED and the waiting task
 * is woken via its completion.
 */
static long seccomp_notify_send(struct seccomp_filter *filter,
				void __user *buf)
{
	struct seccomp_notif_resp resp = {};
	struct seccomp_knotif *knotif;
	long ret;

	if (copy_from_user(&resp, buf, sizeof(resp)))
		return -EFAULT;

	if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
		return -EINVAL;

	if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
	    (resp.error || resp.val))
		return -EINVAL;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, resp.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out;
	}

	/* Allow exactly one reply. */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out;
	}

	ret = 0;
	knotif->state = SECCOMP_NOTIFY_REPLIED;
	knotif->error = resp.error;
	knotif->val = resp.val;
	knotif->flags = resp.flags;
	complete(&knotif->ready);
out:
	mutex_unlock(&filter->notify_lock);
	return ret;
}
1558
/*
 * SECCOMP_IOCTL_NOTIF_ID_VALID: report whether @buf's id still names a
 * live, un-replied (SENT) notification. Returns 0 when valid, -ENOENT
 * otherwise.
 */
static long seccomp_notify_id_valid(struct seccomp_filter *filter,
				    void __user *buf)
{
	struct seccomp_knotif *knotif;
	u64 id;
	long ret;

	if (copy_from_user(&id, buf, sizeof(id)))
		return -EFAULT;

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		return ret;

	knotif = find_notification(filter, id);
	if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
		ret = 0;
	else
		ret = -ENOENT;

	mutex_unlock(&filter->notify_lock);
	return ret;
}
1582
/*
 * SECCOMP_IOCTL_NOTIF_ADDFD: ask the notifying task to install a copy
 * of our @uaddfd->srcfd file as one of its own fds.
 *
 * Validates the (versioned) struct, queues a seccomp_kaddfd on the
 * target notification's addfd list, wakes the notifying task, and
 * waits for it to complete the install. With ADDFD_FLAG_SEND the
 * installed fd also becomes the notification's reply, so the target
 * notification must have an empty addfd queue and is moved straight
 * to REPLIED.
 *
 * Returns the fd number installed in the target on success, or a
 * negative errno.
 */
static long seccomp_notify_addfd(struct seccomp_filter *filter,
				 struct seccomp_notif_addfd __user *uaddfd,
				 unsigned int size)
{
	struct seccomp_notif_addfd addfd;
	struct seccomp_knotif *knotif;
	struct seccomp_kaddfd kaddfd;
	int ret;

	BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
	BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);

	if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
		return -EINVAL;

	ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
	if (ret)
		return ret;

	if (addfd.newfd_flags & ~O_CLOEXEC)
		return -EINVAL;

	if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
		return -EINVAL;

	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
		return -EINVAL;

	kaddfd.file = fget(addfd.srcfd);
	if (!kaddfd.file)
		return -EBADF;

	kaddfd.ioctl_flags = addfd.flags;
	kaddfd.flags = addfd.newfd_flags;
	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
	kaddfd.fd = addfd.newfd;
	init_completion(&kaddfd.completion);

	ret = mutex_lock_interruptible(&filter->notify_lock);
	if (ret < 0)
		goto out;

	knotif = find_notification(filter, addfd.id);
	if (!knotif) {
		ret = -ENOENT;
		goto out_unlock;
	}

	/*
	 * We do not want to allow for FD injection to occur before the
	 * notification has been picked up by a userspace handler, or after
	 * the notification has been replied to.
	 */
	if (knotif->state != SECCOMP_NOTIFY_SENT) {
		ret = -EINPROGRESS;
		goto out_unlock;
	}

	if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
		/*
		 * Disallow queuing an atomic addfd + send reply while there are
		 * some addfd requests still to process.
		 *
		 * There is no clear reason to support it and allows us to keep
		 * the loop on the other side straight-forward.
		 */
		if (!list_empty(&knotif->addfd)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		/* Allow exactly only one reply */
		knotif->state = SECCOMP_NOTIFY_REPLIED;
	}

	list_add(&kaddfd.list, &knotif->addfd);
	complete(&knotif->ready);
	mutex_unlock(&filter->notify_lock);

	/* Now we wait for it to be processed or be interrupted */
	ret = wait_for_completion_interruptible(&kaddfd.completion);
	if (ret == 0) {
		/*
		 * We had a successful completion. The other side has already
		 * removed us from the addfd queue, and
		 * wait_for_completion_interruptible has a memory barrier upon
		 * success that lets us read this value directly without
		 * locking.
		 */
		ret = kaddfd.ret;
		goto out;
	}

	mutex_lock(&filter->notify_lock);
	/*
	 * Even though we were woken up by a signal and not a successful
	 * completion, a completion may have happened in the mean time.
	 *
	 * We need to check again if the addfd request has been handled,
	 * and if not, we will remove it from the queue.
	 */
	if (list_empty(&kaddfd.list))
		ret = kaddfd.ret;
	else
		list_del(&kaddfd.list);

out_unlock:
	mutex_unlock(&filter->notify_lock);
out:
	fput(kaddfd.file);

	return ret;
}
1696
1697static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1698 unsigned long arg)
1699{
1700 struct seccomp_filter *filter = file->private_data;
1701 void __user *buf = (void __user *)arg;
1702
1703
1704 switch (cmd) {
1705 case SECCOMP_IOCTL_NOTIF_RECV:
1706 return seccomp_notify_recv(filter, buf);
1707 case SECCOMP_IOCTL_NOTIF_SEND:
1708 return seccomp_notify_send(filter, buf);
1709 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1710 case SECCOMP_IOCTL_NOTIF_ID_VALID:
1711 return seccomp_notify_id_valid(filter, buf);
1712 }
1713
1714
1715#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1716 switch (EA_IOCTL(cmd)) {
1717 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1718 return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1719 default:
1720 return -EINVAL;
1721 }
1722}
1723
1724static __poll_t seccomp_notify_poll(struct file *file,
1725 struct poll_table_struct *poll_tab)
1726{
1727 struct seccomp_filter *filter = file->private_data;
1728 __poll_t ret = 0;
1729 struct seccomp_knotif *cur;
1730
1731 poll_wait(file, &filter->wqh, poll_tab);
1732
1733 if (mutex_lock_interruptible(&filter->notify_lock) < 0)
1734 return EPOLLERR;
1735
1736 list_for_each_entry(cur, &filter->notif->notifications, list) {
1737 if (cur->state == SECCOMP_NOTIFY_INIT)
1738 ret |= EPOLLIN | EPOLLRDNORM;
1739 if (cur->state == SECCOMP_NOTIFY_SENT)
1740 ret |= EPOLLOUT | EPOLLWRNORM;
1741 if ((ret & EPOLLIN) && (ret & EPOLLOUT))
1742 break;
1743 }
1744
1745 mutex_unlock(&filter->notify_lock);
1746
1747 if (refcount_read(&filter->users) == 0)
1748 ret |= EPOLLHUP;
1749
1750 return ret;
1751}
1752
/* File operations for the seccomp user-notification listener fd. */
static const struct file_operations seccomp_notify_ops = {
	.poll = seccomp_notify_poll,
	.release = seccomp_notify_release,
	.unlocked_ioctl = seccomp_notify_ioctl,
	.compat_ioctl = seccomp_notify_ioctl,
};
1759
1760static struct file *init_listener(struct seccomp_filter *filter)
1761{
1762 struct file *ret;
1763
1764 ret = ERR_PTR(-ENOMEM);
1765 filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
1766 if (!filter->notif)
1767 goto out;
1768
1769 sema_init(&filter->notif->request, 0);
1770 filter->notif->next_id = get_random_u64();
1771 INIT_LIST_HEAD(&filter->notif->notifications);
1772
1773 ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
1774 filter, O_RDWR);
1775 if (IS_ERR(ret))
1776 goto out_notif;
1777
1778
1779 __get_seccomp_filter(filter);
1780
1781out_notif:
1782 if (IS_ERR(ret))
1783 seccomp_notify_free(filter);
1784out:
1785 return ret;
1786}
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796static bool has_duplicate_listener(struct seccomp_filter *new_child)
1797{
1798 struct seccomp_filter *cur;
1799
1800
1801 lockdep_assert_held(¤t->sighand->siglock);
1802
1803 if (!new_child->notif)
1804 return false;
1805 for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1806 if (cur->notif)
1807 return true;
1808 }
1809
1810 return false;
1811}
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826static long seccomp_set_mode_filter(unsigned int flags,
1827 const char __user *filter)
1828{
1829 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
1830 struct seccomp_filter *prepared = NULL;
1831 long ret = -EINVAL;
1832 int listener = -1;
1833 struct file *listener_f = NULL;
1834
1835
1836 if (flags & ~SECCOMP_FILTER_FLAG_MASK)
1837 return -EINVAL;
1838
1839
1840
1841
1842
1843
1844
1845
1846 if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
1847 (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
1848 ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
1849 return -EINVAL;
1850
1851
1852 prepared = seccomp_prepare_user_filter(filter);
1853 if (IS_ERR(prepared))
1854 return PTR_ERR(prepared);
1855
1856 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1857 listener = get_unused_fd_flags(O_CLOEXEC);
1858 if (listener < 0) {
1859 ret = listener;
1860 goto out_free;
1861 }
1862
1863 listener_f = init_listener(prepared);
1864 if (IS_ERR(listener_f)) {
1865 put_unused_fd(listener);
1866 ret = PTR_ERR(listener_f);
1867 goto out_free;
1868 }
1869 }
1870
1871
1872
1873
1874
1875 if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
1876 mutex_lock_killable(¤t->signal->cred_guard_mutex))
1877 goto out_put_fd;
1878
1879 spin_lock_irq(¤t->sighand->siglock);
1880
1881 if (!seccomp_may_assign_mode(seccomp_mode))
1882 goto out;
1883
1884 if (has_duplicate_listener(prepared)) {
1885 ret = -EBUSY;
1886 goto out;
1887 }
1888
1889 ret = seccomp_attach_filter(flags, prepared);
1890 if (ret)
1891 goto out;
1892
1893 prepared = NULL;
1894
1895 seccomp_assign_mode(current, seccomp_mode, flags);
1896out:
1897 spin_unlock_irq(¤t->sighand->siglock);
1898 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
1899 mutex_unlock(¤t->signal->cred_guard_mutex);
1900out_put_fd:
1901 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1902 if (ret) {
1903 listener_f->private_data = NULL;
1904 fput(listener_f);
1905 put_unused_fd(listener);
1906 seccomp_notify_detach(prepared);
1907 } else {
1908 fd_install(listener, listener_f);
1909 ret = listener;
1910 }
1911 }
1912out_free:
1913 seccomp_filter_free(prepared);
1914 return ret;
1915}
1916#else
/* Stub when CONFIG_SECCOMP_FILTER is disabled: filter mode unsupported. */
static inline long seccomp_set_mode_filter(unsigned int flags,
					   const char __user *filter)
{
	return -EINVAL;
}
1922#endif
1923
1924static long seccomp_get_action_avail(const char __user *uaction)
1925{
1926 u32 action;
1927
1928 if (copy_from_user(&action, uaction, sizeof(action)))
1929 return -EFAULT;
1930
1931 switch (action) {
1932 case SECCOMP_RET_KILL_PROCESS:
1933 case SECCOMP_RET_KILL_THREAD:
1934 case SECCOMP_RET_TRAP:
1935 case SECCOMP_RET_ERRNO:
1936 case SECCOMP_RET_USER_NOTIF:
1937 case SECCOMP_RET_TRACE:
1938 case SECCOMP_RET_LOG:
1939 case SECCOMP_RET_ALLOW:
1940 break;
1941 default:
1942 return -EOPNOTSUPP;
1943 }
1944
1945 return 0;
1946}
1947
1948static long seccomp_get_notif_sizes(void __user *usizes)
1949{
1950 struct seccomp_notif_sizes sizes = {
1951 .seccomp_notif = sizeof(struct seccomp_notif),
1952 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1953 .seccomp_data = sizeof(struct seccomp_data),
1954 };
1955
1956 if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1957 return -EFAULT;
1958
1959 return 0;
1960}
1961
1962
1963static long do_seccomp(unsigned int op, unsigned int flags,
1964 void __user *uargs)
1965{
1966 switch (op) {
1967 case SECCOMP_SET_MODE_STRICT:
1968 if (flags != 0 || uargs != NULL)
1969 return -EINVAL;
1970 return seccomp_set_mode_strict();
1971 case SECCOMP_SET_MODE_FILTER:
1972 return seccomp_set_mode_filter(flags, uargs);
1973 case SECCOMP_GET_ACTION_AVAIL:
1974 if (flags != 0)
1975 return -EINVAL;
1976
1977 return seccomp_get_action_avail(uargs);
1978 case SECCOMP_GET_NOTIF_SIZES:
1979 if (flags != 0)
1980 return -EINVAL;
1981
1982 return seccomp_get_notif_sizes(uargs);
1983 default:
1984 return -EINVAL;
1985 }
1986}
1987
/* seccomp(2) syscall entry point: thin wrapper around do_seccomp(). */
SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
		void __user *, uargs)
{
	return do_seccomp(op, flags, uargs);
}
1993
1994
1995
1996
1997
1998
1999
2000
2001long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
2002{
2003 unsigned int op;
2004 void __user *uargs;
2005
2006 switch (seccomp_mode) {
2007 case SECCOMP_MODE_STRICT:
2008 op = SECCOMP_SET_MODE_STRICT;
2009
2010
2011
2012
2013
2014 uargs = NULL;
2015 break;
2016 case SECCOMP_MODE_FILTER:
2017 op = SECCOMP_SET_MODE_FILTER;
2018 uargs = filter;
2019 break;
2020 default:
2021 return -EINVAL;
2022 }
2023
2024
2025 return do_seccomp(op, 0, uargs);
2026}
2027
2028#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
/*
 * Return the @filter_off-th filter (0 = most recently attached) in
 * @task's filter chain, with an extra reference held for the caller.
 * Returns an ERR_PTR on failure.
 */
static struct seccomp_filter *get_nth_filter(struct task_struct *task,
					     unsigned long filter_off)
{
	struct seccomp_filter *orig, *filter;
	unsigned long count;

	/*
	 * Pin the head of the chain under the siglock so it cannot be
	 * replaced or torn down concurrently; the rest of the walk then
	 * operates on that stable snapshot without the lock.
	 */
	spin_lock_irq(&task->sighand->siglock);

	if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
		spin_unlock_irq(&task->sighand->siglock);
		return ERR_PTR(-EINVAL);
	}

	orig = task->seccomp.filter;
	__get_seccomp_filter(orig);
	spin_unlock_irq(&task->sighand->siglock);

	/* Length of the chain, newest to oldest. */
	count = 0;
	for (filter = orig; filter; filter = filter->prev)
		count++;

	if (filter_off >= count) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	/* Walk forward until exactly @filter_off entries remain behind us. */
	count -= filter_off;
	for (filter = orig; filter && count > 1; filter = filter->prev)
		count--;

	if (WARN_ON(count != 1 || !filter)) {
		filter = ERR_PTR(-ENOENT);
		goto out;
	}

	/* Reference for the caller; the temporary one on orig is dropped. */
	__get_seccomp_filter(filter);

out:
	__put_seccomp_filter(orig);
	return filter;
}
2074
/*
 * Copy @task's @filter_off-th classic-BPF filter program to user space
 * (checkpoint/restore interface). With @data == NULL only the program
 * length is returned. Requires CAP_SYS_ADMIN and an unconfined caller.
 */
long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
			void __user *data)
{
	struct seccomp_filter *filter;
	struct sock_fprog_kern *fprog;
	long ret;

	/*
	 * The caller must not itself be seccomp-confined, lest a filtered
	 * task read out filters it should not see.
	 */
	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	filter = get_nth_filter(task, filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	fprog = filter->prog->orig_prog;
	if (!fprog) {
		/*
		 * No saved classic program — presumably the filter was not
		 * loaded as cBPF, so there is nothing to dump in this format.
		 */
		ret = -EMEDIUMTYPE;
		goto out;
	}

	/* With no output buffer, just report the instruction count. */
	ret = fprog->len;
	if (!data)
		goto out;

	if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
		ret = -EFAULT;

out:
	__put_seccomp_filter(filter);
	return ret;
}
2112
/*
 * Copy metadata (currently only the LOG flag) about one of @task's
 * filters to user space. @data supplies filter_off on input and receives
 * a struct seccomp_metadata, truncated to min(@size, sizeof(kmd)).
 * Requires CAP_SYS_ADMIN and an unconfined caller.
 */
long seccomp_get_metadata(struct task_struct *task,
			  unsigned long size, void __user *data)
{
	long ret;
	struct seccomp_filter *filter;
	struct seccomp_metadata kmd = {};

	/* Same access policy as seccomp_get_filter(). */
	if (!capable(CAP_SYS_ADMIN) ||
	    current->seccomp.mode != SECCOMP_MODE_DISABLED) {
		return -EACCES;
	}

	size = min_t(unsigned long, size, sizeof(kmd));

	/* The buffer must at least carry the filter_off selector. */
	if (size < sizeof(kmd.filter_off))
		return -EINVAL;

	if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
		return -EFAULT;

	filter = get_nth_filter(task, kmd.filter_off);
	if (IS_ERR(filter))
		return PTR_ERR(filter);

	if (filter->log)
		kmd.flags |= SECCOMP_FILTER_FLAG_LOG;

	/* On success, return the number of bytes written. */
	ret = size;
	if (copy_to_user(data, &kmd, size))
		ret = -EFAULT;

	__put_seccomp_filter(filter);
	return ret;
}
2147#endif
2148
2149#ifdef CONFIG_SYSCTL
2150
2151
/* Human-readable names for each seccomp action, used by the sysctls below. */
#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
#define SECCOMP_RET_TRAP_NAME		"trap"
#define SECCOMP_RET_ERRNO_NAME		"errno"
#define SECCOMP_RET_USER_NOTIF_NAME	"user_notif"
#define SECCOMP_RET_TRACE_NAME		"trace"
#define SECCOMP_RET_LOG_NAME		"log"
#define SECCOMP_RET_ALLOW_NAME		"allow"

/*
 * Space-separated list of every action name; exported read-only via the
 * kernel.seccomp.actions_avail sysctl and used to size name buffers.
 */
static const char seccomp_actions_avail[] =
				SECCOMP_RET_KILL_PROCESS_NAME	" "
				SECCOMP_RET_KILL_THREAD_NAME	" "
				SECCOMP_RET_TRAP_NAME		" "
				SECCOMP_RET_ERRNO_NAME		" "
				SECCOMP_RET_USER_NOTIF_NAME	" "
				SECCOMP_RET_TRACE_NAME		" "
				SECCOMP_RET_LOG_NAME		" "
				SECCOMP_RET_ALLOW_NAME;
2170
/* Pairs a SECCOMP_LOG_* bit with its sysctl action name. */
struct seccomp_log_name {
	u32		log;	/* SECCOMP_LOG_* bit value */
	const char	*name;	/* matching action name string */
};

/* Lookup table for name<->bit translation; terminated by an empty entry. */
static const struct seccomp_log_name seccomp_log_names[] = {
	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
	{ SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
	{ }
};
2187
2188static bool seccomp_names_from_actions_logged(char *names, size_t size,
2189 u32 actions_logged,
2190 const char *sep)
2191{
2192 const struct seccomp_log_name *cur;
2193 bool append_sep = false;
2194
2195 for (cur = seccomp_log_names; cur->name && size; cur++) {
2196 ssize_t ret;
2197
2198 if (!(actions_logged & cur->log))
2199 continue;
2200
2201 if (append_sep) {
2202 ret = strscpy(names, sep, size);
2203 if (ret < 0)
2204 return false;
2205
2206 names += ret;
2207 size -= ret;
2208 } else
2209 append_sep = true;
2210
2211 ret = strscpy(names, cur->name, size);
2212 if (ret < 0)
2213 return false;
2214
2215 names += ret;
2216 size -= ret;
2217 }
2218
2219 return true;
2220}
2221
2222static bool seccomp_action_logged_from_name(u32 *action_logged,
2223 const char *name)
2224{
2225 const struct seccomp_log_name *cur;
2226
2227 for (cur = seccomp_log_names; cur->name; cur++) {
2228 if (!strcmp(cur->name, name)) {
2229 *action_logged = cur->log;
2230 return true;
2231 }
2232 }
2233
2234 return false;
2235}
2236
2237static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2238{
2239 char *name;
2240
2241 *actions_logged = 0;
2242 while ((name = strsep(&names, " ")) && *name) {
2243 u32 action_logged = 0;
2244
2245 if (!seccomp_action_logged_from_name(&action_logged, name))
2246 return false;
2247
2248 *actions_logged |= action_logged;
2249 }
2250
2251 return true;
2252}
2253
/*
 * Read handler for kernel.seccomp.actions_logged: format the currently
 * logged actions as a space-separated string and feed it to
 * proc_dostring() through a writable stack copy of the (read-only)
 * ctl_table.
 */
static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
			       size_t *lenp, loff_t *ppos)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;

	memset(names, 0, sizeof(names));

	if (!seccomp_names_from_actions_logged(names, sizeof(names),
					       seccomp_actions_logged, " "))
		return -EINVAL;

	/* Point a local table copy at the formatted name buffer. */
	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	return proc_dostring(&table, 0, buffer, lenp, ppos);
}
2271
/*
 * Write handler for kernel.seccomp.actions_logged: parse the submitted
 * space-separated action names into a bitmask and commit it. The parsed
 * mask is also returned through @actions_logged for audit reporting.
 */
static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
				size_t *lenp, loff_t *ppos, u32 *actions_logged)
{
	char names[sizeof(seccomp_actions_avail)];
	struct ctl_table table;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	memset(names, 0, sizeof(names));

	/* Let proc_dostring() copy the user string into our local buffer. */
	table = *ro_table;
	table.data = names;
	table.maxlen = sizeof(names);
	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
	if (ret)
		return ret;

	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
		return -EINVAL;

	/* Logging of "allow" is not configurable. */
	if (*actions_logged & SECCOMP_LOG_ALLOW)
		return -EINVAL;

	seccomp_actions_logged = *actions_logged;
	return 0;
}
2300
2301static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
2302 int ret)
2303{
2304 char names[sizeof(seccomp_actions_avail)];
2305 char old_names[sizeof(seccomp_actions_avail)];
2306 const char *new = names;
2307 const char *old = old_names;
2308
2309 if (!audit_enabled)
2310 return;
2311
2312 memset(names, 0, sizeof(names));
2313 memset(old_names, 0, sizeof(old_names));
2314
2315 if (ret)
2316 new = "?";
2317 else if (!actions_logged)
2318 new = "(none)";
2319 else if (!seccomp_names_from_actions_logged(names, sizeof(names),
2320 actions_logged, ","))
2321 new = "?";
2322
2323 if (!old_actions_logged)
2324 old = "(none)";
2325 else if (!seccomp_names_from_actions_logged(old_names,
2326 sizeof(old_names),
2327 old_actions_logged, ","))
2328 old = "?";
2329
2330 return audit_seccomp_actions_logged(new, old, !ret);
2331}
2332
2333static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2334 void *buffer, size_t *lenp,
2335 loff_t *ppos)
2336{
2337 int ret;
2338
2339 if (write) {
2340 u32 actions_logged = 0;
2341 u32 old_actions_logged = seccomp_actions_logged;
2342
2343 ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2344 &actions_logged);
2345 audit_actions_logged(actions_logged, old_actions_logged, ret);
2346 } else
2347 ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2348
2349 return ret;
2350}
2351
/* Sysctl directory: kernel.seccomp.* */
static struct ctl_path seccomp_sysctl_path[] = {
	{ .procname = "kernel", },
	{ .procname = "seccomp", },
	{ }
};

/* Entries under kernel.seccomp.* */
static struct ctl_table seccomp_sysctl_table[] = {
	{
		/* Read-only list of all action names this kernel supports. */
		.procname	= "actions_avail",
		.data		= (void *) &seccomp_actions_avail,
		.maxlen		= sizeof(seccomp_actions_avail),
		.mode		= 0444,
		.proc_handler	= proc_dostring,
	},
	{
		/* Read-write set of actions that get audit-logged. */
		.procname	= "actions_logged",
		.mode		= 0644,
		.proc_handler	= seccomp_actions_logged_handler,
	},
	{ }
};
2373
2374static int __init seccomp_sysctl_init(void)
2375{
2376 struct ctl_table_header *hdr;
2377
2378 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2379 if (!hdr)
2380 pr_warn("sysctl registration failed\n");
2381 else
2382 kmemleak_not_leak(hdr);
2383
2384 return 0;
2385}
2386
2387device_initcall(seccomp_sysctl_init)
2388
2389#endif
2390
2391#ifdef CONFIG_SECCOMP_CACHE_DEBUG
2392
2393static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2394 const void *bitmap, size_t bitmap_size)
2395{
2396 int nr;
2397
2398 for (nr = 0; nr < bitmap_size; nr++) {
2399 bool cached = test_bit(nr, bitmap);
2400 char *status = cached ? "ALLOW" : "FILTER";
2401
2402 seq_printf(m, "%s %d %s\n", name, nr, status);
2403 }
2404}
2405
/*
 * /proc/<pid>/seccomp_cache (CONFIG_SECCOMP_CACHE_DEBUG): show the
 * per-syscall action cache of @task's current filter for the native
 * (and, if built, compat) architecture.
 */
int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
{
	struct seccomp_filter *f;
	unsigned long flags;

	/*
	 * Capability checked against the opener's credentials in the initial
	 * user namespace, not the reader's at read time.
	 */
	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
		return -EACCES;

	if (!lock_task_sighand(task, &flags))
		return -ESRCH;

	f = READ_ONCE(task->seccomp.filter);
	if (!f) {
		unlock_task_sighand(task, &flags);
		return 0;
	}

	/* Pin the filter so it survives after dropping the sighand lock. */
	__get_seccomp_filter(f);
	unlock_task_sighand(task, &flags);

	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
				    f->cache.allow_native,
				    SECCOMP_ARCH_NATIVE_NR);

#ifdef SECCOMP_ARCH_COMPAT
	proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
				    f->cache.allow_compat,
				    SECCOMP_ARCH_COMPAT_NR);
#endif

	__put_seccomp_filter(f);
	return 0;
}
2445#endif
2446