1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16#define pr_fmt(fmt) "seccomp: " fmt
17
18#include <linux/refcount.h>
19#include <linux/audit.h>
20#include <linux/compat.h>
21#include <linux/coredump.h>
22#include <linux/kmemleak.h>
23#include <linux/nospec.h>
24#include <linux/prctl.h>
25#include <linux/sched.h>
26#include <linux/sched/task_stack.h>
27#include <linux/seccomp.h>
28#include <linux/slab.h>
29#include <linux/syscalls.h>
30#include <linux/sysctl.h>
31
32#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
33#include <asm/syscall.h>
34#endif
35
36#ifdef CONFIG_SECCOMP_FILTER
37#include <linux/file.h>
38#include <linux/filter.h>
39#include <linux/pid.h>
40#include <linux/ptrace.h>
41#include <linux/capability.h>
42#include <linux/tracehook.h>
43#include <linux/uaccess.h>
44#include <linux/anon_inodes.h>
45#include <linux/lockdep.h>
46
47
48
49
50
51
52
/*
 * Second ioctl number for the "is this notification id still valid" query,
 * built with SECCOMP_IOR().  seccomp_notify_ioctl() accepts it alongside
 * SECCOMP_IOCTL_NOTIF_ID_VALID and routes both to seccomp_notify_id_valid().
 * NOTE(review): presumably retained for userspace that was built against a
 * command number with the wrong direction bits - confirm against the uapi
 * header before removing.
 */
#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64)
54
/* Lifecycle of one user notification (see struct seccomp_knotif::state). */
enum notify_state {
	/* Queued by seccomp_do_user_notification(); no listener has read it. */
	SECCOMP_NOTIFY_INIT,
	/* Handed to a listener by seccomp_notify_recv(); a reply is awaited. */
	SECCOMP_NOTIFY_SENT,
	/* A reply was recorded (send ioctl, ADDFD with SEND, or detach). */
	SECCOMP_NOTIFY_REPLIED,
};
60
/*
 * struct seccomp_knotif - kernel-side state of one pending user notification
 *
 * seccomp_do_user_notification() keeps one of these in a local variable for
 * as long as the notifying task waits for its reply.  The functions in this
 * file touch the fields with the owning filter's notify_lock held.
 */
struct seccomp_knotif {
	/* Task that triggered the notification (set to current). */
	struct task_struct *task;

	/* Identifier handed out by seccomp_next_notify_id(). */
	u64 id;

	/*
	 * Syscall data that produced the notification.  Only a pointer is
	 * stored; seccomp_notify_recv() copies the pointed-to data out while
	 * the notification is still on the list.
	 */
	const struct seccomp_data *data;

	/* Current position in the INIT -> SENT -> REPLIED lifecycle. */
	enum notify_state state;

	/* Reply values, filled in when the state becomes REPLIED. */
	int error;
	long val;
	u32 flags;

	/* Completed whenever the waiting task has something to look at. */
	struct completion ready;

	/* Linkage on struct notification::notifications. */
	struct list_head list;

	/* Pending struct seccomp_kaddfd requests for this notification. */
	struct list_head addfd;
};
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*
 * struct seccomp_kaddfd - one request to install a file into the notifying task
 *
 * Built by seccomp_notify_addfd() in a local variable and queued on
 * seccomp_knotif::addfd; seccomp_handle_addfd() carries it out from the
 * context of the task that is waiting on the notification.
 */
struct seccomp_kaddfd {
	/* File to install, obtained with fget() on the listener's srcfd. */
	struct file *file;
	/* Requested descriptor number; only used when @setfd is true. */
	int fd;
	/* Flags for the new descriptor (only O_CLOEXEC passes validation). */
	unsigned int flags;
	/* The SECCOMP_ADDFD_FLAG_* bits supplied with the ioctl. */
	__u32 ioctl_flags;

	/*
	 * Input and output share storage: seccomp_handle_addfd() reads @setfd
	 * first and only afterwards stores the outcome in @ret.
	 */
	union {
		/* Input: install at @fd instead of picking a free descriptor. */
		bool setfd;
		/* Output: the installed descriptor, or a negative error. */
		int ret;
	};
	/* Completed once @ret has been written. */
	struct completion completion;
	/* Linkage on seccomp_knotif::addfd. */
	struct list_head list;
};
132
133
134
135
136
137
138
139
140
141
142
143
144
/*
 * struct notification - per-filter container for user notifications
 *
 * Reached through seccomp_filter::notif; released by seccomp_notify_free().
 */
struct notification {
	/*
	 * Raised with up() for each queued notification and taken with
	 * down_interruptible() by seccomp_notify_recv().
	 */
	struct semaphore request;
	/* Next id to hand out; advanced by seccomp_next_notify_id(). */
	u64 next_id;
	/* List of struct seccomp_knotif, linked through their ->list. */
	struct list_head notifications;
};
150
#ifdef SECCOMP_ARCH_NATIVE
/*
 * struct action_cache - per-filter bitmaps of syscalls known to be allowed
 *
 * One bit per syscall number and architecture.  seccomp_cache_prepare()
 * fills the bitmaps when a filter is attached and
 * seccomp_cache_check_allow() consults them before any filter is run.
 */
struct action_cache {
	/* Bit set => the native-arch syscall is allowed without running BPF. */
	DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR);
#ifdef SECCOMP_ARCH_COMPAT
	/* Same, for the compat architecture. */
	DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR);
#endif
};
#else
/* Without SECCOMP_ARCH_NATIVE the cache is empty and never reports a hit. */
struct action_cache { };

/* Stub: no cache, so no syscall is ever reported as pre-approved. */
static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
 const struct seccomp_data *sd)
{
	return false;
}

/* Stub: there is nothing to prepare when the cache is compiled out. */
static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter)
{
}
#endif
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*
 * struct seccomp_filter - one node in a task's chain of attached filters
 *
 * Filters are linked from newest to oldest through @prev;
 * seccomp_run_filters() walks the chain starting at the task's newest one.
 */
struct seccomp_filter {
	/* Lifetime reference count; the filter is freed when it drops to 0. */
	refcount_t refs;
	/*
	 * Second count, dropped by __seccomp_filter_orphan(); reaching zero
	 * wakes pollers on @wqh with EPOLLHUP.
	 */
	refcount_t users;
	/* Set when the filter was attached with SECCOMP_FILTER_FLAG_LOG. */
	bool log;
	/* Bitmaps of syscalls that can be allowed without running @prog. */
	struct action_cache cache;
	/* Next older filter in the chain, or NULL. */
	struct seccomp_filter *prev;
	/* The BPF program that implements this filter. */
	struct bpf_prog *prog;
	/* User-notification state; NULL once seccomp_notify_free() ran. */
	struct notification *notif;
	/* Held by the notification paths while they use @notif. */
	struct mutex notify_lock;
	/* Wait queue woken for poll() users of the notification fd. */
	wait_queue_head_t wqh;
};
228
229
/*
 * Upper bound, in instructions, on the summed length of one filter chain:
 * 256 KiB (1 << 18 bytes) worth of struct sock_filter.  Enforced by
 * seccomp_attach_filter().
 */
#define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
231
232
233
234
235
236static void populate_seccomp_data(struct seccomp_data *sd)
237{
238
239
240
241
242 struct task_struct *task = current;
243 struct pt_regs *regs = task_pt_regs(task);
244 unsigned long args[6];
245
246 sd->nr = syscall_get_nr(task, regs);
247 sd->arch = syscall_get_arch(task);
248 syscall_get_arguments(task, regs, args);
249 sd->args[0] = args[0];
250 sd->args[1] = args[1];
251 sd->args[2] = args[2];
252 sd->args[3] = args[3];
253 sd->args[4] = args[4];
254 sd->args[5] = args[5];
255 sd->instruction_pointer = KSTK_EIP(task);
256}
257
258
259
260
261
262
263
264
265
266
267
268
269
270static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
271{
272 int pc;
273 for (pc = 0; pc < flen; pc++) {
274 struct sock_filter *ftest = &filter[pc];
275 u16 code = ftest->code;
276 u32 k = ftest->k;
277
278 switch (code) {
279 case BPF_LD | BPF_W | BPF_ABS:
280 ftest->code = BPF_LDX | BPF_W | BPF_ABS;
281
282 if (k >= sizeof(struct seccomp_data) || k & 3)
283 return -EINVAL;
284 continue;
285 case BPF_LD | BPF_W | BPF_LEN:
286 ftest->code = BPF_LD | BPF_IMM;
287 ftest->k = sizeof(struct seccomp_data);
288 continue;
289 case BPF_LDX | BPF_W | BPF_LEN:
290 ftest->code = BPF_LDX | BPF_IMM;
291 ftest->k = sizeof(struct seccomp_data);
292 continue;
293
294 case BPF_RET | BPF_K:
295 case BPF_RET | BPF_A:
296 case BPF_ALU | BPF_ADD | BPF_K:
297 case BPF_ALU | BPF_ADD | BPF_X:
298 case BPF_ALU | BPF_SUB | BPF_K:
299 case BPF_ALU | BPF_SUB | BPF_X:
300 case BPF_ALU | BPF_MUL | BPF_K:
301 case BPF_ALU | BPF_MUL | BPF_X:
302 case BPF_ALU | BPF_DIV | BPF_K:
303 case BPF_ALU | BPF_DIV | BPF_X:
304 case BPF_ALU | BPF_AND | BPF_K:
305 case BPF_ALU | BPF_AND | BPF_X:
306 case BPF_ALU | BPF_OR | BPF_K:
307 case BPF_ALU | BPF_OR | BPF_X:
308 case BPF_ALU | BPF_XOR | BPF_K:
309 case BPF_ALU | BPF_XOR | BPF_X:
310 case BPF_ALU | BPF_LSH | BPF_K:
311 case BPF_ALU | BPF_LSH | BPF_X:
312 case BPF_ALU | BPF_RSH | BPF_K:
313 case BPF_ALU | BPF_RSH | BPF_X:
314 case BPF_ALU | BPF_NEG:
315 case BPF_LD | BPF_IMM:
316 case BPF_LDX | BPF_IMM:
317 case BPF_MISC | BPF_TAX:
318 case BPF_MISC | BPF_TXA:
319 case BPF_LD | BPF_MEM:
320 case BPF_LDX | BPF_MEM:
321 case BPF_ST:
322 case BPF_STX:
323 case BPF_JMP | BPF_JA:
324 case BPF_JMP | BPF_JEQ | BPF_K:
325 case BPF_JMP | BPF_JEQ | BPF_X:
326 case BPF_JMP | BPF_JGE | BPF_K:
327 case BPF_JMP | BPF_JGE | BPF_X:
328 case BPF_JMP | BPF_JGT | BPF_K:
329 case BPF_JMP | BPF_JGT | BPF_X:
330 case BPF_JMP | BPF_JSET | BPF_K:
331 case BPF_JMP | BPF_JSET | BPF_X:
332 continue;
333 default:
334 return -EINVAL;
335 }
336 }
337 return 0;
338}
339
340#ifdef SECCOMP_ARCH_NATIVE
341static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap,
342 size_t bitmap_size,
343 int syscall_nr)
344{
345 if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size))
346 return false;
347 syscall_nr = array_index_nospec(syscall_nr, bitmap_size);
348
349 return test_bit(syscall_nr, bitmap);
350}
351
352
353
354
355
356
357
358
359static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter,
360 const struct seccomp_data *sd)
361{
362 int syscall_nr = sd->nr;
363 const struct action_cache *cache = &sfilter->cache;
364
365#ifndef SECCOMP_ARCH_COMPAT
366
367 return seccomp_cache_check_allow_bitmap(cache->allow_native,
368 SECCOMP_ARCH_NATIVE_NR,
369 syscall_nr);
370#else
371 if (likely(sd->arch == SECCOMP_ARCH_NATIVE))
372 return seccomp_cache_check_allow_bitmap(cache->allow_native,
373 SECCOMP_ARCH_NATIVE_NR,
374 syscall_nr);
375 if (likely(sd->arch == SECCOMP_ARCH_COMPAT))
376 return seccomp_cache_check_allow_bitmap(cache->allow_compat,
377 SECCOMP_ARCH_COMPAT_NR,
378 syscall_nr);
379#endif
380
381 WARN_ON_ONCE(true);
382 return false;
383}
384#endif
385
386
387
388
389
390
391
392
393
394
395#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
396static u32 seccomp_run_filters(const struct seccomp_data *sd,
397 struct seccomp_filter **match)
398{
399 u32 ret = SECCOMP_RET_ALLOW;
400
401 struct seccomp_filter *f =
402 READ_ONCE(current->seccomp.filter);
403
404
405 if (WARN_ON(f == NULL))
406 return SECCOMP_RET_KILL_PROCESS;
407
408 if (seccomp_cache_check_allow(f, sd))
409 return SECCOMP_RET_ALLOW;
410
411
412
413
414
415 for (; f; f = f->prev) {
416 u32 cur_ret = bpf_prog_run_pin_on_cpu(f->prog, sd);
417
418 if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
419 ret = cur_ret;
420 *match = f;
421 }
422 }
423 return ret;
424}
425#endif
426
427static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
428{
429 assert_spin_locked(¤t->sighand->siglock);
430
431 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
432 return false;
433
434 return true;
435}
436
437void __weak arch_seccomp_spec_mitigate(struct task_struct *task) { }
438
439static inline void seccomp_assign_mode(struct task_struct *task,
440 unsigned long seccomp_mode,
441 unsigned long flags)
442{
443 assert_spin_locked(&task->sighand->siglock);
444
445 task->seccomp.mode = seccomp_mode;
446
447
448
449
450 smp_mb__before_atomic();
451
452 if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
453 arch_seccomp_spec_mitigate(task);
454 set_task_syscall_work(task, SECCOMP);
455}
456
457#ifdef CONFIG_SECCOMP_FILTER
458
459static int is_ancestor(struct seccomp_filter *parent,
460 struct seccomp_filter *child)
461{
462
463 if (parent == NULL)
464 return 1;
465 for (; child; child = child->prev)
466 if (child == parent)
467 return 1;
468 return 0;
469}
470
471
472
473
474
475
476
477
478
479
480static inline pid_t seccomp_can_sync_threads(void)
481{
482 struct task_struct *thread, *caller;
483
484 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
485 assert_spin_locked(¤t->sighand->siglock);
486
487
488 caller = current;
489 for_each_thread(caller, thread) {
490 pid_t failed;
491
492
493 if (thread == caller)
494 continue;
495
496 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
497 (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
498 is_ancestor(thread->seccomp.filter,
499 caller->seccomp.filter)))
500 continue;
501
502
503 failed = task_pid_vnr(thread);
504
505 if (WARN_ON(failed == 0))
506 failed = -ESRCH;
507 return failed;
508 }
509
510 return 0;
511}
512
513static inline void seccomp_filter_free(struct seccomp_filter *filter)
514{
515 if (filter) {
516 bpf_prog_destroy(filter->prog);
517 kfree(filter);
518 }
519}
520
521static void __seccomp_filter_orphan(struct seccomp_filter *orig)
522{
523 while (orig && refcount_dec_and_test(&orig->users)) {
524 if (waitqueue_active(&orig->wqh))
525 wake_up_poll(&orig->wqh, EPOLLHUP);
526 orig = orig->prev;
527 }
528}
529
530static void __put_seccomp_filter(struct seccomp_filter *orig)
531{
532
533 while (orig && refcount_dec_and_test(&orig->refs)) {
534 struct seccomp_filter *freeme = orig;
535 orig = orig->prev;
536 seccomp_filter_free(freeme);
537 }
538}
539
/*
 * __seccomp_filter_release - give up a task's hold on a filter chain
 * @orig: newest filter of the chain, may be NULL
 *
 * Drops the "users" count first (which may notify pollers) and the
 * lifetime reference second (which may free filters).
 */
static void __seccomp_filter_release(struct seccomp_filter *orig)
{
	/* Pollers are woken while the filters still exist. */
	__seccomp_filter_orphan(orig);

	/* Only now may the memory go away. */
	__put_seccomp_filter(orig);
}
547
548
549
550
551
552
553
554
555
556
557void seccomp_filter_release(struct task_struct *tsk)
558{
559 struct seccomp_filter *orig = tsk->seccomp.filter;
560
561
562 WARN_ON(tsk->sighand != NULL);
563
564
565 tsk->seccomp.filter = NULL;
566 __seccomp_filter_release(orig);
567}
568
569
570
571
572
573
574
575
576
577static inline void seccomp_sync_threads(unsigned long flags)
578{
579 struct task_struct *thread, *caller;
580
581 BUG_ON(!mutex_is_locked(¤t->signal->cred_guard_mutex));
582 assert_spin_locked(¤t->sighand->siglock);
583
584
585 caller = current;
586 for_each_thread(caller, thread) {
587
588 if (thread == caller)
589 continue;
590
591
592 get_seccomp_filter(caller);
593
594
595
596
597
598
599 __seccomp_filter_release(thread->seccomp.filter);
600
601
602 smp_store_release(&thread->seccomp.filter,
603 caller->seccomp.filter);
604 atomic_set(&thread->seccomp.filter_count,
605 atomic_read(&caller->seccomp.filter_count));
606
607
608
609
610
611
612
613 if (task_no_new_privs(caller))
614 task_set_no_new_privs(thread);
615
616
617
618
619
620
621
622 if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
623 seccomp_assign_mode(thread, SECCOMP_MODE_FILTER,
624 flags);
625 }
626}
627
628
629
630
631
632
633
634static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
635{
636 struct seccomp_filter *sfilter;
637 int ret;
638 const bool save_orig =
639#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE)
640 true;
641#else
642 false;
643#endif
644
645 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
646 return ERR_PTR(-EINVAL);
647
648 BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
649
650
651
652
653
654
655
656 if (!task_no_new_privs(current) &&
657 !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
658 return ERR_PTR(-EACCES);
659
660
661 sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
662 if (!sfilter)
663 return ERR_PTR(-ENOMEM);
664
665 mutex_init(&sfilter->notify_lock);
666 ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
667 seccomp_check_filter, save_orig);
668 if (ret < 0) {
669 kfree(sfilter);
670 return ERR_PTR(ret);
671 }
672
673 refcount_set(&sfilter->refs, 1);
674 refcount_set(&sfilter->users, 1);
675 init_waitqueue_head(&sfilter->wqh);
676
677 return sfilter;
678}
679
680
681
682
683
684
685
686static struct seccomp_filter *
687seccomp_prepare_user_filter(const char __user *user_filter)
688{
689 struct sock_fprog fprog;
690 struct seccomp_filter *filter = ERR_PTR(-EFAULT);
691
692#ifdef CONFIG_COMPAT
693 if (in_compat_syscall()) {
694 struct compat_sock_fprog fprog32;
695 if (copy_from_user(&fprog32, user_filter, sizeof(fprog32)))
696 goto out;
697 fprog.len = fprog32.len;
698 fprog.filter = compat_ptr(fprog32.filter);
699 } else
700#endif
701 if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
702 goto out;
703 filter = seccomp_prepare_filter(&fprog);
704out:
705 return filter;
706}
707
708#ifdef SECCOMP_ARCH_NATIVE
709
710
711
712
713
714
715static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog,
716 struct seccomp_data *sd)
717{
718 unsigned int reg_value = 0;
719 unsigned int pc;
720 bool op_res;
721
722 if (WARN_ON_ONCE(!fprog))
723 return false;
724
725 for (pc = 0; pc < fprog->len; pc++) {
726 struct sock_filter *insn = &fprog->filter[pc];
727 u16 code = insn->code;
728 u32 k = insn->k;
729
730 switch (code) {
731 case BPF_LD | BPF_W | BPF_ABS:
732 switch (k) {
733 case offsetof(struct seccomp_data, nr):
734 reg_value = sd->nr;
735 break;
736 case offsetof(struct seccomp_data, arch):
737 reg_value = sd->arch;
738 break;
739 default:
740
741 return false;
742 }
743 break;
744 case BPF_RET | BPF_K:
745
746 return k == SECCOMP_RET_ALLOW;
747 case BPF_JMP | BPF_JA:
748 pc += insn->k;
749 break;
750 case BPF_JMP | BPF_JEQ | BPF_K:
751 case BPF_JMP | BPF_JGE | BPF_K:
752 case BPF_JMP | BPF_JGT | BPF_K:
753 case BPF_JMP | BPF_JSET | BPF_K:
754 switch (BPF_OP(code)) {
755 case BPF_JEQ:
756 op_res = reg_value == k;
757 break;
758 case BPF_JGE:
759 op_res = reg_value >= k;
760 break;
761 case BPF_JGT:
762 op_res = reg_value > k;
763 break;
764 case BPF_JSET:
765 op_res = !!(reg_value & k);
766 break;
767 default:
768
769 return false;
770 }
771
772 pc += op_res ? insn->jt : insn->jf;
773 break;
774 case BPF_ALU | BPF_AND | BPF_K:
775 reg_value &= k;
776 break;
777 default:
778
779 return false;
780 }
781 }
782
783
784 WARN_ON(1);
785 return false;
786}
787
788static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter,
789 void *bitmap, const void *bitmap_prev,
790 size_t bitmap_size, int arch)
791{
792 struct sock_fprog_kern *fprog = sfilter->prog->orig_prog;
793 struct seccomp_data sd;
794 int nr;
795
796 if (bitmap_prev) {
797
798 bitmap_copy(bitmap, bitmap_prev, bitmap_size);
799 } else {
800
801 bitmap_fill(bitmap, bitmap_size);
802 }
803
804 for (nr = 0; nr < bitmap_size; nr++) {
805
806 if (!test_bit(nr, bitmap))
807 continue;
808
809 sd.nr = nr;
810 sd.arch = arch;
811
812
813 if (seccomp_is_const_allow(fprog, &sd))
814 continue;
815
816
817
818
819
820 __clear_bit(nr, bitmap);
821 }
822}
823
824
825
826
827
828
829
830static void seccomp_cache_prepare(struct seccomp_filter *sfilter)
831{
832 struct action_cache *cache = &sfilter->cache;
833 const struct action_cache *cache_prev =
834 sfilter->prev ? &sfilter->prev->cache : NULL;
835
836 seccomp_cache_prepare_bitmap(sfilter, cache->allow_native,
837 cache_prev ? cache_prev->allow_native : NULL,
838 SECCOMP_ARCH_NATIVE_NR,
839 SECCOMP_ARCH_NATIVE);
840
841#ifdef SECCOMP_ARCH_COMPAT
842 seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat,
843 cache_prev ? cache_prev->allow_compat : NULL,
844 SECCOMP_ARCH_COMPAT_NR,
845 SECCOMP_ARCH_COMPAT);
846#endif
847}
848#endif
849
850
851
852
853
854
855
856
857
858
859
860
861
862static long seccomp_attach_filter(unsigned int flags,
863 struct seccomp_filter *filter)
864{
865 unsigned long total_insns;
866 struct seccomp_filter *walker;
867
868 assert_spin_locked(¤t->sighand->siglock);
869
870
871 total_insns = filter->prog->len;
872 for (walker = current->seccomp.filter; walker; walker = walker->prev)
873 total_insns += walker->prog->len + 4;
874 if (total_insns > MAX_INSNS_PER_PATH)
875 return -ENOMEM;
876
877
878 if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
879 int ret;
880
881 ret = seccomp_can_sync_threads();
882 if (ret) {
883 if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
884 return -ESRCH;
885 else
886 return ret;
887 }
888 }
889
890
891 if (flags & SECCOMP_FILTER_FLAG_LOG)
892 filter->log = true;
893
894
895
896
897
898 filter->prev = current->seccomp.filter;
899 seccomp_cache_prepare(filter);
900 current->seccomp.filter = filter;
901 atomic_inc(¤t->seccomp.filter_count);
902
903
904 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
905 seccomp_sync_threads(flags);
906
907 return 0;
908}
909
910static void __get_seccomp_filter(struct seccomp_filter *filter)
911{
912 refcount_inc(&filter->refs);
913}
914
915
916void get_seccomp_filter(struct task_struct *tsk)
917{
918 struct seccomp_filter *orig = tsk->seccomp.filter;
919 if (!orig)
920 return;
921 __get_seccomp_filter(orig);
922 refcount_inc(&orig->users);
923}
924
925#endif
926
927
/* One bit per seccomp action, used in the seccomp_actions_logged mask. */
#define SECCOMP_LOG_KILL_PROCESS (1 << 0)
#define SECCOMP_LOG_KILL_THREAD (1 << 1)
#define SECCOMP_LOG_TRAP (1 << 2)
#define SECCOMP_LOG_ERRNO (1 << 3)
#define SECCOMP_LOG_TRACE (1 << 4)
#define SECCOMP_LOG_LOG (1 << 5)
#define SECCOMP_LOG_ALLOW (1 << 6)
#define SECCOMP_LOG_USER_NOTIF (1 << 7)

/*
 * Actions that seccomp_log() may report.  Every action except ALLOW is
 * enabled initially.
 */
static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
	SECCOMP_LOG_KILL_THREAD |
	SECCOMP_LOG_TRAP |
	SECCOMP_LOG_ERRNO |
	SECCOMP_LOG_USER_NOTIF |
	SECCOMP_LOG_TRACE |
	SECCOMP_LOG_LOG;
944
945static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
946 bool requested)
947{
948 bool log = false;
949
950 switch (action) {
951 case SECCOMP_RET_ALLOW:
952 break;
953 case SECCOMP_RET_TRAP:
954 log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
955 break;
956 case SECCOMP_RET_ERRNO:
957 log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
958 break;
959 case SECCOMP_RET_TRACE:
960 log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
961 break;
962 case SECCOMP_RET_USER_NOTIF:
963 log = requested && seccomp_actions_logged & SECCOMP_LOG_USER_NOTIF;
964 break;
965 case SECCOMP_RET_LOG:
966 log = seccomp_actions_logged & SECCOMP_LOG_LOG;
967 break;
968 case SECCOMP_RET_KILL_THREAD:
969 log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
970 break;
971 case SECCOMP_RET_KILL_PROCESS:
972 default:
973 log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
974 }
975
976
977
978
979
980
981
982 if (!log)
983 return;
984
985 audit_seccomp(syscall, signr, action);
986}
987
988
989
990
991
992
/*
 * Syscalls permitted in strict mode, terminated by -1.  Scanned by
 * __secure_computing_strict().
 */
static const int mode1_syscalls[] = {
	__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
	-1,
};
997
998static void __secure_computing_strict(int this_syscall)
999{
1000 const int *allowed_syscalls = mode1_syscalls;
1001#ifdef CONFIG_COMPAT
1002 if (in_compat_syscall())
1003 allowed_syscalls = get_compat_mode1_syscalls();
1004#endif
1005 do {
1006 if (*allowed_syscalls == this_syscall)
1007 return;
1008 } while (*++allowed_syscalls != -1);
1009
1010#ifdef SECCOMP_DEBUG
1011 dump_stack();
1012#endif
1013 seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
1014 do_exit(SIGKILL);
1015}
1016
1017#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER
1018void secure_computing_strict(int this_syscall)
1019{
1020 int mode = current->seccomp.mode;
1021
1022 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1023 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1024 return;
1025
1026 if (mode == SECCOMP_MODE_DISABLED)
1027 return;
1028 else if (mode == SECCOMP_MODE_STRICT)
1029 __secure_computing_strict(this_syscall);
1030 else
1031 BUG();
1032}
1033#else
1034
1035#ifdef CONFIG_SECCOMP_FILTER
1036static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
1037{
1038
1039
1040
1041
1042 lockdep_assert_held(&filter->notify_lock);
1043 return filter->notif->next_id++;
1044}
1045
1046static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
1047{
1048 int fd;
1049
1050
1051
1052
1053
1054 list_del_init(&addfd->list);
1055 if (!addfd->setfd)
1056 fd = receive_fd(addfd->file, addfd->flags);
1057 else
1058 fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
1059 addfd->ret = fd;
1060
1061 if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
1062
1063 if (fd < 0) {
1064 n->state = SECCOMP_NOTIFY_SENT;
1065 } else {
1066
1067 n->flags = 0;
1068 n->error = 0;
1069 n->val = fd;
1070 }
1071 }
1072
1073
1074
1075
1076
1077 complete(&addfd->completion);
1078}
1079
1080static int seccomp_do_user_notification(int this_syscall,
1081 struct seccomp_filter *match,
1082 const struct seccomp_data *sd)
1083{
1084 int err;
1085 u32 flags = 0;
1086 long ret = 0;
1087 struct seccomp_knotif n = {};
1088 struct seccomp_kaddfd *addfd, *tmp;
1089
1090 mutex_lock(&match->notify_lock);
1091 err = -ENOSYS;
1092 if (!match->notif)
1093 goto out;
1094
1095 n.task = current;
1096 n.state = SECCOMP_NOTIFY_INIT;
1097 n.data = sd;
1098 n.id = seccomp_next_notify_id(match);
1099 init_completion(&n.ready);
1100 list_add(&n.list, &match->notif->notifications);
1101 INIT_LIST_HEAD(&n.addfd);
1102
1103 up(&match->notif->request);
1104 wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
1105
1106
1107
1108
1109 do {
1110 mutex_unlock(&match->notify_lock);
1111 err = wait_for_completion_interruptible(&n.ready);
1112 mutex_lock(&match->notify_lock);
1113 if (err != 0)
1114 goto interrupted;
1115
1116 addfd = list_first_entry_or_null(&n.addfd,
1117 struct seccomp_kaddfd, list);
1118
1119 if (addfd)
1120 seccomp_handle_addfd(addfd, &n);
1121
1122 } while (n.state != SECCOMP_NOTIFY_REPLIED);
1123
1124 ret = n.val;
1125 err = n.error;
1126 flags = n.flags;
1127
1128interrupted:
1129
1130 list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
1131
1132 addfd->ret = -ESRCH;
1133 list_del_init(&addfd->list);
1134 complete(&addfd->completion);
1135 }
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147 if (match->notif)
1148 list_del(&n.list);
1149out:
1150 mutex_unlock(&match->notify_lock);
1151
1152
1153 if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1154 return 0;
1155
1156 syscall_set_return_value(current, current_pt_regs(),
1157 err, ret);
1158 return -1;
1159}
1160
1161static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1162 const bool recheck_after_trace)
1163{
1164 u32 filter_ret, action;
1165 struct seccomp_filter *match = NULL;
1166 int data;
1167 struct seccomp_data sd_local;
1168
1169
1170
1171
1172
1173 smp_rmb();
1174
1175 if (!sd) {
1176 populate_seccomp_data(&sd_local);
1177 sd = &sd_local;
1178 }
1179
1180 filter_ret = seccomp_run_filters(sd, &match);
1181 data = filter_ret & SECCOMP_RET_DATA;
1182 action = filter_ret & SECCOMP_RET_ACTION_FULL;
1183
1184 switch (action) {
1185 case SECCOMP_RET_ERRNO:
1186
1187 if (data > MAX_ERRNO)
1188 data = MAX_ERRNO;
1189 syscall_set_return_value(current, current_pt_regs(),
1190 -data, 0);
1191 goto skip;
1192
1193 case SECCOMP_RET_TRAP:
1194
1195 syscall_rollback(current, current_pt_regs());
1196
1197 force_sig_seccomp(this_syscall, data, false);
1198 goto skip;
1199
1200 case SECCOMP_RET_TRACE:
1201
1202 if (recheck_after_trace)
1203 return 0;
1204
1205
1206 if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) {
1207 syscall_set_return_value(current,
1208 current_pt_regs(),
1209 -ENOSYS, 0);
1210 goto skip;
1211 }
1212
1213
1214 ptrace_event(PTRACE_EVENT_SECCOMP, data);
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225 if (fatal_signal_pending(current))
1226 goto skip;
1227
1228 this_syscall = syscall_get_nr(current, current_pt_regs());
1229 if (this_syscall < 0)
1230 goto skip;
1231
1232
1233
1234
1235
1236
1237
1238 if (__seccomp_filter(this_syscall, NULL, true))
1239 return -1;
1240
1241 return 0;
1242
1243 case SECCOMP_RET_USER_NOTIF:
1244 if (seccomp_do_user_notification(this_syscall, match, sd))
1245 goto skip;
1246
1247 return 0;
1248
1249 case SECCOMP_RET_LOG:
1250 seccomp_log(this_syscall, 0, action, true);
1251 return 0;
1252
1253 case SECCOMP_RET_ALLOW:
1254
1255
1256
1257
1258
1259 return 0;
1260
1261 case SECCOMP_RET_KILL_THREAD:
1262 case SECCOMP_RET_KILL_PROCESS:
1263 default:
1264 seccomp_log(this_syscall, SIGSYS, action, true);
1265
1266 if (action != SECCOMP_RET_KILL_THREAD ||
1267 (atomic_read(¤t->signal->live) == 1)) {
1268
1269 syscall_rollback(current, current_pt_regs());
1270
1271 force_sig_seccomp(this_syscall, data, true);
1272 } else {
1273 do_exit(SIGSYS);
1274 }
1275 return -1;
1276 }
1277
1278 unreachable();
1279
1280skip:
1281 seccomp_log(this_syscall, 0, action, match ? match->log : false);
1282 return -1;
1283}
1284#else
1285static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
1286 const bool recheck_after_trace)
1287{
1288 BUG();
1289
1290 return -1;
1291}
1292#endif
1293
1294int __secure_computing(const struct seccomp_data *sd)
1295{
1296 int mode = current->seccomp.mode;
1297 int this_syscall;
1298
1299 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
1300 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
1301 return 0;
1302
1303 this_syscall = sd ? sd->nr :
1304 syscall_get_nr(current, current_pt_regs());
1305
1306 switch (mode) {
1307 case SECCOMP_MODE_STRICT:
1308 __secure_computing_strict(this_syscall);
1309 return 0;
1310 case SECCOMP_MODE_FILTER:
1311 return __seccomp_filter(this_syscall, sd, false);
1312 default:
1313 BUG();
1314 }
1315}
1316#endif
1317
1318long prctl_get_seccomp(void)
1319{
1320 return current->seccomp.mode;
1321}
1322
1323
1324
1325
1326
1327
1328
1329
1330static long seccomp_set_mode_strict(void)
1331{
1332 const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
1333 long ret = -EINVAL;
1334
1335 spin_lock_irq(¤t->sighand->siglock);
1336
1337 if (!seccomp_may_assign_mode(seccomp_mode))
1338 goto out;
1339
1340#ifdef TIF_NOTSC
1341 disable_TSC();
1342#endif
1343 seccomp_assign_mode(current, seccomp_mode, 0);
1344 ret = 0;
1345
1346out:
1347 spin_unlock_irq(¤t->sighand->siglock);
1348
1349 return ret;
1350}
1351
1352#ifdef CONFIG_SECCOMP_FILTER
1353static void seccomp_notify_free(struct seccomp_filter *filter)
1354{
1355 kfree(filter->notif);
1356 filter->notif = NULL;
1357}
1358
1359static void seccomp_notify_detach(struct seccomp_filter *filter)
1360{
1361 struct seccomp_knotif *knotif;
1362
1363 if (!filter)
1364 return;
1365
1366 mutex_lock(&filter->notify_lock);
1367
1368
1369
1370
1371
1372 list_for_each_entry(knotif, &filter->notif->notifications, list) {
1373 if (knotif->state == SECCOMP_NOTIFY_REPLIED)
1374 continue;
1375
1376 knotif->state = SECCOMP_NOTIFY_REPLIED;
1377 knotif->error = -ENOSYS;
1378 knotif->val = 0;
1379
1380
1381
1382
1383
1384
1385 complete(&knotif->ready);
1386 }
1387
1388 seccomp_notify_free(filter);
1389 mutex_unlock(&filter->notify_lock);
1390}
1391
1392static int seccomp_notify_release(struct inode *inode, struct file *file)
1393{
1394 struct seccomp_filter *filter = file->private_data;
1395
1396 seccomp_notify_detach(filter);
1397 __put_seccomp_filter(filter);
1398 return 0;
1399}
1400
1401
1402static inline struct seccomp_knotif *
1403find_notification(struct seccomp_filter *filter, u64 id)
1404{
1405 struct seccomp_knotif *cur;
1406
1407 lockdep_assert_held(&filter->notify_lock);
1408
1409 list_for_each_entry(cur, &filter->notif->notifications, list) {
1410 if (cur->id == id)
1411 return cur;
1412 }
1413
1414 return NULL;
1415}
1416
1417
1418static long seccomp_notify_recv(struct seccomp_filter *filter,
1419 void __user *buf)
1420{
1421 struct seccomp_knotif *knotif = NULL, *cur;
1422 struct seccomp_notif unotif;
1423 ssize_t ret;
1424
1425
1426 ret = check_zeroed_user(buf, sizeof(unotif));
1427 if (ret < 0)
1428 return ret;
1429 if (!ret)
1430 return -EINVAL;
1431
1432 memset(&unotif, 0, sizeof(unotif));
1433
1434 ret = down_interruptible(&filter->notif->request);
1435 if (ret < 0)
1436 return ret;
1437
1438 mutex_lock(&filter->notify_lock);
1439 list_for_each_entry(cur, &filter->notif->notifications, list) {
1440 if (cur->state == SECCOMP_NOTIFY_INIT) {
1441 knotif = cur;
1442 break;
1443 }
1444 }
1445
1446
1447
1448
1449
1450
1451 if (!knotif) {
1452 ret = -ENOENT;
1453 goto out;
1454 }
1455
1456 unotif.id = knotif->id;
1457 unotif.pid = task_pid_vnr(knotif->task);
1458 unotif.data = *(knotif->data);
1459
1460 knotif->state = SECCOMP_NOTIFY_SENT;
1461 wake_up_poll(&filter->wqh, EPOLLOUT | EPOLLWRNORM);
1462 ret = 0;
1463out:
1464 mutex_unlock(&filter->notify_lock);
1465
1466 if (ret == 0 && copy_to_user(buf, &unotif, sizeof(unotif))) {
1467 ret = -EFAULT;
1468
1469
1470
1471
1472
1473
1474
1475 mutex_lock(&filter->notify_lock);
1476 knotif = find_notification(filter, unotif.id);
1477 if (knotif) {
1478 knotif->state = SECCOMP_NOTIFY_INIT;
1479 up(&filter->notif->request);
1480 }
1481 mutex_unlock(&filter->notify_lock);
1482 }
1483
1484 return ret;
1485}
1486
1487static long seccomp_notify_send(struct seccomp_filter *filter,
1488 void __user *buf)
1489{
1490 struct seccomp_notif_resp resp = {};
1491 struct seccomp_knotif *knotif;
1492 long ret;
1493
1494 if (copy_from_user(&resp, buf, sizeof(resp)))
1495 return -EFAULT;
1496
1497 if (resp.flags & ~SECCOMP_USER_NOTIF_FLAG_CONTINUE)
1498 return -EINVAL;
1499
1500 if ((resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) &&
1501 (resp.error || resp.val))
1502 return -EINVAL;
1503
1504 ret = mutex_lock_interruptible(&filter->notify_lock);
1505 if (ret < 0)
1506 return ret;
1507
1508 knotif = find_notification(filter, resp.id);
1509 if (!knotif) {
1510 ret = -ENOENT;
1511 goto out;
1512 }
1513
1514
1515 if (knotif->state != SECCOMP_NOTIFY_SENT) {
1516 ret = -EINPROGRESS;
1517 goto out;
1518 }
1519
1520 ret = 0;
1521 knotif->state = SECCOMP_NOTIFY_REPLIED;
1522 knotif->error = resp.error;
1523 knotif->val = resp.val;
1524 knotif->flags = resp.flags;
1525 complete(&knotif->ready);
1526out:
1527 mutex_unlock(&filter->notify_lock);
1528 return ret;
1529}
1530
1531static long seccomp_notify_id_valid(struct seccomp_filter *filter,
1532 void __user *buf)
1533{
1534 struct seccomp_knotif *knotif;
1535 u64 id;
1536 long ret;
1537
1538 if (copy_from_user(&id, buf, sizeof(id)))
1539 return -EFAULT;
1540
1541 ret = mutex_lock_interruptible(&filter->notify_lock);
1542 if (ret < 0)
1543 return ret;
1544
1545 knotif = find_notification(filter, id);
1546 if (knotif && knotif->state == SECCOMP_NOTIFY_SENT)
1547 ret = 0;
1548 else
1549 ret = -ENOENT;
1550
1551 mutex_unlock(&filter->notify_lock);
1552 return ret;
1553}
1554
1555static long seccomp_notify_addfd(struct seccomp_filter *filter,
1556 struct seccomp_notif_addfd __user *uaddfd,
1557 unsigned int size)
1558{
1559 struct seccomp_notif_addfd addfd;
1560 struct seccomp_knotif *knotif;
1561 struct seccomp_kaddfd kaddfd;
1562 int ret;
1563
1564 BUILD_BUG_ON(sizeof(addfd) < SECCOMP_NOTIFY_ADDFD_SIZE_VER0);
1565 BUILD_BUG_ON(sizeof(addfd) != SECCOMP_NOTIFY_ADDFD_SIZE_LATEST);
1566
1567 if (size < SECCOMP_NOTIFY_ADDFD_SIZE_VER0 || size >= PAGE_SIZE)
1568 return -EINVAL;
1569
1570 ret = copy_struct_from_user(&addfd, sizeof(addfd), uaddfd, size);
1571 if (ret)
1572 return ret;
1573
1574 if (addfd.newfd_flags & ~O_CLOEXEC)
1575 return -EINVAL;
1576
1577 if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
1578 return -EINVAL;
1579
1580 if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
1581 return -EINVAL;
1582
1583 kaddfd.file = fget(addfd.srcfd);
1584 if (!kaddfd.file)
1585 return -EBADF;
1586
1587 kaddfd.ioctl_flags = addfd.flags;
1588 kaddfd.flags = addfd.newfd_flags;
1589 kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
1590 kaddfd.fd = addfd.newfd;
1591 init_completion(&kaddfd.completion);
1592
1593 ret = mutex_lock_interruptible(&filter->notify_lock);
1594 if (ret < 0)
1595 goto out;
1596
1597 knotif = find_notification(filter, addfd.id);
1598 if (!knotif) {
1599 ret = -ENOENT;
1600 goto out_unlock;
1601 }
1602
1603
1604
1605
1606
1607
1608 if (knotif->state != SECCOMP_NOTIFY_SENT) {
1609 ret = -EINPROGRESS;
1610 goto out_unlock;
1611 }
1612
1613 if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
1614
1615
1616
1617
1618
1619
1620
1621 if (!list_empty(&knotif->addfd)) {
1622 ret = -EBUSY;
1623 goto out_unlock;
1624 }
1625
1626
1627 knotif->state = SECCOMP_NOTIFY_REPLIED;
1628 }
1629
1630 list_add(&kaddfd.list, &knotif->addfd);
1631 complete(&knotif->ready);
1632 mutex_unlock(&filter->notify_lock);
1633
1634
1635 ret = wait_for_completion_interruptible(&kaddfd.completion);
1636 if (ret == 0) {
1637
1638
1639
1640
1641
1642
1643
1644 ret = kaddfd.ret;
1645 goto out;
1646 }
1647
1648 mutex_lock(&filter->notify_lock);
1649
1650
1651
1652
1653
1654
1655
1656 if (list_empty(&kaddfd.list))
1657 ret = kaddfd.ret;
1658 else
1659 list_del(&kaddfd.list);
1660
1661out_unlock:
1662 mutex_unlock(&filter->notify_lock);
1663out:
1664 fput(kaddfd.file);
1665
1666 return ret;
1667}
1668
1669static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
1670 unsigned long arg)
1671{
1672 struct seccomp_filter *filter = file->private_data;
1673 void __user *buf = (void __user *)arg;
1674
1675
1676 switch (cmd) {
1677 case SECCOMP_IOCTL_NOTIF_RECV:
1678 return seccomp_notify_recv(filter, buf);
1679 case SECCOMP_IOCTL_NOTIF_SEND:
1680 return seccomp_notify_send(filter, buf);
1681 case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
1682 case SECCOMP_IOCTL_NOTIF_ID_VALID:
1683 return seccomp_notify_id_valid(filter, buf);
1684 }
1685
1686
1687#define EA_IOCTL(cmd) ((cmd) & ~(IOC_INOUT | IOCSIZE_MASK))
1688 switch (EA_IOCTL(cmd)) {
1689 case EA_IOCTL(SECCOMP_IOCTL_NOTIF_ADDFD):
1690 return seccomp_notify_addfd(filter, buf, _IOC_SIZE(cmd));
1691 default:
1692 return -EINVAL;
1693 }
1694}
1695
1696static __poll_t seccomp_notify_poll(struct file *file,
1697 struct poll_table_struct *poll_tab)
1698{
1699 struct seccomp_filter *filter = file->private_data;
1700 __poll_t ret = 0;
1701 struct seccomp_knotif *cur;
1702
1703 poll_wait(file, &filter->wqh, poll_tab);
1704
1705 if (mutex_lock_interruptible(&filter->notify_lock) < 0)
1706 return EPOLLERR;
1707
1708 list_for_each_entry(cur, &filter->notif->notifications, list) {
1709 if (cur->state == SECCOMP_NOTIFY_INIT)
1710 ret |= EPOLLIN | EPOLLRDNORM;
1711 if (cur->state == SECCOMP_NOTIFY_SENT)
1712 ret |= EPOLLOUT | EPOLLWRNORM;
1713 if ((ret & EPOLLIN) && (ret & EPOLLOUT))
1714 break;
1715 }
1716
1717 mutex_unlock(&filter->notify_lock);
1718
1719 if (refcount_read(&filter->users) == 0)
1720 ret |= EPOLLHUP;
1721
1722 return ret;
1723}
1724
1725static const struct file_operations seccomp_notify_ops = {
1726 .poll = seccomp_notify_poll,
1727 .release = seccomp_notify_release,
1728 .unlocked_ioctl = seccomp_notify_ioctl,
1729 .compat_ioctl = seccomp_notify_ioctl,
1730};
1731
1732static struct file *init_listener(struct seccomp_filter *filter)
1733{
1734 struct file *ret;
1735
1736 ret = ERR_PTR(-ENOMEM);
1737 filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
1738 if (!filter->notif)
1739 goto out;
1740
1741 sema_init(&filter->notif->request, 0);
1742 filter->notif->next_id = get_random_u64();
1743 INIT_LIST_HEAD(&filter->notif->notifications);
1744
1745 ret = anon_inode_getfile("seccomp notify", &seccomp_notify_ops,
1746 filter, O_RDWR);
1747 if (IS_ERR(ret))
1748 goto out_notif;
1749
1750
1751 __get_seccomp_filter(filter);
1752
1753out_notif:
1754 if (IS_ERR(ret))
1755 seccomp_notify_free(filter);
1756out:
1757 return ret;
1758}
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768static bool has_duplicate_listener(struct seccomp_filter *new_child)
1769{
1770 struct seccomp_filter *cur;
1771
1772
1773 lockdep_assert_held(¤t->sighand->siglock);
1774
1775 if (!new_child->notif)
1776 return false;
1777 for (cur = current->seccomp.filter; cur; cur = cur->prev) {
1778 if (cur->notif)
1779 return true;
1780 }
1781
1782 return false;
1783}
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798static long seccomp_set_mode_filter(unsigned int flags,
1799 const char __user *filter)
1800{
1801 const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
1802 struct seccomp_filter *prepared = NULL;
1803 long ret = -EINVAL;
1804 int listener = -1;
1805 struct file *listener_f = NULL;
1806
1807
1808 if (flags & ~SECCOMP_FILTER_FLAG_MASK)
1809 return -EINVAL;
1810
1811
1812
1813
1814
1815
1816
1817
1818 if ((flags & SECCOMP_FILTER_FLAG_TSYNC) &&
1819 (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) &&
1820 ((flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) == 0))
1821 return -EINVAL;
1822
1823
1824 prepared = seccomp_prepare_user_filter(filter);
1825 if (IS_ERR(prepared))
1826 return PTR_ERR(prepared);
1827
1828 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1829 listener = get_unused_fd_flags(O_CLOEXEC);
1830 if (listener < 0) {
1831 ret = listener;
1832 goto out_free;
1833 }
1834
1835 listener_f = init_listener(prepared);
1836 if (IS_ERR(listener_f)) {
1837 put_unused_fd(listener);
1838 ret = PTR_ERR(listener_f);
1839 goto out_free;
1840 }
1841 }
1842
1843
1844
1845
1846
1847 if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
1848 mutex_lock_killable(¤t->signal->cred_guard_mutex))
1849 goto out_put_fd;
1850
1851 spin_lock_irq(¤t->sighand->siglock);
1852
1853 if (!seccomp_may_assign_mode(seccomp_mode))
1854 goto out;
1855
1856 if (has_duplicate_listener(prepared)) {
1857 ret = -EBUSY;
1858 goto out;
1859 }
1860
1861 ret = seccomp_attach_filter(flags, prepared);
1862 if (ret)
1863 goto out;
1864
1865 prepared = NULL;
1866
1867 seccomp_assign_mode(current, seccomp_mode, flags);
1868out:
1869 spin_unlock_irq(¤t->sighand->siglock);
1870 if (flags & SECCOMP_FILTER_FLAG_TSYNC)
1871 mutex_unlock(¤t->signal->cred_guard_mutex);
1872out_put_fd:
1873 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1874 if (ret) {
1875 listener_f->private_data = NULL;
1876 fput(listener_f);
1877 put_unused_fd(listener);
1878 seccomp_notify_detach(prepared);
1879 } else {
1880 fd_install(listener, listener_f);
1881 ret = listener;
1882 }
1883 }
1884out_free:
1885 seccomp_filter_free(prepared);
1886 return ret;
1887}
1888#else
1889static inline long seccomp_set_mode_filter(unsigned int flags,
1890 const char __user *filter)
1891{
1892 return -EINVAL;
1893}
1894#endif
1895
1896static long seccomp_get_action_avail(const char __user *uaction)
1897{
1898 u32 action;
1899
1900 if (copy_from_user(&action, uaction, sizeof(action)))
1901 return -EFAULT;
1902
1903 switch (action) {
1904 case SECCOMP_RET_KILL_PROCESS:
1905 case SECCOMP_RET_KILL_THREAD:
1906 case SECCOMP_RET_TRAP:
1907 case SECCOMP_RET_ERRNO:
1908 case SECCOMP_RET_USER_NOTIF:
1909 case SECCOMP_RET_TRACE:
1910 case SECCOMP_RET_LOG:
1911 case SECCOMP_RET_ALLOW:
1912 break;
1913 default:
1914 return -EOPNOTSUPP;
1915 }
1916
1917 return 0;
1918}
1919
1920static long seccomp_get_notif_sizes(void __user *usizes)
1921{
1922 struct seccomp_notif_sizes sizes = {
1923 .seccomp_notif = sizeof(struct seccomp_notif),
1924 .seccomp_notif_resp = sizeof(struct seccomp_notif_resp),
1925 .seccomp_data = sizeof(struct seccomp_data),
1926 };
1927
1928 if (copy_to_user(usizes, &sizes, sizeof(sizes)))
1929 return -EFAULT;
1930
1931 return 0;
1932}
1933
1934
1935static long do_seccomp(unsigned int op, unsigned int flags,
1936 void __user *uargs)
1937{
1938 switch (op) {
1939 case SECCOMP_SET_MODE_STRICT:
1940 if (flags != 0 || uargs != NULL)
1941 return -EINVAL;
1942 return seccomp_set_mode_strict();
1943 case SECCOMP_SET_MODE_FILTER:
1944 return seccomp_set_mode_filter(flags, uargs);
1945 case SECCOMP_GET_ACTION_AVAIL:
1946 if (flags != 0)
1947 return -EINVAL;
1948
1949 return seccomp_get_action_avail(uargs);
1950 case SECCOMP_GET_NOTIF_SIZES:
1951 if (flags != 0)
1952 return -EINVAL;
1953
1954 return seccomp_get_notif_sizes(uargs);
1955 default:
1956 return -EINVAL;
1957 }
1958}
1959
1960SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
1961 void __user *, uargs)
1962{
1963 return do_seccomp(op, flags, uargs);
1964}
1965
1966
1967
1968
1969
1970
1971
1972
1973long prctl_set_seccomp(unsigned long seccomp_mode, void __user *filter)
1974{
1975 unsigned int op;
1976 void __user *uargs;
1977
1978 switch (seccomp_mode) {
1979 case SECCOMP_MODE_STRICT:
1980 op = SECCOMP_SET_MODE_STRICT;
1981
1982
1983
1984
1985
1986 uargs = NULL;
1987 break;
1988 case SECCOMP_MODE_FILTER:
1989 op = SECCOMP_SET_MODE_FILTER;
1990 uargs = filter;
1991 break;
1992 default:
1993 return -EINVAL;
1994 }
1995
1996
1997 return do_seccomp(op, 0, uargs);
1998}
1999
2000#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
2001static struct seccomp_filter *get_nth_filter(struct task_struct *task,
2002 unsigned long filter_off)
2003{
2004 struct seccomp_filter *orig, *filter;
2005 unsigned long count;
2006
2007
2008
2009
2010
2011 spin_lock_irq(&task->sighand->siglock);
2012
2013 if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
2014 spin_unlock_irq(&task->sighand->siglock);
2015 return ERR_PTR(-EINVAL);
2016 }
2017
2018 orig = task->seccomp.filter;
2019 __get_seccomp_filter(orig);
2020 spin_unlock_irq(&task->sighand->siglock);
2021
2022 count = 0;
2023 for (filter = orig; filter; filter = filter->prev)
2024 count++;
2025
2026 if (filter_off >= count) {
2027 filter = ERR_PTR(-ENOENT);
2028 goto out;
2029 }
2030
2031 count -= filter_off;
2032 for (filter = orig; filter && count > 1; filter = filter->prev)
2033 count--;
2034
2035 if (WARN_ON(count != 1 || !filter)) {
2036 filter = ERR_PTR(-ENOENT);
2037 goto out;
2038 }
2039
2040 __get_seccomp_filter(filter);
2041
2042out:
2043 __put_seccomp_filter(orig);
2044 return filter;
2045}
2046
2047long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
2048 void __user *data)
2049{
2050 struct seccomp_filter *filter;
2051 struct sock_fprog_kern *fprog;
2052 long ret;
2053
2054 if (!capable(CAP_SYS_ADMIN) ||
2055 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2056 return -EACCES;
2057 }
2058
2059 filter = get_nth_filter(task, filter_off);
2060 if (IS_ERR(filter))
2061 return PTR_ERR(filter);
2062
2063 fprog = filter->prog->orig_prog;
2064 if (!fprog) {
2065
2066
2067
2068
2069 ret = -EMEDIUMTYPE;
2070 goto out;
2071 }
2072
2073 ret = fprog->len;
2074 if (!data)
2075 goto out;
2076
2077 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
2078 ret = -EFAULT;
2079
2080out:
2081 __put_seccomp_filter(filter);
2082 return ret;
2083}
2084
2085long seccomp_get_metadata(struct task_struct *task,
2086 unsigned long size, void __user *data)
2087{
2088 long ret;
2089 struct seccomp_filter *filter;
2090 struct seccomp_metadata kmd = {};
2091
2092 if (!capable(CAP_SYS_ADMIN) ||
2093 current->seccomp.mode != SECCOMP_MODE_DISABLED) {
2094 return -EACCES;
2095 }
2096
2097 size = min_t(unsigned long, size, sizeof(kmd));
2098
2099 if (size < sizeof(kmd.filter_off))
2100 return -EINVAL;
2101
2102 if (copy_from_user(&kmd.filter_off, data, sizeof(kmd.filter_off)))
2103 return -EFAULT;
2104
2105 filter = get_nth_filter(task, kmd.filter_off);
2106 if (IS_ERR(filter))
2107 return PTR_ERR(filter);
2108
2109 if (filter->log)
2110 kmd.flags |= SECCOMP_FILTER_FLAG_LOG;
2111
2112 ret = size;
2113 if (copy_to_user(data, &kmd, size))
2114 ret = -EFAULT;
2115
2116 __put_seccomp_filter(filter);
2117 return ret;
2118}
2119#endif
2120
2121#ifdef CONFIG_SYSCTL
2122
2123
/* Human-readable names of the seccomp return actions. */
#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
#define SECCOMP_RET_TRAP_NAME		"trap"
#define SECCOMP_RET_ERRNO_NAME		"errno"
#define SECCOMP_RET_USER_NOTIF_NAME	"user_notif"
#define SECCOMP_RET_TRACE_NAME		"trace"
#define SECCOMP_RET_LOG_NAME		"log"
#define SECCOMP_RET_ALLOW_NAME		"allow"

/*
 * Space-separated list of every action name. It backs the "actions_avail"
 * sysctl, and its size is used for the name buffers of the "actions_logged"
 * handlers.
 */
static const char seccomp_actions_avail[] =
	SECCOMP_RET_KILL_PROCESS_NAME	" "
	SECCOMP_RET_KILL_THREAD_NAME	" "
	SECCOMP_RET_TRAP_NAME		" "
	SECCOMP_RET_ERRNO_NAME		" "
	SECCOMP_RET_USER_NOTIF_NAME	" "
	SECCOMP_RET_TRACE_NAME		" "
	SECCOMP_RET_LOG_NAME		" "
	SECCOMP_RET_ALLOW_NAME;
2142
2143struct seccomp_log_name {
2144 u32 log;
2145 const char *name;
2146};
2147
2148static const struct seccomp_log_name seccomp_log_names[] = {
2149 { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
2150 { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
2151 { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
2152 { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
2153 { SECCOMP_LOG_USER_NOTIF, SECCOMP_RET_USER_NOTIF_NAME },
2154 { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
2155 { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
2156 { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
2157 { }
2158};
2159
2160static bool seccomp_names_from_actions_logged(char *names, size_t size,
2161 u32 actions_logged,
2162 const char *sep)
2163{
2164 const struct seccomp_log_name *cur;
2165 bool append_sep = false;
2166
2167 for (cur = seccomp_log_names; cur->name && size; cur++) {
2168 ssize_t ret;
2169
2170 if (!(actions_logged & cur->log))
2171 continue;
2172
2173 if (append_sep) {
2174 ret = strscpy(names, sep, size);
2175 if (ret < 0)
2176 return false;
2177
2178 names += ret;
2179 size -= ret;
2180 } else
2181 append_sep = true;
2182
2183 ret = strscpy(names, cur->name, size);
2184 if (ret < 0)
2185 return false;
2186
2187 names += ret;
2188 size -= ret;
2189 }
2190
2191 return true;
2192}
2193
2194static bool seccomp_action_logged_from_name(u32 *action_logged,
2195 const char *name)
2196{
2197 const struct seccomp_log_name *cur;
2198
2199 for (cur = seccomp_log_names; cur->name; cur++) {
2200 if (!strcmp(cur->name, name)) {
2201 *action_logged = cur->log;
2202 return true;
2203 }
2204 }
2205
2206 return false;
2207}
2208
2209static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
2210{
2211 char *name;
2212
2213 *actions_logged = 0;
2214 while ((name = strsep(&names, " ")) && *name) {
2215 u32 action_logged = 0;
2216
2217 if (!seccomp_action_logged_from_name(&action_logged, name))
2218 return false;
2219
2220 *actions_logged |= action_logged;
2221 }
2222
2223 return true;
2224}
2225
2226static int read_actions_logged(struct ctl_table *ro_table, void *buffer,
2227 size_t *lenp, loff_t *ppos)
2228{
2229 char names[sizeof(seccomp_actions_avail)];
2230 struct ctl_table table;
2231
2232 memset(names, 0, sizeof(names));
2233
2234 if (!seccomp_names_from_actions_logged(names, sizeof(names),
2235 seccomp_actions_logged, " "))
2236 return -EINVAL;
2237
2238 table = *ro_table;
2239 table.data = names;
2240 table.maxlen = sizeof(names);
2241 return proc_dostring(&table, 0, buffer, lenp, ppos);
2242}
2243
2244static int write_actions_logged(struct ctl_table *ro_table, void *buffer,
2245 size_t *lenp, loff_t *ppos, u32 *actions_logged)
2246{
2247 char names[sizeof(seccomp_actions_avail)];
2248 struct ctl_table table;
2249 int ret;
2250
2251 if (!capable(CAP_SYS_ADMIN))
2252 return -EPERM;
2253
2254 memset(names, 0, sizeof(names));
2255
2256 table = *ro_table;
2257 table.data = names;
2258 table.maxlen = sizeof(names);
2259 ret = proc_dostring(&table, 1, buffer, lenp, ppos);
2260 if (ret)
2261 return ret;
2262
2263 if (!seccomp_actions_logged_from_names(actions_logged, table.data))
2264 return -EINVAL;
2265
2266 if (*actions_logged & SECCOMP_LOG_ALLOW)
2267 return -EINVAL;
2268
2269 seccomp_actions_logged = *actions_logged;
2270 return 0;
2271}
2272
2273static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
2274 int ret)
2275{
2276 char names[sizeof(seccomp_actions_avail)];
2277 char old_names[sizeof(seccomp_actions_avail)];
2278 const char *new = names;
2279 const char *old = old_names;
2280
2281 if (!audit_enabled)
2282 return;
2283
2284 memset(names, 0, sizeof(names));
2285 memset(old_names, 0, sizeof(old_names));
2286
2287 if (ret)
2288 new = "?";
2289 else if (!actions_logged)
2290 new = "(none)";
2291 else if (!seccomp_names_from_actions_logged(names, sizeof(names),
2292 actions_logged, ","))
2293 new = "?";
2294
2295 if (!old_actions_logged)
2296 old = "(none)";
2297 else if (!seccomp_names_from_actions_logged(old_names,
2298 sizeof(old_names),
2299 old_actions_logged, ","))
2300 old = "?";
2301
2302 return audit_seccomp_actions_logged(new, old, !ret);
2303}
2304
2305static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
2306 void *buffer, size_t *lenp,
2307 loff_t *ppos)
2308{
2309 int ret;
2310
2311 if (write) {
2312 u32 actions_logged = 0;
2313 u32 old_actions_logged = seccomp_actions_logged;
2314
2315 ret = write_actions_logged(ro_table, buffer, lenp, ppos,
2316 &actions_logged);
2317 audit_actions_logged(actions_logged, old_actions_logged, ret);
2318 } else
2319 ret = read_actions_logged(ro_table, buffer, lenp, ppos);
2320
2321 return ret;
2322}
2323
2324static struct ctl_path seccomp_sysctl_path[] = {
2325 { .procname = "kernel", },
2326 { .procname = "seccomp", },
2327 { }
2328};
2329
2330static struct ctl_table seccomp_sysctl_table[] = {
2331 {
2332 .procname = "actions_avail",
2333 .data = (void *) &seccomp_actions_avail,
2334 .maxlen = sizeof(seccomp_actions_avail),
2335 .mode = 0444,
2336 .proc_handler = proc_dostring,
2337 },
2338 {
2339 .procname = "actions_logged",
2340 .mode = 0644,
2341 .proc_handler = seccomp_actions_logged_handler,
2342 },
2343 { }
2344};
2345
2346static int __init seccomp_sysctl_init(void)
2347{
2348 struct ctl_table_header *hdr;
2349
2350 hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table);
2351 if (!hdr)
2352 pr_warn("sysctl registration failed\n");
2353 else
2354 kmemleak_not_leak(hdr);
2355
2356 return 0;
2357}
2358
2359device_initcall(seccomp_sysctl_init)
2360
2361#endif
2362
2363#ifdef CONFIG_SECCOMP_CACHE_DEBUG
2364
2365static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name,
2366 const void *bitmap, size_t bitmap_size)
2367{
2368 int nr;
2369
2370 for (nr = 0; nr < bitmap_size; nr++) {
2371 bool cached = test_bit(nr, bitmap);
2372 char *status = cached ? "ALLOW" : "FILTER";
2373
2374 seq_printf(m, "%s %d %s\n", name, nr, status);
2375 }
2376}
2377
2378int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns,
2379 struct pid *pid, struct task_struct *task)
2380{
2381 struct seccomp_filter *f;
2382 unsigned long flags;
2383
2384
2385
2386
2387
2388 if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
2389 return -EACCES;
2390
2391 if (!lock_task_sighand(task, &flags))
2392 return -ESRCH;
2393
2394 f = READ_ONCE(task->seccomp.filter);
2395 if (!f) {
2396 unlock_task_sighand(task, &flags);
2397 return 0;
2398 }
2399
2400
2401 __get_seccomp_filter(f);
2402 unlock_task_sighand(task, &flags);
2403
2404 proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME,
2405 f->cache.allow_native,
2406 SECCOMP_ARCH_NATIVE_NR);
2407
2408#ifdef SECCOMP_ARCH_COMPAT
2409 proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME,
2410 f->cache.allow_compat,
2411 SECCOMP_ARCH_COMPAT_NR);
2412#endif
2413
2414 __put_seccomp_filter(f);
2415 return 0;
2416}
2417#endif
2418