linux/tools/testing/selftests/seccomp/seccomp_bpf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   4 *
   5 * Test code for seccomp bpf.
   6 */
   7
   8#define _GNU_SOURCE
   9#include <sys/types.h>
  10
  11/*
  12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
  13 * we need to use the kernel's siginfo.h file and trick glibc
  14 * into accepting it.
  15 */
  16#if !__GLIBC_PREREQ(2, 26)
  17# include <asm/siginfo.h>
  18# define __have_siginfo_t 1
  19# define __have_sigval_t 1
  20# define __have_sigevent_t 1
  21#endif
  22
  23#include <errno.h>
  24#include <linux/filter.h>
  25#include <sys/prctl.h>
  26#include <sys/ptrace.h>
  27#include <sys/user.h>
  28#include <linux/prctl.h>
  29#include <linux/ptrace.h>
  30#include <linux/seccomp.h>
  31#include <pthread.h>
  32#include <semaphore.h>
  33#include <signal.h>
  34#include <stddef.h>
  35#include <stdbool.h>
  36#include <string.h>
  37#include <time.h>
  38#include <limits.h>
  39#include <linux/elf.h>
  40#include <sys/uio.h>
  41#include <sys/utsname.h>
  42#include <sys/fcntl.h>
  43#include <sys/mman.h>
  44#include <sys/times.h>
  45#include <sys/socket.h>
  46#include <sys/ioctl.h>
  47#include <linux/kcmp.h>
  48#include <sys/resource.h>
  49
  50#include <unistd.h>
  51#include <sys/syscall.h>
  52#include <poll.h>
  53
  54#include "../kselftest_harness.h"
  55#include "../clone3/clone3_selftests.h"
  56
  57/* Attempt to de-conflict with the selftests tree. */
  58#ifndef SKIP
  59#define SKIP(s, ...)    XFAIL(s, ##__VA_ARGS__)
  60#endif
  61
  62#ifndef PR_SET_PTRACER
  63# define PR_SET_PTRACER 0x59616d61
  64#endif
  65
  66#ifndef PR_SET_NO_NEW_PRIVS
  67#define PR_SET_NO_NEW_PRIVS 38
  68#define PR_GET_NO_NEW_PRIVS 39
  69#endif
  70
  71#ifndef PR_SECCOMP_EXT
  72#define PR_SECCOMP_EXT 43
  73#endif
  74
  75#ifndef SECCOMP_EXT_ACT
  76#define SECCOMP_EXT_ACT 1
  77#endif
  78
  79#ifndef SECCOMP_EXT_ACT_TSYNC
  80#define SECCOMP_EXT_ACT_TSYNC 1
  81#endif
  82
  83#ifndef SECCOMP_MODE_STRICT
  84#define SECCOMP_MODE_STRICT 1
  85#endif
  86
  87#ifndef SECCOMP_MODE_FILTER
  88#define SECCOMP_MODE_FILTER 2
  89#endif
  90
  91#ifndef SECCOMP_RET_ALLOW
  92struct seccomp_data {
  93        int nr;
  94        __u32 arch;
  95        __u64 instruction_pointer;
  96        __u64 args[6];
  97};
  98#endif
  99
 100#ifndef SECCOMP_RET_KILL_PROCESS
 101#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
 102#define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
 103#endif
 104#ifndef SECCOMP_RET_KILL
 105#define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
 106#define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
 107#define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
 108#define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
 109#define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
 110#endif
 111#ifndef SECCOMP_RET_LOG
 112#define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
 113#endif
 114
 115#ifndef __NR_seccomp
 116# if defined(__i386__)
 117#  define __NR_seccomp 354
 118# elif defined(__x86_64__)
 119#  define __NR_seccomp 317
 120# elif defined(__arm__)
 121#  define __NR_seccomp 383
 122# elif defined(__aarch64__)
 123#  define __NR_seccomp 277
 124# elif defined(__riscv)
 125#  define __NR_seccomp 277
 126# elif defined(__csky__)
 127#  define __NR_seccomp 277
 128# elif defined(__hppa__)
 129#  define __NR_seccomp 338
 130# elif defined(__powerpc__)
 131#  define __NR_seccomp 358
 132# elif defined(__s390__)
 133#  define __NR_seccomp 348
 134# elif defined(__xtensa__)
 135#  define __NR_seccomp 337
 136# elif defined(__sh__)
 137#  define __NR_seccomp 372
 138# else
 139#  warning "seccomp syscall number unknown for this architecture"
 140#  define __NR_seccomp 0xffff
 141# endif
 142#endif
 143
 144#ifndef SECCOMP_SET_MODE_STRICT
 145#define SECCOMP_SET_MODE_STRICT 0
 146#endif
 147
 148#ifndef SECCOMP_SET_MODE_FILTER
 149#define SECCOMP_SET_MODE_FILTER 1
 150#endif
 151
 152#ifndef SECCOMP_GET_ACTION_AVAIL
 153#define SECCOMP_GET_ACTION_AVAIL 2
 154#endif
 155
 156#ifndef SECCOMP_GET_NOTIF_SIZES
 157#define SECCOMP_GET_NOTIF_SIZES 3
 158#endif
 159
 160#ifndef SECCOMP_FILTER_FLAG_TSYNC
 161#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
 162#endif
 163
 164#ifndef SECCOMP_FILTER_FLAG_LOG
 165#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
 166#endif
 167
 168#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
 169#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
 170#endif
 171
 172#ifndef PTRACE_SECCOMP_GET_METADATA
 173#define PTRACE_SECCOMP_GET_METADATA     0x420d
 174
 175struct seccomp_metadata {
 176        __u64 filter_off;       /* Input: which filter */
 177        __u64 flags;             /* Output: filter's flags */
 178};
 179#endif
 180
 181#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
 182#define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)
 183#endif
 184
 185#ifndef SECCOMP_RET_USER_NOTIF
 186#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
 187
 188#define SECCOMP_IOC_MAGIC               '!'
 189#define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
 190#define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
 191#define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
 192#define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)
 193
 194/* Flags for seccomp notification fd ioctl. */
 195#define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
 196#define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
 197                                                struct seccomp_notif_resp)
 198#define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOW(2, __u64)
 199
 200struct seccomp_notif {
 201        __u64 id;
 202        __u32 pid;
 203        __u32 flags;
 204        struct seccomp_data data;
 205};
 206
 207struct seccomp_notif_resp {
 208        __u64 id;
 209        __s64 val;
 210        __s32 error;
 211        __u32 flags;
 212};
 213
 214struct seccomp_notif_sizes {
 215        __u16 seccomp_notif;
 216        __u16 seccomp_notif_resp;
 217        __u16 seccomp_data;
 218};
 219#endif
 220
 221#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
 222/* On success, the return value is the remote process's added fd number */
 223#define SECCOMP_IOCTL_NOTIF_ADDFD       SECCOMP_IOW(3,  \
 224                                                struct seccomp_notif_addfd)
 225
 226/* valid flags for seccomp_notif_addfd */
 227#define SECCOMP_ADDFD_FLAG_SETFD        (1UL << 0) /* Specify remote fd */
 228
 229struct seccomp_notif_addfd {
 230        __u64 id;
 231        __u32 flags;
 232        __u32 srcfd;
 233        __u32 newfd;
 234        __u32 newfd_flags;
 235};
 236#endif
 237
 238struct seccomp_notif_addfd_small {
 239        __u64 id;
 240        char weird[4];
 241};
 242#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
 243        SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
 244
 245struct seccomp_notif_addfd_big {
 246        union {
 247                struct seccomp_notif_addfd addfd;
 248                char buf[sizeof(struct seccomp_notif_addfd) + 8];
 249        };
 250};
 251#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG   \
 252        SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
 253
 254#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
 255#define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
 256#define PTRACE_EVENTMSG_SYSCALL_EXIT    2
 257#endif
 258
 259#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
 260#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
 261#endif
 262
 263#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
 264#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
 265#endif
 266
 267#ifndef seccomp
 268int seccomp(unsigned int op, unsigned int flags, void *args)
 269{
 270        errno = 0;
 271        return syscall(__NR_seccomp, op, flags, args);
 272}
 273#endif
 274
 275#if __BYTE_ORDER == __LITTLE_ENDIAN
 276#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
 277#elif __BYTE_ORDER == __BIG_ENDIAN
 278#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
 279#else
 280#error "wut? Unknown __BYTE_ORDER?!"
 281#endif
 282
 283#define SIBLING_EXIT_UNKILLED   0xbadbeef
 284#define SIBLING_EXIT_FAILURE    0xbadface
 285#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 286
 287static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
 288{
 289#ifdef __NR_kcmp
 290        errno = 0;
 291        return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
 292#else
 293        errno = ENOSYS;
 294        return -1;
 295#endif
 296}
 297
 298/* Have TH_LOG report actual location filecmp() is used. */
 299#define filecmp(pid1, pid2, fd1, fd2)   ({              \
 300        int _ret;                                       \
 301                                                        \
 302        _ret = __filecmp(pid1, pid2, fd1, fd2);         \
 303        if (_ret != 0) {                                \
 304                if (_ret < 0 && errno == ENOSYS) {      \
 305                        TH_LOG("kcmp() syscall missing (test is less accurate)");\
 306                        _ret = 0;                       \
 307                }                                       \
 308        }                                               \
 309        _ret; })
 310
 311TEST(kcmp)
 312{
 313        int ret;
 314
 315        ret = __filecmp(getpid(), getpid(), 1, 1);
 316        EXPECT_EQ(ret, 0);
 317        if (ret != 0 && errno == ENOSYS)
 318                SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
 319}
 320
 321TEST(mode_strict_support)
 322{
 323        long ret;
 324
 325        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 326        ASSERT_EQ(0, ret) {
 327                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 328        }
 329        syscall(__NR_exit, 0);
 330}
 331
 332TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 333{
 334        long ret;
 335
 336        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 337        ASSERT_EQ(0, ret) {
 338                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 339        }
 340        syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 341                NULL, NULL, NULL);
 342        EXPECT_FALSE(true) {
 343                TH_LOG("Unreachable!");
 344        }
 345}
 346
 347/* Note! This doesn't test no new privs behavior */
 348TEST(no_new_privs_support)
 349{
 350        long ret;
 351
 352        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 353        EXPECT_EQ(0, ret) {
 354                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 355        }
 356}
 357
 358/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 359TEST(mode_filter_support)
 360{
 361        long ret;
 362
 363        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 364        ASSERT_EQ(0, ret) {
 365                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 366        }
 367        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 368        EXPECT_EQ(-1, ret);
 369        EXPECT_EQ(EFAULT, errno) {
 370                TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 371        }
 372}
 373
 374TEST(mode_filter_without_nnp)
 375{
 376        struct sock_filter filter[] = {
 377                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 378        };
 379        struct sock_fprog prog = {
 380                .len = (unsigned short)ARRAY_SIZE(filter),
 381                .filter = filter,
 382        };
 383        long ret;
 384
 385        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 386        ASSERT_LE(0, ret) {
 387                TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 388        }
 389        errno = 0;
 390        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 391        /* Succeeds with CAP_SYS_ADMIN, fails without */
 392        /* TODO(wad) check caps not euid */
 393        if (geteuid()) {
 394                EXPECT_EQ(-1, ret);
 395                EXPECT_EQ(EACCES, errno);
 396        } else {
 397                EXPECT_EQ(0, ret);
 398        }
 399}
 400
 401#define MAX_INSNS_PER_PATH 32768
 402
 403TEST(filter_size_limits)
 404{
 405        int i;
 406        int count = BPF_MAXINSNS + 1;
 407        struct sock_filter allow[] = {
 408                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 409        };
 410        struct sock_filter *filter;
 411        struct sock_fprog prog = { };
 412        long ret;
 413
 414        filter = calloc(count, sizeof(*filter));
 415        ASSERT_NE(NULL, filter);
 416
 417        for (i = 0; i < count; i++)
 418                filter[i] = allow[0];
 419
 420        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 421        ASSERT_EQ(0, ret);
 422
 423        prog.filter = filter;
 424        prog.len = count;
 425
 426        /* Too many filter instructions in a single filter. */
 427        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 428        ASSERT_NE(0, ret) {
 429                TH_LOG("Installing %d insn filter was allowed", prog.len);
 430        }
 431
 432        /* One less is okay, though. */
 433        prog.len -= 1;
 434        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 435        ASSERT_EQ(0, ret) {
 436                TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 437        }
 438}
 439
 440TEST(filter_chain_limits)
 441{
 442        int i;
 443        int count = BPF_MAXINSNS;
 444        struct sock_filter allow[] = {
 445                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 446        };
 447        struct sock_filter *filter;
 448        struct sock_fprog prog = { };
 449        long ret;
 450
 451        filter = calloc(count, sizeof(*filter));
 452        ASSERT_NE(NULL, filter);
 453
 454        for (i = 0; i < count; i++)
 455                filter[i] = allow[0];
 456
 457        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 458        ASSERT_EQ(0, ret);
 459
 460        prog.filter = filter;
 461        prog.len = 1;
 462
 463        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 464        ASSERT_EQ(0, ret);
 465
 466        prog.len = count;
 467
 468        /* Too many total filter instructions. */
 469        for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 470                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 471                if (ret != 0)
 472                        break;
 473        }
 474        ASSERT_NE(0, ret) {
 475                TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 476                       i, count, i * (count + 4));
 477        }
 478}
 479
 480TEST(mode_filter_cannot_move_to_strict)
 481{
 482        struct sock_filter filter[] = {
 483                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 484        };
 485        struct sock_fprog prog = {
 486                .len = (unsigned short)ARRAY_SIZE(filter),
 487                .filter = filter,
 488        };
 489        long ret;
 490
 491        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 492        ASSERT_EQ(0, ret);
 493
 494        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 495        ASSERT_EQ(0, ret);
 496
 497        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 498        EXPECT_EQ(-1, ret);
 499        EXPECT_EQ(EINVAL, errno);
 500}
 501
 502
 503TEST(mode_filter_get_seccomp)
 504{
 505        struct sock_filter filter[] = {
 506                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 507        };
 508        struct sock_fprog prog = {
 509                .len = (unsigned short)ARRAY_SIZE(filter),
 510                .filter = filter,
 511        };
 512        long ret;
 513
 514        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 515        ASSERT_EQ(0, ret);
 516
 517        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 518        EXPECT_EQ(0, ret);
 519
 520        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 521        ASSERT_EQ(0, ret);
 522
 523        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 524        EXPECT_EQ(2, ret);
 525}
 526
 527
 528TEST(ALLOW_all)
 529{
 530        struct sock_filter filter[] = {
 531                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 532        };
 533        struct sock_fprog prog = {
 534                .len = (unsigned short)ARRAY_SIZE(filter),
 535                .filter = filter,
 536        };
 537        long ret;
 538
 539        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 540        ASSERT_EQ(0, ret);
 541
 542        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 543        ASSERT_EQ(0, ret);
 544}
 545
 546TEST(empty_prog)
 547{
 548        struct sock_filter filter[] = {
 549        };
 550        struct sock_fprog prog = {
 551                .len = (unsigned short)ARRAY_SIZE(filter),
 552                .filter = filter,
 553        };
 554        long ret;
 555
 556        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 557        ASSERT_EQ(0, ret);
 558
 559        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 560        EXPECT_EQ(-1, ret);
 561        EXPECT_EQ(EINVAL, errno);
 562}
 563
 564TEST(log_all)
 565{
 566        struct sock_filter filter[] = {
 567                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
 568        };
 569        struct sock_fprog prog = {
 570                .len = (unsigned short)ARRAY_SIZE(filter),
 571                .filter = filter,
 572        };
 573        long ret;
 574        pid_t parent = getppid();
 575
 576        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 577        ASSERT_EQ(0, ret);
 578
 579        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 580        ASSERT_EQ(0, ret);
 581
 582        /* getppid() should succeed and be logged (no check for logging) */
 583        EXPECT_EQ(parent, syscall(__NR_getppid));
 584}
 585
 586TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 587{
 588        struct sock_filter filter[] = {
 589                BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 590        };
 591        struct sock_fprog prog = {
 592                .len = (unsigned short)ARRAY_SIZE(filter),
 593                .filter = filter,
 594        };
 595        long ret;
 596
 597        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 598        ASSERT_EQ(0, ret);
 599
 600        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 601        ASSERT_EQ(0, ret);
 602        EXPECT_EQ(0, syscall(__NR_getpid)) {
 603                TH_LOG("getpid() shouldn't ever return");
 604        }
 605}
 606
 607/* return code >= 0x80000000 is unused. */
 608TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 609{
 610        struct sock_filter filter[] = {
 611                BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 612        };
 613        struct sock_fprog prog = {
 614                .len = (unsigned short)ARRAY_SIZE(filter),
 615                .filter = filter,
 616        };
 617        long ret;
 618
 619        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 620        ASSERT_EQ(0, ret);
 621
 622        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 623        ASSERT_EQ(0, ret);
 624        EXPECT_EQ(0, syscall(__NR_getpid)) {
 625                TH_LOG("getpid() shouldn't ever return");
 626        }
 627}
 628
 629TEST_SIGNAL(KILL_all, SIGSYS)
 630{
 631        struct sock_filter filter[] = {
 632                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 633        };
 634        struct sock_fprog prog = {
 635                .len = (unsigned short)ARRAY_SIZE(filter),
 636                .filter = filter,
 637        };
 638        long ret;
 639
 640        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 641        ASSERT_EQ(0, ret);
 642
 643        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 644        ASSERT_EQ(0, ret);
 645}
 646
 647TEST_SIGNAL(KILL_one, SIGSYS)
 648{
 649        struct sock_filter filter[] = {
 650                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 651                        offsetof(struct seccomp_data, nr)),
 652                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 653                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 654                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 655        };
 656        struct sock_fprog prog = {
 657                .len = (unsigned short)ARRAY_SIZE(filter),
 658                .filter = filter,
 659        };
 660        long ret;
 661        pid_t parent = getppid();
 662
 663        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 664        ASSERT_EQ(0, ret);
 665
 666        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 667        ASSERT_EQ(0, ret);
 668
 669        EXPECT_EQ(parent, syscall(__NR_getppid));
 670        /* getpid() should never return. */
 671        EXPECT_EQ(0, syscall(__NR_getpid));
 672}
 673
 674TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 675{
 676        void *fatal_address;
 677        struct sock_filter filter[] = {
 678                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 679                        offsetof(struct seccomp_data, nr)),
 680                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 681                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 682                /* Only both with lower 32-bit for now. */
 683                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 684                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 685                        (unsigned long)&fatal_address, 0, 1),
 686                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 687                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 688        };
 689        struct sock_fprog prog = {
 690                .len = (unsigned short)ARRAY_SIZE(filter),
 691                .filter = filter,
 692        };
 693        long ret;
 694        pid_t parent = getppid();
 695        struct tms timebuf;
 696        clock_t clock = times(&timebuf);
 697
 698        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 699        ASSERT_EQ(0, ret);
 700
 701        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 702        ASSERT_EQ(0, ret);
 703
 704        EXPECT_EQ(parent, syscall(__NR_getppid));
 705        EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 706        /* times() should never return. */
 707        EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 708}
 709
 710TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 711{
 712#ifndef __NR_mmap2
 713        int sysno = __NR_mmap;
 714#else
 715        int sysno = __NR_mmap2;
 716#endif
 717        struct sock_filter filter[] = {
 718                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 719                        offsetof(struct seccomp_data, nr)),
 720                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 721                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 722                /* Only both with lower 32-bit for now. */
 723                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 724                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 725                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 726                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 727        };
 728        struct sock_fprog prog = {
 729                .len = (unsigned short)ARRAY_SIZE(filter),
 730                .filter = filter,
 731        };
 732        long ret;
 733        pid_t parent = getppid();
 734        int fd;
 735        void *map1, *map2;
 736        int page_size = sysconf(_SC_PAGESIZE);
 737
 738        ASSERT_LT(0, page_size);
 739
 740        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 741        ASSERT_EQ(0, ret);
 742
 743        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 744        ASSERT_EQ(0, ret);
 745
 746        fd = open("/dev/zero", O_RDONLY);
 747        ASSERT_NE(-1, fd);
 748
 749        EXPECT_EQ(parent, syscall(__NR_getppid));
 750        map1 = (void *)syscall(sysno,
 751                NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 752        EXPECT_NE(MAP_FAILED, map1);
 753        /* mmap2() should never return. */
 754        map2 = (void *)syscall(sysno,
 755                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 756        EXPECT_EQ(MAP_FAILED, map2);
 757
 758        /* The test failed, so clean up the resources. */
 759        munmap(map1, page_size);
 760        munmap(map2, page_size);
 761        close(fd);
 762}
 763
 764/* This is a thread task to die via seccomp filter violation. */
 765void *kill_thread(void *data)
 766{
 767        bool die = (bool)data;
 768
 769        if (die) {
 770                prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 771                return (void *)SIBLING_EXIT_FAILURE;
 772        }
 773
 774        return (void *)SIBLING_EXIT_UNKILLED;
 775}
 776
 777/* Prepare a thread that will kill itself or both of us. */
 778void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 779{
 780        pthread_t thread;
 781        void *status;
 782        /* Kill only when calling __NR_prctl. */
 783        struct sock_filter filter_thread[] = {
 784                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 785                        offsetof(struct seccomp_data, nr)),
 786                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 787                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
 788                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 789        };
 790        struct sock_fprog prog_thread = {
 791                .len = (unsigned short)ARRAY_SIZE(filter_thread),
 792                .filter = filter_thread,
 793        };
 794        struct sock_filter filter_process[] = {
 795                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 796                        offsetof(struct seccomp_data, nr)),
 797                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 798                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
 799                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 800        };
 801        struct sock_fprog prog_process = {
 802                .len = (unsigned short)ARRAY_SIZE(filter_process),
 803                .filter = filter_process,
 804        };
 805
 806        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
 807                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 808        }
 809
 810        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
 811                             kill_process ? &prog_process : &prog_thread));
 812
 813        /*
 814         * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 815         * flag cannot be downgraded by a new filter.
 816         */
 817        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 818
 819        /* Start a thread that will exit immediately. */
 820        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
 821        ASSERT_EQ(0, pthread_join(thread, &status));
 822        ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
 823
 824        /* Start a thread that will die immediately. */
 825        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
 826        ASSERT_EQ(0, pthread_join(thread, &status));
 827        ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
 828
 829        /*
 830         * If we get here, only the spawned thread died. Let the parent know
 831         * the whole process didn't die (i.e. this thread, the spawner,
 832         * stayed running).
 833         */
 834        exit(42);
 835}
 836
 837TEST(KILL_thread)
 838{
 839        int status;
 840        pid_t child_pid;
 841
 842        child_pid = fork();
 843        ASSERT_LE(0, child_pid);
 844        if (child_pid == 0) {
 845                kill_thread_or_group(_metadata, false);
 846                _exit(38);
 847        }
 848
 849        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 850
 851        /* If only the thread was killed, we'll see exit 42. */
 852        ASSERT_TRUE(WIFEXITED(status));
 853        ASSERT_EQ(42, WEXITSTATUS(status));
 854}
 855
 856TEST(KILL_process)
 857{
 858        int status;
 859        pid_t child_pid;
 860
 861        child_pid = fork();
 862        ASSERT_LE(0, child_pid);
 863        if (child_pid == 0) {
 864                kill_thread_or_group(_metadata, true);
 865                _exit(38);
 866        }
 867
 868        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 869
 870        /* If the entire process was killed, we'll see SIGSYS. */
 871        ASSERT_TRUE(WIFSIGNALED(status));
 872        ASSERT_EQ(SIGSYS, WTERMSIG(status));
 873}
 874
 875/* TODO(wad) add 64-bit versus 32-bit arg tests. */
 876TEST(arg_out_of_range)
 877{
 878        struct sock_filter filter[] = {
 879                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 880                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 881        };
 882        struct sock_fprog prog = {
 883                .len = (unsigned short)ARRAY_SIZE(filter),
 884                .filter = filter,
 885        };
 886        long ret;
 887
 888        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 889        ASSERT_EQ(0, ret);
 890
 891        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 892        EXPECT_EQ(-1, ret);
 893        EXPECT_EQ(EINVAL, errno);
 894}
 895
 896#define ERRNO_FILTER(name, errno)                                       \
 897        struct sock_filter _read_filter_##name[] = {                    \
 898                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,                          \
 899                        offsetof(struct seccomp_data, nr)),             \
 900                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),       \
 901                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),     \
 902                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),             \
 903        };                                                              \
 904        struct sock_fprog prog_##name = {                               \
 905                .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
 906                .filter = _read_filter_##name,                          \
 907        }
 908
 909/* Make sure basic errno values are correctly passed through a filter. */
 910TEST(ERRNO_valid)
 911{
 912        ERRNO_FILTER(valid, E2BIG);
 913        long ret;
 914        pid_t parent = getppid();
 915
 916        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 917        ASSERT_EQ(0, ret);
 918
 919        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 920        ASSERT_EQ(0, ret);
 921
 922        EXPECT_EQ(parent, syscall(__NR_getppid));
 923        EXPECT_EQ(-1, read(0, NULL, 0));
 924        EXPECT_EQ(E2BIG, errno);
 925}
 926
 927/* Make sure an errno of zero is correctly handled by the arch code. */
 928TEST(ERRNO_zero)
 929{
 930        ERRNO_FILTER(zero, 0);
 931        long ret;
 932        pid_t parent = getppid();
 933
 934        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 935        ASSERT_EQ(0, ret);
 936
 937        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 938        ASSERT_EQ(0, ret);
 939
 940        EXPECT_EQ(parent, syscall(__NR_getppid));
 941        /* "errno" of 0 is ok. */
 942        EXPECT_EQ(0, read(0, NULL, 0));
 943}
 944
 945/*
 946 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 947 * This tests that the errno value gets capped correctly, fixed by
 948 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 949 */
 950TEST(ERRNO_capped)
 951{
 952        ERRNO_FILTER(capped, 4096);
 953        long ret;
 954        pid_t parent = getppid();
 955
 956        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 957        ASSERT_EQ(0, ret);
 958
 959        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 960        ASSERT_EQ(0, ret);
 961
 962        EXPECT_EQ(parent, syscall(__NR_getppid));
 963        EXPECT_EQ(-1, read(0, NULL, 0));
 964        EXPECT_EQ(4095, errno);
 965}
 966
 967/*
 968 * Filters are processed in reverse order: last applied is executed first.
 969 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 970 * SECCOMP_RET_DATA mask results will follow the most recently applied
 971 * matching filter return (and not the lowest or highest value).
 972 */
 973TEST(ERRNO_order)
 974{
 975        ERRNO_FILTER(first,  11);
 976        ERRNO_FILTER(second, 13);
 977        ERRNO_FILTER(third,  12);
 978        long ret;
 979        pid_t parent = getppid();
 980
 981        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 982        ASSERT_EQ(0, ret);
 983
 984        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
 985        ASSERT_EQ(0, ret);
 986
 987        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
 988        ASSERT_EQ(0, ret);
 989
 990        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
 991        ASSERT_EQ(0, ret);
 992
 993        EXPECT_EQ(parent, syscall(__NR_getppid));
 994        EXPECT_EQ(-1, read(0, NULL, 0));
 995        EXPECT_EQ(12, errno);
 996}
 997
 998FIXTURE(TRAP) {
 999        struct sock_fprog prog;
1000};
1001
1002FIXTURE_SETUP(TRAP)
1003{
1004        struct sock_filter filter[] = {
1005                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1006                        offsetof(struct seccomp_data, nr)),
1007                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1008                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1009                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1010        };
1011
1012        memset(&self->prog, 0, sizeof(self->prog));
1013        self->prog.filter = malloc(sizeof(filter));
1014        ASSERT_NE(NULL, self->prog.filter);
1015        memcpy(self->prog.filter, filter, sizeof(filter));
1016        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1017}
1018
1019FIXTURE_TEARDOWN(TRAP)
1020{
1021        if (self->prog.filter)
1022                free(self->prog.filter);
1023}
1024
1025TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
1026{
1027        long ret;
1028
1029        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1030        ASSERT_EQ(0, ret);
1031
1032        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1033        ASSERT_EQ(0, ret);
1034        syscall(__NR_getpid);
1035}
1036
1037/* Ensure that SIGSYS overrides SIG_IGN */
1038TEST_F_SIGNAL(TRAP, ign, SIGSYS)
1039{
1040        long ret;
1041
1042        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1043        ASSERT_EQ(0, ret);
1044
1045        signal(SIGSYS, SIG_IGN);
1046
1047        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1048        ASSERT_EQ(0, ret);
1049        syscall(__NR_getpid);
1050}
1051
1052static siginfo_t TRAP_info;
1053static volatile int TRAP_nr;
1054static void TRAP_action(int nr, siginfo_t *info, void *void_context)
1055{
1056        memcpy(&TRAP_info, info, sizeof(TRAP_info));
1057        TRAP_nr = nr;
1058}
1059
1060TEST_F(TRAP, handler)
1061{
1062        int ret, test;
1063        struct sigaction act;
1064        sigset_t mask;
1065
1066        memset(&act, 0, sizeof(act));
1067        sigemptyset(&mask);
1068        sigaddset(&mask, SIGSYS);
1069
1070        act.sa_sigaction = &TRAP_action;
1071        act.sa_flags = SA_SIGINFO;
1072        ret = sigaction(SIGSYS, &act, NULL);
1073        ASSERT_EQ(0, ret) {
1074                TH_LOG("sigaction failed");
1075        }
1076        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
1077        ASSERT_EQ(0, ret) {
1078                TH_LOG("sigprocmask failed");
1079        }
1080
1081        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1082        ASSERT_EQ(0, ret);
1083        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1084        ASSERT_EQ(0, ret);
1085        TRAP_nr = 0;
1086        memset(&TRAP_info, 0, sizeof(TRAP_info));
1087        /* Expect the registers to be rolled back. (nr = error) may vary
1088         * based on arch. */
1089        ret = syscall(__NR_getpid);
1090        /* Silence gcc warning about volatile. */
1091        test = TRAP_nr;
1092        EXPECT_EQ(SIGSYS, test);
1093        struct local_sigsys {
1094                void *_call_addr;       /* calling user insn */
1095                int _syscall;           /* triggering system call number */
1096                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
1097        } *sigsys = (struct local_sigsys *)
1098#ifdef si_syscall
1099                &(TRAP_info.si_call_addr);
1100#else
1101                &TRAP_info.si_pid;
1102#endif
1103        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1104        /* Make sure arch is non-zero. */
1105        EXPECT_NE(0, sigsys->_arch);
1106        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1107}
1108
1109FIXTURE(precedence) {
1110        struct sock_fprog allow;
1111        struct sock_fprog log;
1112        struct sock_fprog trace;
1113        struct sock_fprog error;
1114        struct sock_fprog trap;
1115        struct sock_fprog kill;
1116};
1117
1118FIXTURE_SETUP(precedence)
1119{
1120        struct sock_filter allow_insns[] = {
1121                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1122        };
1123        struct sock_filter log_insns[] = {
1124                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1125                        offsetof(struct seccomp_data, nr)),
1126                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1127                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1128                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1129        };
1130        struct sock_filter trace_insns[] = {
1131                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1132                        offsetof(struct seccomp_data, nr)),
1133                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1134                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1135                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1136        };
1137        struct sock_filter error_insns[] = {
1138                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1139                        offsetof(struct seccomp_data, nr)),
1140                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1141                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1142                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1143        };
1144        struct sock_filter trap_insns[] = {
1145                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1146                        offsetof(struct seccomp_data, nr)),
1147                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1148                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1149                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1150        };
1151        struct sock_filter kill_insns[] = {
1152                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1153                        offsetof(struct seccomp_data, nr)),
1154                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1155                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1156                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1157        };
1158
1159        memset(self, 0, sizeof(*self));
1160#define FILTER_ALLOC(_x) \
1161        self->_x.filter = malloc(sizeof(_x##_insns)); \
1162        ASSERT_NE(NULL, self->_x.filter); \
1163        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1164        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1165        FILTER_ALLOC(allow);
1166        FILTER_ALLOC(log);
1167        FILTER_ALLOC(trace);
1168        FILTER_ALLOC(error);
1169        FILTER_ALLOC(trap);
1170        FILTER_ALLOC(kill);
1171}
1172
1173FIXTURE_TEARDOWN(precedence)
1174{
1175#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1176        FILTER_FREE(allow);
1177        FILTER_FREE(log);
1178        FILTER_FREE(trace);
1179        FILTER_FREE(error);
1180        FILTER_FREE(trap);
1181        FILTER_FREE(kill);
1182}
1183
1184TEST_F(precedence, allow_ok)
1185{
1186        pid_t parent, res = 0;
1187        long ret;
1188
1189        parent = getppid();
1190        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1191        ASSERT_EQ(0, ret);
1192
1193        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1194        ASSERT_EQ(0, ret);
1195        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1196        ASSERT_EQ(0, ret);
1197        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1198        ASSERT_EQ(0, ret);
1199        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1200        ASSERT_EQ(0, ret);
1201        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1202        ASSERT_EQ(0, ret);
1203        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1204        ASSERT_EQ(0, ret);
1205        /* Should work just fine. */
1206        res = syscall(__NR_getppid);
1207        EXPECT_EQ(parent, res);
1208}
1209
1210TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1211{
1212        pid_t parent, res = 0;
1213        long ret;
1214
1215        parent = getppid();
1216        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1217        ASSERT_EQ(0, ret);
1218
1219        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1220        ASSERT_EQ(0, ret);
1221        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1222        ASSERT_EQ(0, ret);
1223        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1224        ASSERT_EQ(0, ret);
1225        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1226        ASSERT_EQ(0, ret);
1227        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1228        ASSERT_EQ(0, ret);
1229        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1230        ASSERT_EQ(0, ret);
1231        /* Should work just fine. */
1232        res = syscall(__NR_getppid);
1233        EXPECT_EQ(parent, res);
1234        /* getpid() should never return. */
1235        res = syscall(__NR_getpid);
1236        EXPECT_EQ(0, res);
1237}
1238
1239TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1240{
1241        pid_t parent;
1242        long ret;
1243
1244        parent = getppid();
1245        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1246        ASSERT_EQ(0, ret);
1247
1248        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1249        ASSERT_EQ(0, ret);
1250        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1251        ASSERT_EQ(0, ret);
1252        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1253        ASSERT_EQ(0, ret);
1254        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1255        ASSERT_EQ(0, ret);
1256        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1257        ASSERT_EQ(0, ret);
1258        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1259        ASSERT_EQ(0, ret);
1260        /* Should work just fine. */
1261        EXPECT_EQ(parent, syscall(__NR_getppid));
1262        /* getpid() should never return. */
1263        EXPECT_EQ(0, syscall(__NR_getpid));
1264}
1265
1266TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1267{
1268        pid_t parent;
1269        long ret;
1270
1271        parent = getppid();
1272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1273        ASSERT_EQ(0, ret);
1274
1275        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1276        ASSERT_EQ(0, ret);
1277        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1278        ASSERT_EQ(0, ret);
1279        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1280        ASSERT_EQ(0, ret);
1281        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1282        ASSERT_EQ(0, ret);
1283        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1284        ASSERT_EQ(0, ret);
1285        /* Should work just fine. */
1286        EXPECT_EQ(parent, syscall(__NR_getppid));
1287        /* getpid() should never return. */
1288        EXPECT_EQ(0, syscall(__NR_getpid));
1289}
1290
1291TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1292{
1293        pid_t parent;
1294        long ret;
1295
1296        parent = getppid();
1297        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1298        ASSERT_EQ(0, ret);
1299
1300        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1301        ASSERT_EQ(0, ret);
1302        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1303        ASSERT_EQ(0, ret);
1304        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1305        ASSERT_EQ(0, ret);
1306        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1307        ASSERT_EQ(0, ret);
1308        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1309        ASSERT_EQ(0, ret);
1310        /* Should work just fine. */
1311        EXPECT_EQ(parent, syscall(__NR_getppid));
1312        /* getpid() should never return. */
1313        EXPECT_EQ(0, syscall(__NR_getpid));
1314}
1315
1316TEST_F(precedence, errno_is_third)
1317{
1318        pid_t parent;
1319        long ret;
1320
1321        parent = getppid();
1322        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1323        ASSERT_EQ(0, ret);
1324
1325        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1326        ASSERT_EQ(0, ret);
1327        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1328        ASSERT_EQ(0, ret);
1329        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1330        ASSERT_EQ(0, ret);
1331        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1332        ASSERT_EQ(0, ret);
1333        /* Should work just fine. */
1334        EXPECT_EQ(parent, syscall(__NR_getppid));
1335        EXPECT_EQ(0, syscall(__NR_getpid));
1336}
1337
1338TEST_F(precedence, errno_is_third_in_any_order)
1339{
1340        pid_t parent;
1341        long ret;
1342
1343        parent = getppid();
1344        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1345        ASSERT_EQ(0, ret);
1346
1347        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1348        ASSERT_EQ(0, ret);
1349        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1350        ASSERT_EQ(0, ret);
1351        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1352        ASSERT_EQ(0, ret);
1353        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1354        ASSERT_EQ(0, ret);
1355        /* Should work just fine. */
1356        EXPECT_EQ(parent, syscall(__NR_getppid));
1357        EXPECT_EQ(0, syscall(__NR_getpid));
1358}
1359
1360TEST_F(precedence, trace_is_fourth)
1361{
1362        pid_t parent;
1363        long ret;
1364
1365        parent = getppid();
1366        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1367        ASSERT_EQ(0, ret);
1368
1369        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1370        ASSERT_EQ(0, ret);
1371        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1372        ASSERT_EQ(0, ret);
1373        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1374        ASSERT_EQ(0, ret);
1375        /* Should work just fine. */
1376        EXPECT_EQ(parent, syscall(__NR_getppid));
1377        /* No ptracer */
1378        EXPECT_EQ(-1, syscall(__NR_getpid));
1379}
1380
1381TEST_F(precedence, trace_is_fourth_in_any_order)
1382{
1383        pid_t parent;
1384        long ret;
1385
1386        parent = getppid();
1387        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1388        ASSERT_EQ(0, ret);
1389
1390        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1391        ASSERT_EQ(0, ret);
1392        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1393        ASSERT_EQ(0, ret);
1394        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1395        ASSERT_EQ(0, ret);
1396        /* Should work just fine. */
1397        EXPECT_EQ(parent, syscall(__NR_getppid));
1398        /* No ptracer */
1399        EXPECT_EQ(-1, syscall(__NR_getpid));
1400}
1401
1402TEST_F(precedence, log_is_fifth)
1403{
1404        pid_t mypid, parent;
1405        long ret;
1406
1407        mypid = getpid();
1408        parent = getppid();
1409        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1410        ASSERT_EQ(0, ret);
1411
1412        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1413        ASSERT_EQ(0, ret);
1414        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1415        ASSERT_EQ(0, ret);
1416        /* Should work just fine. */
1417        EXPECT_EQ(parent, syscall(__NR_getppid));
1418        /* Should also work just fine */
1419        EXPECT_EQ(mypid, syscall(__NR_getpid));
1420}
1421
1422TEST_F(precedence, log_is_fifth_in_any_order)
1423{
1424        pid_t mypid, parent;
1425        long ret;
1426
1427        mypid = getpid();
1428        parent = getppid();
1429        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1430        ASSERT_EQ(0, ret);
1431
1432        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1433        ASSERT_EQ(0, ret);
1434        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1435        ASSERT_EQ(0, ret);
1436        /* Should work just fine. */
1437        EXPECT_EQ(parent, syscall(__NR_getppid));
1438        /* Should also work just fine */
1439        EXPECT_EQ(mypid, syscall(__NR_getpid));
1440}
1441
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when a wait status carries a seccomp ptrace event, i.e. when the
 * bits above the low 16 equal PTRACE_EVENT_SECCOMP. The argument is
 * parenthesized so that expressions such as "a | b" are shifted as a
 * whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag of the tracer loop. It is cleared from a signal handler, so
 * it is a volatile sig_atomic_t: the only object type the C standard
 * guarantees can be safely written from a handler.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop. */
void tracer_stop(int sig)
{
	tracer_running = false;
}
1461
1462typedef void tracer_func_t(struct __test_metadata *_metadata,
1463                           pid_t tracee, int status, void *args);
1464
1465void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1466            tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1467{
1468        int ret = -1;
1469        struct sigaction action = {
1470                .sa_handler = tracer_stop,
1471        };
1472
1473        /* Allow external shutdown. */
1474        tracer_running = true;
1475        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1476
1477        errno = 0;
1478        while (ret == -1 && errno != EINVAL)
1479                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1480        ASSERT_EQ(0, ret) {
1481                kill(tracee, SIGKILL);
1482        }
1483        /* Wait for attach stop */
1484        wait(NULL);
1485
1486        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1487                                                      PTRACE_O_TRACESYSGOOD :
1488                                                      PTRACE_O_TRACESECCOMP);
1489        ASSERT_EQ(0, ret) {
1490                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1491                kill(tracee, SIGKILL);
1492        }
1493        ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1494                     tracee, NULL, 0);
1495        ASSERT_EQ(0, ret);
1496
1497        /* Unblock the tracee */
1498        ASSERT_EQ(1, write(fd, "A", 1));
1499        ASSERT_EQ(0, close(fd));
1500
1501        /* Run until we're shut down. Must assert to stop execution. */
1502        while (tracer_running) {
1503                int status;
1504
1505                if (wait(&status) != tracee)
1506                        continue;
1507                if (WIFSIGNALED(status) || WIFEXITED(status))
1508                        /* Child is dead. Time to go. */
1509                        return;
1510
1511                /* Check if this is a seccomp event. */
1512                ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1513
1514                tracer_func(_metadata, tracee, status, args);
1515
1516                ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1517                             tracee, NULL, 0);
1518                ASSERT_EQ(0, ret);
1519        }
1520        /* Directly report the status of our test harness results. */
1521        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1522}
1523
1524/* Common tracer setup/teardown functions. */
1525void cont_handler(int num)
1526{ }
1527pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1528                          tracer_func_t func, void *args, bool ptrace_syscall)
1529{
1530        char sync;
1531        int pipefd[2];
1532        pid_t tracer_pid;
1533        pid_t tracee = getpid();
1534
1535        /* Setup a pipe for clean synchronization. */
1536        ASSERT_EQ(0, pipe(pipefd));
1537
1538        /* Fork a child which we'll promote to tracer */
1539        tracer_pid = fork();
1540        ASSERT_LE(0, tracer_pid);
1541        signal(SIGALRM, cont_handler);
1542        if (tracer_pid == 0) {
1543                close(pipefd[0]);
1544                start_tracer(_metadata, pipefd[1], tracee, func, args,
1545                             ptrace_syscall);
1546                syscall(__NR_exit, 0);
1547        }
1548        close(pipefd[1]);
1549        prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1550        read(pipefd[0], &sync, 1);
1551        close(pipefd[0]);
1552
1553        return tracer_pid;
1554}
1555
1556void teardown_trace_fixture(struct __test_metadata *_metadata,
1557                            pid_t tracer)
1558{
1559        if (tracer) {
1560                int status;
1561                /*
1562                 * Extract the exit code from the other process and
1563                 * adopt it for ourselves in case its asserts failed.
1564                 */
1565                ASSERT_EQ(0, kill(tracer, SIGUSR1));
1566                ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1567                if (WEXITSTATUS(status))
1568                        _metadata->passed = 0;
1569        }
1570}
1571
1572/* "poke" tracer arguments and function. */
1573struct tracer_args_poke_t {
1574        unsigned long poke_addr;
1575};
1576
1577void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1578                 void *args)
1579{
1580        int ret;
1581        unsigned long msg;
1582        struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1583
1584        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1585        EXPECT_EQ(0, ret);
1586        /* If this fails, don't try to recover. */
1587        ASSERT_EQ(0x1001, msg) {
1588                kill(tracee, SIGKILL);
1589        }
1590        /*
1591         * Poke in the message.
1592         * Registers are not touched to try to keep this relatively arch
1593         * agnostic.
1594         */
1595        ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1596        EXPECT_EQ(0, ret);
1597}
1598
1599FIXTURE(TRACE_poke) {
1600        struct sock_fprog prog;
1601        pid_t tracer;
1602        long poked;
1603        struct tracer_args_poke_t tracer_args;
1604};
1605
1606FIXTURE_SETUP(TRACE_poke)
1607{
1608        struct sock_filter filter[] = {
1609                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1610                        offsetof(struct seccomp_data, nr)),
1611                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1612                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1613                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1614        };
1615
1616        self->poked = 0;
1617        memset(&self->prog, 0, sizeof(self->prog));
1618        self->prog.filter = malloc(sizeof(filter));
1619        ASSERT_NE(NULL, self->prog.filter);
1620        memcpy(self->prog.filter, filter, sizeof(filter));
1621        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1622
1623        /* Set up tracer args. */
1624        self->tracer_args.poke_addr = (unsigned long)&self->poked;
1625
1626        /* Launch tracer. */
1627        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1628                                           &self->tracer_args, false);
1629}
1630
1631FIXTURE_TEARDOWN(TRACE_poke)
1632{
1633        teardown_trace_fixture(_metadata, self->tracer);
1634        if (self->prog.filter)
1635                free(self->prog.filter);
1636}
1637
1638TEST_F(TRACE_poke, read_has_side_effects)
1639{
1640        ssize_t ret;
1641
1642        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1643        ASSERT_EQ(0, ret);
1644
1645        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1646        ASSERT_EQ(0, ret);
1647
1648        EXPECT_EQ(0, self->poked);
1649        ret = read(-1, NULL, 0);
1650        EXPECT_EQ(-1, ret);
1651        EXPECT_EQ(0x1001, self->poked);
1652}
1653
1654TEST_F(TRACE_poke, getpid_runs_normally)
1655{
1656        long ret;
1657
1658        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1659        ASSERT_EQ(0, ret);
1660
1661        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1662        ASSERT_EQ(0, ret);
1663
1664        EXPECT_EQ(0, self->poked);
1665        EXPECT_NE(0, syscall(__NR_getpid));
1666        EXPECT_EQ(0, self->poked);
1667}
1668
/*
 * Per-architecture register access used by the ptrace-based tests:
 *   ARCH_REGS    - type of the register set read from the tracee
 *   SYSCALL_NUM  - member of ARCH_REGS holding the syscall number
 *   SYSCALL_RET  - member of ARCH_REGS holding the syscall return value
 *
 * SYSCALL_NUM_RET_SHARE_REG is defined where SYSCALL_NUM and
 * SYSCALL_RET name the same register (s390, mips).
 * Unknown architectures fail the build with #error.
 */
#if defined(__x86_64__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_rax
# define SYSCALL_RET    rax
#elif defined(__i386__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_eax
# define SYSCALL_RET    eax
#elif defined(__arm__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    ARM_r7
# define SYSCALL_RET    ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    regs[8]
# define SYSCALL_RET    regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    a7
# define SYSCALL_RET    a0
#elif defined(__csky__)
# define ARCH_REGS      struct pt_regs
/* The syscall number register index depends on the C-SKY ABI version. */
#if defined(__CSKYABIV2__)
# define SYSCALL_NUM    regs[3]
#else
# define SYSCALL_NUM    regs[9]
#endif
# define SYSCALL_RET    a0
#elif defined(__hppa__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    gr[20]
# define SYSCALL_RET    gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[0]
# define SYSCALL_RET    gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    regs[2]
/* Consulted instead of SYSCALL_NUM when SYSCALL_NUM is __NR_O32_Linux. */
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET    regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__xtensa__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    syscall
/*
 * On xtensa syscall return value is in the register
 * a2 of the current window which is not fixed.
 */
#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
#elif defined(__sh__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[3]
# define SYSCALL_RET    gpr[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1731
/*
 * EXPECT_SYSCALL_RETURN(val, action): run @action and check its result
 * against @val. A negative @val is treated as -errno: @action must
 * return -1 and errno must equal -(val). Otherwise @action must return
 * @val itself.
 *
 * When the syscall return can't be changed, stub out the tests for it.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		/* Parenthesized so any expression works as val. */ \
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1747
1748/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
1749 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
1750 */
1751#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
1752#define HAVE_GETREGS
1753#endif
1754
1755/* Architecture-specific syscall fetching routine. */
1756int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1757{
1758        ARCH_REGS regs;
1759#ifdef HAVE_GETREGS
1760        EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1761                TH_LOG("PTRACE_GETREGS failed");
1762                return -1;
1763        }
1764#else
1765        struct iovec iov;
1766
1767        iov.iov_base = &regs;
1768        iov.iov_len = sizeof(regs);
1769        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1770                TH_LOG("PTRACE_GETREGSET failed");
1771                return -1;
1772        }
1773#endif
1774
1775#if defined(__mips__)
1776        if (regs.SYSCALL_NUM == __NR_O32_Linux)
1777                return regs.SYSCALL_SYSCALL_NUM;
1778#endif
1779        return regs.SYSCALL_NUM;
1780}
1781
1782/* Architecture-specific syscall changing routine. */
1783void change_syscall(struct __test_metadata *_metadata,
1784                    pid_t tracee, int syscall, int result)
1785{
1786        int ret;
1787        ARCH_REGS regs;
1788#ifdef HAVE_GETREGS
1789        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1790#else
1791        struct iovec iov;
1792        iov.iov_base = &regs;
1793        iov.iov_len = sizeof(regs);
1794        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1795#endif
1796        EXPECT_EQ(0, ret) {}
1797
1798#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1799        defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
1800        defined(__xtensa__) || defined(__csky__) || defined(__sh__)
1801        {
1802                regs.SYSCALL_NUM = syscall;
1803        }
1804#elif defined(__mips__)
1805        {
1806                if (regs.SYSCALL_NUM == __NR_O32_Linux)
1807                        regs.SYSCALL_SYSCALL_NUM = syscall;
1808                else
1809                        regs.SYSCALL_NUM = syscall;
1810        }
1811
1812#elif defined(__arm__)
1813# ifndef PTRACE_SET_SYSCALL
1814#  define PTRACE_SET_SYSCALL   23
1815# endif
1816        {
1817                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1818                EXPECT_EQ(0, ret);
1819        }
1820
1821#elif defined(__aarch64__)
1822# ifndef NT_ARM_SYSTEM_CALL
1823#  define NT_ARM_SYSTEM_CALL 0x404
1824# endif
1825        {
1826                iov.iov_base = &syscall;
1827                iov.iov_len = sizeof(syscall);
1828                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1829                             &iov);
1830                EXPECT_EQ(0, ret);
1831        }
1832
1833#else
1834        ASSERT_EQ(1, 0) {
1835                TH_LOG("How is the syscall changed on this architecture?");
1836        }
1837#endif
1838
1839        /* If syscall is skipped, change return value. */
1840        if (syscall == -1)
1841#ifdef SYSCALL_NUM_RET_SHARE_REG
1842                TH_LOG("Can't modify syscall return on this architecture");
1843
1844#elif defined(__xtensa__)
1845                regs.SYSCALL_RET(regs) = result;
1846#else
1847                regs.SYSCALL_RET = result;
1848#endif
1849
1850#ifdef HAVE_GETREGS
1851        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1852#else
1853        iov.iov_base = &regs;
1854        iov.iov_len = sizeof(regs);
1855        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1856#endif
1857        EXPECT_EQ(0, ret);
1858}
1859
1860void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
1861                    int status, void *args)
1862{
1863        int ret;
1864        unsigned long msg;
1865
1866        /* Make sure we got the right message. */
1867        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1868        EXPECT_EQ(0, ret);
1869
1870        /* Validate and take action on expected syscalls. */
1871        switch (msg) {
1872        case 0x1002:
1873                /* change getpid to getppid. */
1874                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1875                change_syscall(_metadata, tracee, __NR_getppid, 0);
1876                break;
1877        case 0x1003:
1878                /* skip gettid with valid return code. */
1879                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1880                change_syscall(_metadata, tracee, -1, 45000);
1881                break;
1882        case 0x1004:
1883                /* skip openat with error. */
1884                EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1885                change_syscall(_metadata, tracee, -1, -ESRCH);
1886                break;
1887        case 0x1005:
1888                /* do nothing (allow getppid) */
1889                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1890                break;
1891        default:
1892                EXPECT_EQ(0, msg) {
1893                        TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1894                        kill(tracee, SIGKILL);
1895                }
1896        }
1897
1898}
1899
/*
 * Tracer callback used by the plain-ptrace variant of TRACE_syscall.
 *
 * Called on every syscall stop of the tracee. A static flag is toggled on
 * each call to tell entry stops from exit stops, and the ptrace event
 * message is checked against the phase that flag predicts. On entry stops
 * the syscall number is read and getpid/gettid/openat are rewritten exactly
 * as tracer_seccomp() does; exit stops are only validated.
 *
 * @status and @args are not used by this callback.
 */
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
                   int status, void *args)
{
        static bool entry;
        unsigned long msg;
        int ret, nr;

        /*
         * Entry and exit stops are told apart by counting: each stop flips
         * the phase.
         */
        entry = !entry;

        /* The event message has to agree with the phase we expect. */
        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
        EXPECT_EQ(0, ret);
        EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
                        : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);

        /* Syscalls are only rewritten on the way in. */
        if (entry) {
                nr = get_syscall(_metadata, tracee);

                switch (nr) {
                case __NR_getpid:
                        change_syscall(_metadata, tracee, __NR_getppid, 0);
                        break;
                case __NR_gettid:
                        change_syscall(_metadata, tracee, -1, 45000);
                        break;
                case __NR_openat:
                        change_syscall(_metadata, tracee, -1, -ESRCH);
                        break;
                default:
                        break;
                }
        }
}
1931
/* State shared between setup, teardown and the tests of TRACE_syscall. */
FIXTURE(TRACE_syscall) {
        /* Not referenced by the setup code below, which uses a local prog. */
        struct sock_fprog prog;
        /* Value returned by setup_trace_fixture(); passed back at teardown. */
        pid_t tracer;
        /* Result of gettid recorded in setup. */
        pid_t mytid;
        /* Result of getpid() recorded in setup. */
        pid_t mypid;
        /* Result of getppid() recorded in setup. */
        pid_t parent;
};
1936
/* Per-variant parameters of the TRACE_syscall fixture. */
FIXTURE_VARIANT(TRACE_syscall) {
        /*
         * All of the SECCOMP_RET_TRACE behaviors can be tested with either
         * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
         * This indicates if we should use SECCOMP_RET_TRACE (false), or
         * ptrace (true).
         */
        bool use_ptrace;
};
1946
/* Variant that drives the tests with the plain ptrace tracer (tracer_ptrace). */
FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
        .use_ptrace = true,
};

/* Variant that drives the tests through SECCOMP_RET_TRACE (tracer_seccomp). */
FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
        .use_ptrace = false,
};
1954
1955FIXTURE_SETUP(TRACE_syscall)
1956{
1957        struct sock_filter filter[] = {
1958                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1959                        offsetof(struct seccomp_data, nr)),
1960                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1961                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1962                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1963                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1964                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1965                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1966                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1967                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1968                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1969        };
1970        struct sock_fprog prog = {
1971                .len = (unsigned short)ARRAY_SIZE(filter),
1972                .filter = filter,
1973        };
1974        long ret;
1975
1976        /* Prepare some testable syscall results. */
1977        self->mytid = syscall(__NR_gettid);
1978        ASSERT_GT(self->mytid, 0);
1979        ASSERT_NE(self->mytid, 1) {
1980                TH_LOG("Running this test as init is not supported. :)");
1981        }
1982
1983        self->mypid = getpid();
1984        ASSERT_GT(self->mypid, 0);
1985        ASSERT_EQ(self->mytid, self->mypid);
1986
1987        self->parent = getppid();
1988        ASSERT_GT(self->parent, 0);
1989        ASSERT_NE(self->parent, self->mypid);
1990
1991        /* Launch tracer. */
1992        self->tracer = setup_trace_fixture(_metadata,
1993                                           variant->use_ptrace ? tracer_ptrace
1994                                                               : tracer_seccomp,
1995                                           NULL, variant->use_ptrace);
1996
1997        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1998        ASSERT_EQ(0, ret);
1999
2000        if (variant->use_ptrace)
2001                return;
2002
2003        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2004        ASSERT_EQ(0, ret);
2005}
2006
/* Tear down a TRACE_syscall test by handing the tracer started in setup back to the helper. */
FIXTURE_TEARDOWN(TRACE_syscall)
{
        teardown_trace_fixture(_metadata, self->tracer);
}
2011
/*
 * Invalid negative syscall numbers must fail with -1 and errno == ENOSYS.
 * The TRACE_syscall fixture re-runs these checks by calling this test
 * directly.
 */
TEST(negative_ENOSYS)
{
        /*
         * There should be no difference between an "internal" skip
         * and userspace asking for syscall "-1".
         */
        errno = 0;
        EXPECT_EQ(-1, syscall(-1));
        EXPECT_EQ(errno, ENOSYS);
        /* And no difference for "still not valid but not -1". */
        errno = 0;
        EXPECT_EQ(-1, syscall(-101));
        EXPECT_EQ(errno, ENOSYS);
}
2026
/* Repeat the negative_ENOSYS checks while the TRACE_syscall fixture is active. */
TEST_F(TRACE_syscall, negative_ENOSYS)
{
        /*
         * Presumably the function the harness generates for
         * TEST(negative_ENOSYS); it is run with this test's metadata.
         */
        negative_ENOSYS(_metadata);
}
2031
/* A syscall the tracer leaves untouched must still return its real result. */
TEST_F(TRACE_syscall, syscall_allowed)
{
        /* getppid works as expected (no changes). */
        EXPECT_EQ(self->parent, syscall(__NR_getppid));
        EXPECT_NE(self->mypid, syscall(__NR_getppid));
}
2038
/* The tracer rewrites getpid into getppid, so getpid must yield the parent's pid. */
TEST_F(TRACE_syscall, syscall_redirected)
{
        /* getpid has been redirected to getppid as expected. */
        EXPECT_EQ(self->parent, syscall(__NR_getpid));
        EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2045
/* A syscall skipped by the tracer with an error result must surface that error. */
TEST_F(TRACE_syscall, syscall_errno)
{
        /* Tracer should skip the open syscall, resulting in ESRCH. */
        EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
2051
/* A syscall skipped by the tracer with a fabricated result must return that result. */
TEST_F(TRACE_syscall, syscall_faked)
{
        /* Tracer skips the gettid syscall and stores an altered return value. */
        EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
2057
/*
 * Stack an "EPERM on getppid" filter on top of the fixture and check that it
 * is applied to the syscall the tracer substitutes for getpid.
 */
TEST_F(TRACE_syscall, skip_after)
{
        /* getppid fails with EPERM; every other syscall is allowed. */
        struct sock_filter filter[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                        offsetof(struct seccomp_data, nr)),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog prog = {
                .filter = filter,
                .len = (unsigned short)ARRAY_SIZE(filter),
        };
        long ret;

        /* Add the extra filter on top of whatever the fixture installed. */
        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
        ASSERT_EQ(0, ret);

        /*
         * The tracer turns getpid into getppid, which the new filter
         * rejects, so the call must fail with EPERM.
         */
        errno = 0;
        EXPECT_EQ(-1, syscall(__NR_getpid));
        EXPECT_EQ(EPERM, errno);
}
2082
2083TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
2084{
2085        struct sock_filter filter[] = {
2086                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2087                        offsetof(struct seccomp_data, nr)),
2088                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2089                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2090                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2091        };
2092        struct sock_fprog prog = {
2093                .len = (unsigned short)ARRAY_SIZE(filter),
2094                .filter = filter,
2095        };
2096        long ret;
2097
2098        /* Install additional "death on getppid" filter. */
2099        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2100        ASSERT_EQ(0, ret);
2101
2102        /* Tracer will redirect getpid to getppid, and we should die. */
2103        EXPECT_NE(self->mypid, syscall(__NR_getpid));
2104}
2105
2106TEST(seccomp_syscall)
2107{
2108        struct sock_filter filter[] = {
2109                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2110        };
2111        struct sock_fprog prog = {
2112                .len = (unsigned short)ARRAY_SIZE(filter),
2113                .filter = filter,
2114        };
2115        long ret;
2116
2117        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2118        ASSERT_EQ(0, ret) {
2119                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2120        }
2121
2122        /* Reject insane operation. */
2123        ret = seccomp(-1, 0, &prog);
2124        ASSERT_NE(ENOSYS, errno) {
2125                TH_LOG("Kernel does not support seccomp syscall!");
2126        }
2127        EXPECT_EQ(EINVAL, errno) {
2128                TH_LOG("Did not reject crazy op value!");
2129        }
2130
2131        /* Reject strict with flags or pointer. */
2132        ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2133        EXPECT_EQ(EINVAL, errno) {
2134                TH_LOG("Did not reject mode strict with flags!");
2135        }
2136        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2137        EXPECT_EQ(EINVAL, errno) {
2138                TH_LOG("Did not reject mode strict with uargs!");
2139        }
2140
2141        /* Reject insane args for filter. */
2142        ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2143        EXPECT_EQ(EINVAL, errno) {
2144                TH_LOG("Did not reject crazy filter flags!");
2145        }
2146        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2147        EXPECT_EQ(EFAULT, errno) {
2148                TH_LOG("Did not reject NULL filter!");
2149        }
2150
2151        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2152        EXPECT_EQ(0, errno) {
2153                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2154                        strerror(errno));
2155        }
2156}
2157
2158TEST(seccomp_syscall_mode_lock)
2159{
2160        struct sock_filter filter[] = {
2161                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2162        };
2163        struct sock_fprog prog = {
2164                .len = (unsigned short)ARRAY_SIZE(filter),
2165                .filter = filter,
2166        };
2167        long ret;
2168
2169        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2170        ASSERT_EQ(0, ret) {
2171                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2172        }
2173
2174        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2175        ASSERT_NE(ENOSYS, errno) {
2176                TH_LOG("Kernel does not support seccomp syscall!");
2177        }
2178        EXPECT_EQ(0, ret) {
2179                TH_LOG("Could not install filter!");
2180        }
2181
2182        /* Make sure neither entry point will switch to strict. */
2183        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2184        EXPECT_EQ(EINVAL, errno) {
2185                TH_LOG("Switched to mode strict!");
2186        }
2187
2188        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2189        EXPECT_EQ(EINVAL, errno) {
2190                TH_LOG("Switched to mode strict!");
2191        }
2192}
2193
2194/*
2195 * Test detection of known and unknown filter flags. Userspace needs to be able
2196 * to check if a filter flag is supported by the current kernel and a good way
2197 * of doing that is by attempting to enter filter mode, with the flag bit in
2198 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2199 * that the flag is valid and EINVAL indicates that the flag is invalid.
2200 */
2201TEST(detect_seccomp_filter_flags)
2202{
2203        unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2204                                 SECCOMP_FILTER_FLAG_LOG,
2205                                 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2206                                 SECCOMP_FILTER_FLAG_NEW_LISTENER,
2207                                 SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
2208        unsigned int exclusive[] = {
2209                                SECCOMP_FILTER_FLAG_TSYNC,
2210                                SECCOMP_FILTER_FLAG_NEW_LISTENER };
2211        unsigned int flag, all_flags, exclusive_mask;
2212        int i;
2213        long ret;
2214
2215        /* Test detection of individual known-good filter flags */
2216        for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2217                int bits = 0;
2218
2219                flag = flags[i];
2220                /* Make sure the flag is a single bit! */
2221                while (flag) {
2222                        if (flag & 0x1)
2223                                bits ++;
2224                        flag >>= 1;
2225                }
2226                ASSERT_EQ(1, bits);
2227                flag = flags[i];
2228
2229                ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2230                ASSERT_NE(ENOSYS, errno) {
2231                        TH_LOG("Kernel does not support seccomp syscall!");
2232                }
2233                EXPECT_EQ(-1, ret);
2234                EXPECT_EQ(EFAULT, errno) {
2235                        TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2236                               flag);
2237                }
2238
2239                all_flags |= flag;
2240        }
2241
2242        /*
2243         * Test detection of all known-good filter flags combined. But
2244         * for the exclusive flags we need to mask them out and try them
2245         * individually for the "all flags" testing.
2246         */
2247        exclusive_mask = 0;
2248        for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2249                exclusive_mask |= exclusive[i];
2250        for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2251                flag = all_flags & ~exclusive_mask;
2252                flag |= exclusive[i];
2253
2254                ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2255                EXPECT_EQ(-1, ret);
2256                EXPECT_EQ(EFAULT, errno) {
2257                        TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2258                               flag);
2259                }
2260        }
2261
2262        /* Test detection of an unknown filter flags, without exclusives. */
2263        flag = -1;
2264        flag &= ~exclusive_mask;
2265        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2266        EXPECT_EQ(-1, ret);
2267        EXPECT_EQ(EINVAL, errno) {
2268                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2269                       flag);
2270        }
2271
2272        /*
2273         * Test detection of an unknown filter flag that may simply need to be
2274         * added to this test
2275         */
2276        flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2277        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2278        EXPECT_EQ(-1, ret);
2279        EXPECT_EQ(EINVAL, errno) {
2280                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2281                       flag);
2282        }
2283}
2284
2285TEST(TSYNC_first)
2286{
2287        struct sock_filter filter[] = {
2288                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2289        };
2290        struct sock_fprog prog = {
2291                .len = (unsigned short)ARRAY_SIZE(filter),
2292                .filter = filter,
2293        };
2294        long ret;
2295
2296        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2297        ASSERT_EQ(0, ret) {
2298                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2299        }
2300
2301        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2302                      &prog);
2303        ASSERT_NE(ENOSYS, errno) {
2304                TH_LOG("Kernel does not support seccomp syscall!");
2305        }
2306        EXPECT_EQ(0, ret) {
2307                TH_LOG("Could not install initial filter with TSYNC!");
2308        }
2309}
2310
/* Number of sibling threads created by the TSYNC tests. */
#define TSYNC_SIBLINGS 2
/* Per-thread control block handed to each TSYNC sibling thread. */
struct tsync_sibling {
        /* pthread handle filled in by pthread_create(); zeroed by PTHREAD_JOIN on success. */
        pthread_t tid;
        /* Kernel thread id, stored by the thread itself via gettid. */
        pid_t system_tid;
        /* Posted once by the thread after its optional filter installation. */
        sem_t *started;
        /* Condition variable the thread waits on before running its checks. */
        pthread_cond_t *cond;
        /* Mutex paired with cond. */
        pthread_mutex_t *mutex;
        /* Non-zero: the thread installs prog itself before signalling started. */
        int diverge;
        /* Number of returns from pthread_cond_wait() to consume before proceeding. */
        int num_waits;
        /* Filter program installed when diverge is set. */
        struct sock_fprog *prog;
        /* Test metadata pointer assigned in fixture setup. */
        struct __test_metadata *metadata;
};
2323
/*
 * To avoid joining already-joined threads (which is not allowed by Bionic),
 * join the thread and, only if that succeeds, clear the tid so that a later
 * join or kill attempt during fixture teardown skips it. Any thread whose
 * tid is still set will be killed directly during teardown.
 *
 * @tid:    modifiable pthread_t lvalue; evaluated more than once, so it
 *          must not have side effects. Set to 0 after a successful join.
 * @status: passed through to pthread_join() to receive the exit value.
 *
 * A failed join is reported through TH_LOG, so the macro can only be used
 * where TH_LOG is usable. The message carries no trailing newline, matching
 * every other TH_LOG call in this file.
 */
#define PTHREAD_JOIN(tid, status)                                       \
        do {                                                            \
                int _rc = pthread_join((tid), (status));                \
                if (_rc) {                                              \
                        TH_LOG("pthread_join of tid %u failed: %d",     \
                                (unsigned int)(tid), _rc);              \
                } else {                                                \
                        (tid) = 0;                                      \
                }                                                       \
        } while (0)
2340
/* State shared between setup, teardown and the tests of the TSYNC fixture. */
FIXTURE(TSYNC) {
        /* root_prog: allow-all program; apply_prog: program that kills on read. */
        struct sock_fprog root_prog, apply_prog;
        /* Control blocks for the sibling threads. */
        struct tsync_sibling sibling[TSYNC_SIBLINGS];
        /* Posted by each sibling once it has started. */
        sem_t started;
        /* Condition variable and mutex used to release the siblings. */
        pthread_cond_t cond;
        pthread_mutex_t mutex;
        /* Number of siblings whose start has been observed; bounds the teardown loop. */
        int sibling_count;
};
2349
2350FIXTURE_SETUP(TSYNC)
2351{
2352        struct sock_filter root_filter[] = {
2353                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2354        };
2355        struct sock_filter apply_filter[] = {
2356                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2357                        offsetof(struct seccomp_data, nr)),
2358                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2359                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2360                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2361        };
2362
2363        memset(&self->root_prog, 0, sizeof(self->root_prog));
2364        memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2365        memset(&self->sibling, 0, sizeof(self->sibling));
2366        self->root_prog.filter = malloc(sizeof(root_filter));
2367        ASSERT_NE(NULL, self->root_prog.filter);
2368        memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2369        self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2370
2371        self->apply_prog.filter = malloc(sizeof(apply_filter));
2372        ASSERT_NE(NULL, self->apply_prog.filter);
2373        memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2374        self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2375
2376        self->sibling_count = 0;
2377        pthread_mutex_init(&self->mutex, NULL);
2378        pthread_cond_init(&self->cond, NULL);
2379        sem_init(&self->started, 0, 0);
2380        self->sibling[0].tid = 0;
2381        self->sibling[0].cond = &self->cond;
2382        self->sibling[0].started = &self->started;
2383        self->sibling[0].mutex = &self->mutex;
2384        self->sibling[0].diverge = 0;
2385        self->sibling[0].num_waits = 1;
2386        self->sibling[0].prog = &self->root_prog;
2387        self->sibling[0].metadata = _metadata;
2388        self->sibling[1].tid = 0;
2389        self->sibling[1].cond = &self->cond;
2390        self->sibling[1].started = &self->started;
2391        self->sibling[1].mutex = &self->mutex;
2392        self->sibling[1].diverge = 0;
2393        self->sibling[1].prog = &self->root_prog;
2394        self->sibling[1].num_waits = 1;
2395        self->sibling[1].metadata = _metadata;
2396}
2397
2398FIXTURE_TEARDOWN(TSYNC)
2399{
2400        int sib = 0;
2401
2402        if (self->root_prog.filter)
2403                free(self->root_prog.filter);
2404        if (self->apply_prog.filter)
2405                free(self->apply_prog.filter);
2406
2407        for ( ; sib < self->sibling_count; ++sib) {
2408                struct tsync_sibling *s = &self->sibling[sib];
2409
2410                if (!s->tid)
2411                        continue;
2412                /*
2413                 * If a thread is still running, it may be stuck, so hit
2414                 * it over the head really hard.
2415                 */
2416                pthread_kill(s->tid, 9);
2417        }
2418        pthread_mutex_destroy(&self->mutex);
2419        pthread_cond_destroy(&self->cond);
2420        sem_destroy(&self->started);
2421}
2422
2423void *tsync_sibling(void *data)
2424{
2425        long ret = 0;
2426        struct tsync_sibling *me = data;
2427
2428        me->system_tid = syscall(__NR_gettid);
2429
2430        pthread_mutex_lock(me->mutex);
2431        if (me->diverge) {
2432                /* Just re-apply the root prog to fork the tree */
2433                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2434                                me->prog, 0, 0);
2435        }
2436        sem_post(me->started);
2437        /* Return outside of started so parent notices failures. */
2438        if (ret) {
2439                pthread_mutex_unlock(me->mutex);
2440                return (void *)SIBLING_EXIT_FAILURE;
2441        }
2442        do {
2443                pthread_cond_wait(me->cond, me->mutex);
2444                me->num_waits = me->num_waits - 1;
2445        } while (me->num_waits);
2446        pthread_mutex_unlock(me->mutex);
2447
2448        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2449        if (!ret)
2450                return (void *)SIBLING_EXIT_NEWPRIVS;
2451        read(0, NULL, 0);
2452        return (void *)SIBLING_EXIT_UNKILLED;
2453}
2454
2455void tsync_start_sibling(struct tsync_sibling *sibling)
2456{
2457        pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2458}
2459
2460TEST_F(TSYNC, siblings_fail_prctl)
2461{
2462        long ret;
2463        void *status;
2464        struct sock_filter filter[] = {
2465                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2466                        offsetof(struct seccomp_data, nr)),
2467                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2468                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2469                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2470        };
2471        struct sock_fprog prog = {
2472                .len = (unsigned short)ARRAY_SIZE(filter),
2473                .filter = filter,
2474        };
2475
2476        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2477                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2478        }
2479
2480        /* Check prctl failure detection by requesting sib 0 diverge. */
2481        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2482        ASSERT_NE(ENOSYS, errno) {
2483                TH_LOG("Kernel does not support seccomp syscall!");
2484        }
2485        ASSERT_EQ(0, ret) {
2486                TH_LOG("setting filter failed");
2487        }
2488
2489        self->sibling[0].diverge = 1;
2490        tsync_start_sibling(&self->sibling[0]);
2491        tsync_start_sibling(&self->sibling[1]);
2492
2493        while (self->sibling_count < TSYNC_SIBLINGS) {
2494                sem_wait(&self->started);
2495                self->sibling_count++;
2496        }
2497
2498        /* Signal the threads to clean up*/
2499        pthread_mutex_lock(&self->mutex);
2500        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2501                TH_LOG("cond broadcast non-zero");
2502        }
2503        pthread_mutex_unlock(&self->mutex);
2504
2505        /* Ensure diverging sibling failed to call prctl. */
2506        PTHREAD_JOIN(self->sibling[0].tid, &status);
2507        EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2508        PTHREAD_JOIN(self->sibling[1].tid, &status);
2509        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2510}
2511
2512TEST_F(TSYNC, two_siblings_with_ancestor)
2513{
2514        long ret;
2515        void *status;
2516
2517        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2518                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2519        }
2520
2521        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2522        ASSERT_NE(ENOSYS, errno) {
2523                TH_LOG("Kernel does not support seccomp syscall!");
2524        }
2525        ASSERT_EQ(0, ret) {
2526                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2527        }
2528        tsync_start_sibling(&self->sibling[0]);
2529        tsync_start_sibling(&self->sibling[1]);
2530
2531        while (self->sibling_count < TSYNC_SIBLINGS) {
2532                sem_wait(&self->started);
2533                self->sibling_count++;
2534        }
2535
2536        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2537                      &self->apply_prog);
2538        ASSERT_EQ(0, ret) {
2539                TH_LOG("Could install filter on all threads!");
2540        }
2541        /* Tell the siblings to test the policy */
2542        pthread_mutex_lock(&self->mutex);
2543        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2544                TH_LOG("cond broadcast non-zero");
2545        }
2546        pthread_mutex_unlock(&self->mutex);
2547        /* Ensure they are both killed and don't exit cleanly. */
2548        PTHREAD_JOIN(self->sibling[0].tid, &status);
2549        EXPECT_EQ(0x0, (long)status);
2550        PTHREAD_JOIN(self->sibling[1].tid, &status);
2551        EXPECT_EQ(0x0, (long)status);
2552}
2553
2554TEST_F(TSYNC, two_sibling_want_nnp)
2555{
2556        void *status;
2557
2558        /* start siblings before any prctl() operations */
2559        tsync_start_sibling(&self->sibling[0]);
2560        tsync_start_sibling(&self->sibling[1]);
2561        while (self->sibling_count < TSYNC_SIBLINGS) {
2562                sem_wait(&self->started);
2563                self->sibling_count++;
2564        }
2565
2566        /* Tell the siblings to test no policy */
2567        pthread_mutex_lock(&self->mutex);
2568        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2569                TH_LOG("cond broadcast non-zero");
2570        }
2571        pthread_mutex_unlock(&self->mutex);
2572
2573        /* Ensure they are both upset about lacking nnp. */
2574        PTHREAD_JOIN(self->sibling[0].tid, &status);
2575        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2576        PTHREAD_JOIN(self->sibling[1].tid, &status);
2577        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2578}
2579
2580TEST_F(TSYNC, two_siblings_with_no_filter)
2581{
2582        long ret;
2583        void *status;
2584
2585        /* start siblings before any prctl() operations */
2586        tsync_start_sibling(&self->sibling[0]);
2587        tsync_start_sibling(&self->sibling[1]);
2588        while (self->sibling_count < TSYNC_SIBLINGS) {
2589                sem_wait(&self->started);
2590                self->sibling_count++;
2591        }
2592
2593        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2594                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2595        }
2596
2597        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2598                      &self->apply_prog);
2599        ASSERT_NE(ENOSYS, errno) {
2600                TH_LOG("Kernel does not support seccomp syscall!");
2601        }
2602        ASSERT_EQ(0, ret) {
2603                TH_LOG("Could install filter on all threads!");
2604        }
2605
2606        /* Tell the siblings to test the policy */
2607        pthread_mutex_lock(&self->mutex);
2608        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2609                TH_LOG("cond broadcast non-zero");
2610        }
2611        pthread_mutex_unlock(&self->mutex);
2612
2613        /* Ensure they are both killed and don't exit cleanly. */
2614        PTHREAD_JOIN(self->sibling[0].tid, &status);
2615        EXPECT_EQ(0x0, (long)status);
2616        PTHREAD_JOIN(self->sibling[1].tid, &status);
2617        EXPECT_EQ(0x0, (long)status);
2618}
2619
2620TEST_F(TSYNC, two_siblings_with_one_divergence)
2621{
2622        long ret;
2623        void *status;
2624
2625        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2626                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2627        }
2628
2629        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2630        ASSERT_NE(ENOSYS, errno) {
2631                TH_LOG("Kernel does not support seccomp syscall!");
2632        }
2633        ASSERT_EQ(0, ret) {
2634                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2635        }
2636        self->sibling[0].diverge = 1;
2637        tsync_start_sibling(&self->sibling[0]);
2638        tsync_start_sibling(&self->sibling[1]);
2639
2640        while (self->sibling_count < TSYNC_SIBLINGS) {
2641                sem_wait(&self->started);
2642                self->sibling_count++;
2643        }
2644
2645        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2646                      &self->apply_prog);
2647        ASSERT_EQ(self->sibling[0].system_tid, ret) {
2648                TH_LOG("Did not fail on diverged sibling.");
2649        }
2650
2651        /* Wake the threads */
2652        pthread_mutex_lock(&self->mutex);
2653        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2654                TH_LOG("cond broadcast non-zero");
2655        }
2656        pthread_mutex_unlock(&self->mutex);
2657
2658        /* Ensure they are both unkilled. */
2659        PTHREAD_JOIN(self->sibling[0].tid, &status);
2660        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2661        PTHREAD_JOIN(self->sibling[1].tid, &status);
2662        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2663}
2664
2665TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
2666{
2667        long ret, flags;
2668        void *status;
2669
2670        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2671                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2672        }
2673
2674        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2675        ASSERT_NE(ENOSYS, errno) {
2676                TH_LOG("Kernel does not support seccomp syscall!");
2677        }
2678        ASSERT_EQ(0, ret) {
2679                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2680        }
2681        self->sibling[0].diverge = 1;
2682        tsync_start_sibling(&self->sibling[0]);
2683        tsync_start_sibling(&self->sibling[1]);
2684
2685        while (self->sibling_count < TSYNC_SIBLINGS) {
2686                sem_wait(&self->started);
2687                self->sibling_count++;
2688        }
2689
2690        flags = SECCOMP_FILTER_FLAG_TSYNC | \
2691                SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
2692        ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
2693        ASSERT_EQ(ESRCH, errno) {
2694                TH_LOG("Did not return ESRCH for diverged sibling.");
2695        }
2696        ASSERT_EQ(-1, ret) {
2697                TH_LOG("Did not fail on diverged sibling.");
2698        }
2699
2700        /* Wake the threads */
2701        pthread_mutex_lock(&self->mutex);
2702        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2703                TH_LOG("cond broadcast non-zero");
2704        }
2705        pthread_mutex_unlock(&self->mutex);
2706
2707        /* Ensure they are both unkilled. */
2708        PTHREAD_JOIN(self->sibling[0].tid, &status);
2709        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2710        PTHREAD_JOIN(self->sibling[1].tid, &status);
2711        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2712}
2713
2714TEST_F(TSYNC, two_siblings_not_under_filter)
2715{
2716        long ret, sib;
2717        void *status;
2718        struct timespec delay = { .tv_nsec = 100000000 };
2719
2720        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2721                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2722        }
2723
2724        /*
2725         * Sibling 0 will have its own seccomp policy
2726         * and Sibling 1 will not be under seccomp at
2727         * all. Sibling 1 will enter seccomp and 0
2728         * will cause failure.
2729         */
2730        self->sibling[0].diverge = 1;
2731        tsync_start_sibling(&self->sibling[0]);
2732        tsync_start_sibling(&self->sibling[1]);
2733
2734        while (self->sibling_count < TSYNC_SIBLINGS) {
2735                sem_wait(&self->started);
2736                self->sibling_count++;
2737        }
2738
2739        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2740        ASSERT_NE(ENOSYS, errno) {
2741                TH_LOG("Kernel does not support seccomp syscall!");
2742        }
2743        ASSERT_EQ(0, ret) {
2744                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2745        }
2746
2747        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2748                      &self->apply_prog);
2749        ASSERT_EQ(ret, self->sibling[0].system_tid) {
2750                TH_LOG("Did not fail on diverged sibling.");
2751        }
2752        sib = 1;
2753        if (ret == self->sibling[0].system_tid)
2754                sib = 0;
2755
2756        pthread_mutex_lock(&self->mutex);
2757
2758        /* Increment the other siblings num_waits so we can clean up
2759         * the one we just saw.
2760         */
2761        self->sibling[!sib].num_waits += 1;
2762
2763        /* Signal the thread to clean up*/
2764        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2765                TH_LOG("cond broadcast non-zero");
2766        }
2767        pthread_mutex_unlock(&self->mutex);
2768        PTHREAD_JOIN(self->sibling[sib].tid, &status);
2769        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2770        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2771        while (!kill(self->sibling[sib].system_tid, 0))
2772                nanosleep(&delay, NULL);
2773        /* Switch to the remaining sibling */
2774        sib = !sib;
2775
2776        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2777                      &self->apply_prog);
2778        ASSERT_EQ(0, ret) {
2779                TH_LOG("Expected the remaining sibling to sync");
2780        };
2781
2782        pthread_mutex_lock(&self->mutex);
2783
2784        /* If remaining sibling didn't have a chance to wake up during
2785         * the first broadcast, manually reduce the num_waits now.
2786         */
2787        if (self->sibling[sib].num_waits > 1)
2788                self->sibling[sib].num_waits = 1;
2789        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2790                TH_LOG("cond broadcast non-zero");
2791        }
2792        pthread_mutex_unlock(&self->mutex);
2793        PTHREAD_JOIN(self->sibling[sib].tid, &status);
2794        EXPECT_EQ(0, (long)status);
2795        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2796        while (!kill(self->sibling[sib].system_tid, 0))
2797                nanosleep(&delay, NULL);
2798
2799        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2800                      &self->apply_prog);
2801        ASSERT_EQ(0, ret);  /* just us chickens */
2802}
2803
2804/* Make sure restarted syscalls are seen directly as "restart_syscall". */
2805TEST(syscall_restart)
2806{
2807        long ret;
2808        unsigned long msg;
2809        pid_t child_pid;
2810        int pipefd[2];
2811        int status;
2812        siginfo_t info = { };
2813        struct sock_filter filter[] = {
2814                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2815                         offsetof(struct seccomp_data, nr)),
2816
2817#ifdef __NR_sigreturn
2818                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
2819#endif
2820                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
2821                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
2822                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
2823                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
2824                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
2825                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2826
2827                /* Allow __NR_write for easy logging. */
2828                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2829                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2830                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2831                /* The nanosleep jump target. */
2832                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2833                /* The restart_syscall jump target. */
2834                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2835        };
2836        struct sock_fprog prog = {
2837                .len = (unsigned short)ARRAY_SIZE(filter),
2838                .filter = filter,
2839        };
2840#if defined(__arm__)
2841        struct utsname utsbuf;
2842#endif
2843
2844        ASSERT_EQ(0, pipe(pipefd));
2845
2846        child_pid = fork();
2847        ASSERT_LE(0, child_pid);
2848        if (child_pid == 0) {
2849                /* Child uses EXPECT not ASSERT to deliver status correctly. */
2850                char buf = ' ';
2851                struct timespec timeout = { };
2852
2853                /* Attach parent as tracer and stop. */
2854                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2855                EXPECT_EQ(0, raise(SIGSTOP));
2856
2857                EXPECT_EQ(0, close(pipefd[1]));
2858
2859                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2860                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2861                }
2862
2863                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2864                EXPECT_EQ(0, ret) {
2865                        TH_LOG("Failed to install filter!");
2866                }
2867
2868                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2869                        TH_LOG("Failed to read() sync from parent");
2870                }
2871                EXPECT_EQ('.', buf) {
2872                        TH_LOG("Failed to get sync data from read()");
2873                }
2874
2875                /* Start nanosleep to be interrupted. */
2876                timeout.tv_sec = 1;
2877                errno = 0;
2878                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2879                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2880                }
2881
2882                /* Read final sync from parent. */
2883                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2884                        TH_LOG("Failed final read() from parent");
2885                }
2886                EXPECT_EQ('!', buf) {
2887                        TH_LOG("Failed to get final data from read()");
2888                }
2889
2890                /* Directly report the status of our test harness results. */
2891                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2892                                                     : EXIT_FAILURE);
2893        }
2894        EXPECT_EQ(0, close(pipefd[0]));
2895
2896        /* Attach to child, setup options, and release. */
2897        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2898        ASSERT_EQ(true, WIFSTOPPED(status));
2899        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2900                            PTRACE_O_TRACESECCOMP));
2901        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2902        ASSERT_EQ(1, write(pipefd[1], ".", 1));
2903
2904        /* Wait for nanosleep() to start. */
2905        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2906        ASSERT_EQ(true, WIFSTOPPED(status));
2907        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2908        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2909        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2910        ASSERT_EQ(0x100, msg);
2911        ret = get_syscall(_metadata, child_pid);
2912        EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
2913
2914        /* Might as well check siginfo for sanity while we're here. */
2915        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2916        ASSERT_EQ(SIGTRAP, info.si_signo);
2917        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2918        EXPECT_EQ(0, info.si_errno);
2919        EXPECT_EQ(getuid(), info.si_uid);
2920        /* Verify signal delivery came from child (seccomp-triggered). */
2921        EXPECT_EQ(child_pid, info.si_pid);
2922
2923        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2924        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2925        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2926        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2927        ASSERT_EQ(true, WIFSTOPPED(status));
2928        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2929        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2930        /*
2931         * There is no siginfo on SIGSTOP any more, so we can't verify
2932         * signal delivery came from parent now (getpid() == info.si_pid).
2933         * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2934         * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2935         */
2936        EXPECT_EQ(SIGSTOP, info.si_signo);
2937
2938        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2939        ASSERT_EQ(0, kill(child_pid, SIGCONT));
2940        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2941        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2942        ASSERT_EQ(true, WIFSTOPPED(status));
2943        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2944        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2945
2946        /* Wait for restart_syscall() to start. */
2947        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2948        ASSERT_EQ(true, WIFSTOPPED(status));
2949        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2950        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2951        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2952
2953        ASSERT_EQ(0x200, msg);
2954        ret = get_syscall(_metadata, child_pid);
2955#if defined(__arm__)
2956        /*
2957         * FIXME:
2958         * - native ARM registers do NOT expose true syscall.
2959         * - compat ARM registers on ARM64 DO expose true syscall.
2960         */
2961        ASSERT_EQ(0, uname(&utsbuf));
2962        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2963                EXPECT_EQ(__NR_nanosleep, ret);
2964        } else
2965#endif
2966        {
2967                EXPECT_EQ(__NR_restart_syscall, ret);
2968        }
2969
2970        /* Write again to end test. */
2971        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2972        ASSERT_EQ(1, write(pipefd[1], "!", 1));
2973        EXPECT_EQ(0, close(pipefd[1]));
2974
2975        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2976        if (WIFSIGNALED(status) || WEXITSTATUS(status))
2977                _metadata->passed = 0;
2978}
2979
2980TEST_SIGNAL(filter_flag_log, SIGSYS)
2981{
2982        struct sock_filter allow_filter[] = {
2983                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2984        };
2985        struct sock_filter kill_filter[] = {
2986                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2987                        offsetof(struct seccomp_data, nr)),
2988                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2989                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2990                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2991        };
2992        struct sock_fprog allow_prog = {
2993                .len = (unsigned short)ARRAY_SIZE(allow_filter),
2994                .filter = allow_filter,
2995        };
2996        struct sock_fprog kill_prog = {
2997                .len = (unsigned short)ARRAY_SIZE(kill_filter),
2998                .filter = kill_filter,
2999        };
3000        long ret;
3001        pid_t parent = getppid();
3002
3003        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3004        ASSERT_EQ(0, ret);
3005
3006        /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
3007        ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
3008                      &allow_prog);
3009        ASSERT_NE(ENOSYS, errno) {
3010                TH_LOG("Kernel does not support seccomp syscall!");
3011        }
3012        EXPECT_NE(0, ret) {
3013                TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
3014        }
3015        EXPECT_EQ(EINVAL, errno) {
3016                TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
3017        }
3018
3019        /* Verify that a simple, permissive filter can be added with no flags */
3020        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
3021        EXPECT_EQ(0, ret);
3022
3023        /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
3024        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3025                      &allow_prog);
3026        ASSERT_NE(EINVAL, errno) {
3027                TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
3028        }
3029        EXPECT_EQ(0, ret);
3030
3031        /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
3032        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
3033                      &kill_prog);
3034        EXPECT_EQ(0, ret);
3035
3036        EXPECT_EQ(parent, syscall(__NR_getppid));
3037        /* getpid() should never return. */
3038        EXPECT_EQ(0, syscall(__NR_getpid));
3039}
3040
3041TEST(get_action_avail)
3042{
3043        __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3044                            SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3045                            SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3046        __u32 unknown_action = 0x10000000U;
3047        int i;
3048        long ret;
3049
3050        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3051        ASSERT_NE(ENOSYS, errno) {
3052                TH_LOG("Kernel does not support seccomp syscall!");
3053        }
3054        ASSERT_NE(EINVAL, errno) {
3055                TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3056        }
3057        EXPECT_EQ(ret, 0);
3058
3059        for (i = 0; i < ARRAY_SIZE(actions); i++) {
3060                ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3061                EXPECT_EQ(ret, 0) {
3062                        TH_LOG("Expected action (0x%X) not available!",
3063                               actions[i]);
3064                }
3065        }
3066
3067        /* Check that an unknown action is handled properly (EOPNOTSUPP) */
3068        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3069        EXPECT_EQ(ret, -1);
3070        EXPECT_EQ(errno, EOPNOTSUPP);
3071}
3072
3073TEST(get_metadata)
3074{
3075        pid_t pid;
3076        int pipefd[2];
3077        char buf;
3078        struct seccomp_metadata md;
3079        long ret;
3080
3081        /* Only real root can get metadata. */
3082        if (geteuid()) {
3083                SKIP(return, "get_metadata requires real root");
3084                return;
3085        }
3086
3087        ASSERT_EQ(0, pipe(pipefd));
3088
3089        pid = fork();
3090        ASSERT_GE(pid, 0);
3091        if (pid == 0) {
3092                struct sock_filter filter[] = {
3093                        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3094                };
3095                struct sock_fprog prog = {
3096                        .len = (unsigned short)ARRAY_SIZE(filter),
3097                        .filter = filter,
3098                };
3099
3100                /* one with log, one without */
3101                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3102                                     SECCOMP_FILTER_FLAG_LOG, &prog));
3103                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3104
3105                EXPECT_EQ(0, close(pipefd[0]));
3106                ASSERT_EQ(1, write(pipefd[1], "1", 1));
3107                ASSERT_EQ(0, close(pipefd[1]));
3108
3109                while (1)
3110                        sleep(100);
3111        }
3112
3113        ASSERT_EQ(0, close(pipefd[1]));
3114        ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3115
3116        ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3117        ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3118
3119        /* Past here must not use ASSERT or child process is never killed. */
3120
3121        md.filter_off = 0;
3122        errno = 0;
3123        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3124        EXPECT_EQ(sizeof(md), ret) {
3125                if (errno == EINVAL)
3126                        SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3127        }
3128
3129        EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3130        EXPECT_EQ(md.filter_off, 0);
3131
3132        md.filter_off = 1;
3133        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3134        EXPECT_EQ(sizeof(md), ret);
3135        EXPECT_EQ(md.flags, 0);
3136        EXPECT_EQ(md.filter_off, 1);
3137
3138skip:
3139        ASSERT_EQ(0, kill(pid, SIGKILL));
3140}
3141
/*
 * Install a seccomp filter that returns SECCOMP_RET_USER_NOTIF for syscall
 * @nr and SECCOMP_RET_ALLOW for everything else.
 *
 * @nr:    syscall number to match.
 * @flags: flags handed to SECCOMP_SET_MODE_FILTER unchanged.
 *
 * Returns whatever the seccomp() call returns.
 */
static int user_notif_syscall(int nr, unsigned int flags)
{
	struct sock_filter insns[] = {
		/* Load the syscall number. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* Match: fall through to USER_NOTIF; otherwise skip to ALLOW. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.len = (unsigned short)ARRAY_SIZE(insns),
		.filter = insns,
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &fprog);
}
3159
3160#define USER_NOTIF_MAGIC INT_MAX
3161TEST(user_notification_basic)
3162{
3163        pid_t pid;
3164        long ret;
3165        int status, listener;
3166        struct seccomp_notif req = {};
3167        struct seccomp_notif_resp resp = {};
3168        struct pollfd pollfd;
3169
3170        struct sock_filter filter[] = {
3171                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3172        };
3173        struct sock_fprog prog = {
3174                .len = (unsigned short)ARRAY_SIZE(filter),
3175                .filter = filter,
3176        };
3177
3178        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3179        ASSERT_EQ(0, ret) {
3180                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3181        }
3182
3183        pid = fork();
3184        ASSERT_GE(pid, 0);
3185
3186        /* Check that we get -ENOSYS with no listener attached */
3187        if (pid == 0) {
3188                if (user_notif_syscall(__NR_getppid, 0) < 0)
3189                        exit(1);
3190                ret = syscall(__NR_getppid);
3191                exit(ret >= 0 || errno != ENOSYS);
3192        }
3193
3194        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3195        EXPECT_EQ(true, WIFEXITED(status));
3196        EXPECT_EQ(0, WEXITSTATUS(status));
3197
3198        /* Add some no-op filters for grins. */
3199        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3200        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3201        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3202        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3203
3204        /* Check that the basic notification machinery works */
3205        listener = user_notif_syscall(__NR_getppid,
3206                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3207        ASSERT_GE(listener, 0);
3208
3209        /* Installing a second listener in the chain should EBUSY */
3210        EXPECT_EQ(user_notif_syscall(__NR_getppid,
3211                                     SECCOMP_FILTER_FLAG_NEW_LISTENER),
3212                  -1);
3213        EXPECT_EQ(errno, EBUSY);
3214
3215        pid = fork();
3216        ASSERT_GE(pid, 0);
3217
3218        if (pid == 0) {
3219                ret = syscall(__NR_getppid);
3220                exit(ret != USER_NOTIF_MAGIC);
3221        }
3222
3223        pollfd.fd = listener;
3224        pollfd.events = POLLIN | POLLOUT;
3225
3226        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3227        EXPECT_EQ(pollfd.revents, POLLIN);
3228
3229        /* Test that we can't pass garbage to the kernel. */
3230        memset(&req, 0, sizeof(req));
3231        req.pid = -1;
3232        errno = 0;
3233        ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3234        EXPECT_EQ(-1, ret);
3235        EXPECT_EQ(EINVAL, errno);
3236
3237        if (ret) {
3238                req.pid = 0;
3239                EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3240        }
3241
3242        pollfd.fd = listener;
3243        pollfd.events = POLLIN | POLLOUT;
3244
3245        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3246        EXPECT_EQ(pollfd.revents, POLLOUT);
3247
3248        EXPECT_EQ(req.data.nr,  __NR_getppid);
3249
3250        resp.id = req.id;
3251        resp.error = 0;
3252        resp.val = USER_NOTIF_MAGIC;
3253
3254        /* check that we make sure flags == 0 */
3255        resp.flags = 1;
3256        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3257        EXPECT_EQ(errno, EINVAL);
3258
3259        resp.flags = 0;
3260        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3261
3262        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3263        EXPECT_EQ(true, WIFEXITED(status));
3264        EXPECT_EQ(0, WEXITSTATUS(status));
3265}
3266
3267TEST(user_notification_with_tsync)
3268{
3269        int ret;
3270        unsigned int flags;
3271
3272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3273        ASSERT_EQ(0, ret) {
3274                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3275        }
3276
3277        /* these were exclusive */
3278        flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
3279                SECCOMP_FILTER_FLAG_TSYNC;
3280        ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
3281        ASSERT_EQ(EINVAL, errno);
3282
3283        /* but now they're not */
3284        flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
3285        ret = user_notif_syscall(__NR_getppid, flags);
3286        close(ret);
3287        ASSERT_LE(0, ret);
3288}
3289
3290TEST(user_notification_kill_in_middle)
3291{
3292        pid_t pid;
3293        long ret;
3294        int listener;
3295        struct seccomp_notif req = {};
3296        struct seccomp_notif_resp resp = {};
3297
3298        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3299        ASSERT_EQ(0, ret) {
3300                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3301        }
3302
3303        listener = user_notif_syscall(__NR_getppid,
3304                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3305        ASSERT_GE(listener, 0);
3306
3307        /*
3308         * Check that nothing bad happens when we kill the task in the middle
3309         * of a syscall.
3310         */
3311        pid = fork();
3312        ASSERT_GE(pid, 0);
3313
3314        if (pid == 0) {
3315                ret = syscall(__NR_getppid);
3316                exit(ret != USER_NOTIF_MAGIC);
3317        }
3318
3319        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3320        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3321
3322        EXPECT_EQ(kill(pid, SIGKILL), 0);
3323        EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3324
3325        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3326
3327        resp.id = req.id;
3328        ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3329        EXPECT_EQ(ret, -1);
3330        EXPECT_EQ(errno, ENOENT);
3331}
3332
/* Descriptor that signal_handler() writes its marker byte to; -1 until set. */
static int handled = -1;

/*
 * Signal handler: write a single 'c' to the "handled" descriptor so the
 * other end can observe that the signal was delivered.
 *
 * Only async-signal-safe calls are used.  A failed write is reported with a
 * fixed message through write(2) on stderr rather than perror(), and errno
 * is restored on return so the interrupted code sees its own value.
 */
static void signal_handler(int signal)
{
	static const char err_msg[] = "write from signal failed\n";
	int saved_errno = errno;

	if (write(handled, "c", 1) != 1) {
		if (write(STDERR_FILENO, err_msg, sizeof(err_msg) - 1) < 0) {
			/* Nothing more can be done from inside a handler. */
		}
	}

	errno = saved_errno;
}
3340
3341TEST(user_notification_signal)
3342{
3343        pid_t pid;
3344        long ret;
3345        int status, listener, sk_pair[2];
3346        struct seccomp_notif req = {};
3347        struct seccomp_notif_resp resp = {};
3348        char c;
3349
3350        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3351        ASSERT_EQ(0, ret) {
3352                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3353        }
3354
3355        ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3356
3357        listener = user_notif_syscall(__NR_gettid,
3358                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3359        ASSERT_GE(listener, 0);
3360
3361        pid = fork();
3362        ASSERT_GE(pid, 0);
3363
3364        if (pid == 0) {
3365                close(sk_pair[0]);
3366                handled = sk_pair[1];
3367                if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3368                        perror("signal");
3369                        exit(1);
3370                }
3371                /*
3372                 * ERESTARTSYS behavior is a bit hard to test, because we need
3373                 * to rely on a signal that has not yet been handled. Let's at
3374                 * least check that the error code gets propagated through, and
3375                 * hope that it doesn't break when there is actually a signal :)
3376                 */
3377                ret = syscall(__NR_gettid);
3378                exit(!(ret == -1 && errno == 512));
3379        }
3380
3381        close(sk_pair[1]);
3382
3383        memset(&req, 0, sizeof(req));
3384        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3385
3386        EXPECT_EQ(kill(pid, SIGUSR1), 0);
3387
3388        /*
3389         * Make sure the signal really is delivered, which means we're not
3390         * stuck in the user notification code any more and the notification
3391         * should be dead.
3392         */
3393        EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3394
3395        resp.id = req.id;
3396        resp.error = -EPERM;
3397        resp.val = 0;
3398
3399        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3400        EXPECT_EQ(errno, ENOENT);
3401
3402        memset(&req, 0, sizeof(req));
3403        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3404
3405        resp.id = req.id;
3406        resp.error = -512; /* -ERESTARTSYS */
3407        resp.val = 0;
3408
3409        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3410
3411        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3412        EXPECT_EQ(true, WIFEXITED(status));
3413        EXPECT_EQ(0, WEXITSTATUS(status));
3414}
3415
3416TEST(user_notification_closed_listener)
3417{
3418        pid_t pid;
3419        long ret;
3420        int status, listener;
3421
3422        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3423        ASSERT_EQ(0, ret) {
3424                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3425        }
3426
3427        listener = user_notif_syscall(__NR_getppid,
3428                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3429        ASSERT_GE(listener, 0);
3430
3431        /*
3432         * Check that we get an ENOSYS when the listener is closed.
3433         */
3434        pid = fork();
3435        ASSERT_GE(pid, 0);
3436        if (pid == 0) {
3437                close(listener);
3438                ret = syscall(__NR_getppid);
3439                exit(ret != -1 && errno != ENOSYS);
3440        }
3441
3442        close(listener);
3443
3444        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3445        EXPECT_EQ(true, WIFEXITED(status));
3446        EXPECT_EQ(0, WEXITSTATUS(status));
3447}
3448
3449/*
3450 * Check that a pid in a child namespace still shows up as valid in ours.
3451 */
3452TEST(user_notification_child_pid_ns)
3453{
3454        pid_t pid;
3455        int status, listener;
3456        struct seccomp_notif req = {};
3457        struct seccomp_notif_resp resp = {};
3458
3459        ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
3460                if (errno == EINVAL)
3461                        SKIP(return, "kernel missing CLONE_NEWUSER support");
3462        };
3463
3464        listener = user_notif_syscall(__NR_getppid,
3465                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3466        ASSERT_GE(listener, 0);
3467
3468        pid = fork();
3469        ASSERT_GE(pid, 0);
3470
3471        if (pid == 0)
3472                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3473
3474        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3475        EXPECT_EQ(req.pid, pid);
3476
3477        resp.id = req.id;
3478        resp.error = 0;
3479        resp.val = USER_NOTIF_MAGIC;
3480
3481        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3482
3483        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3484        EXPECT_EQ(true, WIFEXITED(status));
3485        EXPECT_EQ(0, WEXITSTATUS(status));
3486        close(listener);
3487}
3488
3489/*
3490 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3491 * invalid.
3492 */
3493TEST(user_notification_sibling_pid_ns)
3494{
3495        pid_t pid, pid2;
3496        int status, listener;
3497        struct seccomp_notif req = {};
3498        struct seccomp_notif_resp resp = {};
3499
3500        ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3501                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3502        }
3503
3504        listener = user_notif_syscall(__NR_getppid,
3505                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3506        ASSERT_GE(listener, 0);
3507
3508        pid = fork();
3509        ASSERT_GE(pid, 0);
3510
3511        if (pid == 0) {
3512                ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3513
3514                pid2 = fork();
3515                ASSERT_GE(pid2, 0);
3516
3517                if (pid2 == 0)
3518                        exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3519
3520                EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3521                EXPECT_EQ(true, WIFEXITED(status));
3522                EXPECT_EQ(0, WEXITSTATUS(status));
3523                exit(WEXITSTATUS(status));
3524        }
3525
3526        /* Create the sibling ns, and sibling in it. */
3527        ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
3528                if (errno == EPERM)
3529                        SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
3530        }
3531        ASSERT_EQ(errno, 0);
3532
3533        pid2 = fork();
3534        ASSERT_GE(pid2, 0);
3535
3536        if (pid2 == 0) {
3537                ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3538                /*
3539                 * The pid should be 0, i.e. the task is in some namespace that
3540                 * we can't "see".
3541                 */
3542                EXPECT_EQ(req.pid, 0);
3543
3544                resp.id = req.id;
3545                resp.error = 0;
3546                resp.val = USER_NOTIF_MAGIC;
3547
3548                ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3549                exit(0);
3550        }
3551
3552        close(listener);
3553
3554        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3555        EXPECT_EQ(true, WIFEXITED(status));
3556        EXPECT_EQ(0, WEXITSTATUS(status));
3557
3558        EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3559        EXPECT_EQ(true, WIFEXITED(status));
3560        EXPECT_EQ(0, WEXITSTATUS(status));
3561}
3562
3563TEST(user_notification_fault_recv)
3564{
3565        pid_t pid;
3566        int status, listener;
3567        struct seccomp_notif req = {};
3568        struct seccomp_notif_resp resp = {};
3569
3570        ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3571
3572        listener = user_notif_syscall(__NR_getppid,
3573                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3574        ASSERT_GE(listener, 0);
3575
3576        pid = fork();
3577        ASSERT_GE(pid, 0);
3578
3579        if (pid == 0)
3580                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3581
3582        /* Do a bad recv() */
3583        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3584        EXPECT_EQ(errno, EFAULT);
3585
3586        /* We should still be able to receive this notification, though. */
3587        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3588        EXPECT_EQ(req.pid, pid);
3589
3590        resp.id = req.id;
3591        resp.error = 0;
3592        resp.val = USER_NOTIF_MAGIC;
3593
3594        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3595
3596        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3597        EXPECT_EQ(true, WIFEXITED(status));
3598        EXPECT_EQ(0, WEXITSTATUS(status));
3599}
3600
/*
 * The structure sizes reported by SECCOMP_GET_NOTIF_SIZES must match the
 * sizes of the structures this test was compiled against.
 */
TEST(seccomp_get_notif_sizes)
{
	struct seccomp_notif_sizes reported;
	long rc;

	rc = seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &reported);
	ASSERT_EQ(rc, 0);

	EXPECT_EQ(reported.seccomp_notif, sizeof(struct seccomp_notif));
	EXPECT_EQ(reported.seccomp_notif_resp,
		  sizeof(struct seccomp_notif_resp));
}
3609
3610TEST(user_notification_continue)
3611{
3612        pid_t pid;
3613        long ret;
3614        int status, listener;
3615        struct seccomp_notif req = {};
3616        struct seccomp_notif_resp resp = {};
3617        struct pollfd pollfd;
3618
3619        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3620        ASSERT_EQ(0, ret) {
3621                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3622        }
3623
3624        listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3625        ASSERT_GE(listener, 0);
3626
3627        pid = fork();
3628        ASSERT_GE(pid, 0);
3629
3630        if (pid == 0) {
3631                int dup_fd, pipe_fds[2];
3632                pid_t self;
3633
3634                ASSERT_GE(pipe(pipe_fds), 0);
3635
3636                dup_fd = dup(pipe_fds[0]);
3637                ASSERT_GE(dup_fd, 0);
3638                EXPECT_NE(pipe_fds[0], dup_fd);
3639
3640                self = getpid();
3641                ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
3642                exit(0);
3643        }
3644
3645        pollfd.fd = listener;
3646        pollfd.events = POLLIN | POLLOUT;
3647
3648        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3649        EXPECT_EQ(pollfd.revents, POLLIN);
3650
3651        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3652
3653        pollfd.fd = listener;
3654        pollfd.events = POLLIN | POLLOUT;
3655
3656        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3657        EXPECT_EQ(pollfd.revents, POLLOUT);
3658
3659        EXPECT_EQ(req.data.nr, __NR_dup);
3660
3661        resp.id = req.id;
3662        resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3663
3664        /*
3665         * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3666         * args be set to 0.
3667         */
3668        resp.error = 0;
3669        resp.val = USER_NOTIF_MAGIC;
3670        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3671        EXPECT_EQ(errno, EINVAL);
3672
3673        resp.error = USER_NOTIF_MAGIC;
3674        resp.val = 0;
3675        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3676        EXPECT_EQ(errno, EINVAL);
3677
3678        resp.error = 0;
3679        resp.val = 0;
3680        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3681                if (errno == EINVAL)
3682                        SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3683        }
3684
3685skip:
3686        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3687        EXPECT_EQ(true, WIFEXITED(status));
3688        EXPECT_EQ(0, WEXITSTATUS(status)) {
3689                if (WEXITSTATUS(status) == 2) {
3690                        SKIP(return, "Kernel does not support kcmp() syscall");
3691                        return;
3692                }
3693        }
3694}
3695
3696TEST(user_notification_filter_empty)
3697{
3698        pid_t pid;
3699        long ret;
3700        int status;
3701        struct pollfd pollfd;
3702        struct clone_args args = {
3703                .flags = CLONE_FILES,
3704                .exit_signal = SIGCHLD,
3705        };
3706
3707        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3708        ASSERT_EQ(0, ret) {
3709                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3710        }
3711
3712        pid = sys_clone3(&args, sizeof(args));
3713        ASSERT_GE(pid, 0);
3714
3715        if (pid == 0) {
3716                int listener;
3717
3718                listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3719                if (listener < 0)
3720                        _exit(EXIT_FAILURE);
3721
3722                if (dup2(listener, 200) != 200)
3723                        _exit(EXIT_FAILURE);
3724
3725                close(listener);
3726
3727                _exit(EXIT_SUCCESS);
3728        }
3729
3730        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3731        EXPECT_EQ(true, WIFEXITED(status));
3732        EXPECT_EQ(0, WEXITSTATUS(status));
3733
3734        /*
3735         * The seccomp filter has become unused so we should be notified once
3736         * the kernel gets around to cleaning up task struct.
3737         */
3738        pollfd.fd = 200;
3739        pollfd.events = POLLHUP;
3740
3741        EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3742        EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3743}
3744
/* Thread start routine that does no work: it ignores @data and returns NULL. */
static void *do_thread(void *data)
{
	(void)data;

	return NULL;
}
3749
3750TEST(user_notification_filter_empty_threaded)
3751{
3752        pid_t pid;
3753        long ret;
3754        int status;
3755        struct pollfd pollfd;
3756        struct clone_args args = {
3757                .flags = CLONE_FILES,
3758                .exit_signal = SIGCHLD,
3759        };
3760
3761        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3762        ASSERT_EQ(0, ret) {
3763                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3764        }
3765
3766        pid = sys_clone3(&args, sizeof(args));
3767        ASSERT_GE(pid, 0);
3768
3769        if (pid == 0) {
3770                pid_t pid1, pid2;
3771                int listener, status;
3772                pthread_t thread;
3773
3774                listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3775                if (listener < 0)
3776                        _exit(EXIT_FAILURE);
3777
3778                if (dup2(listener, 200) != 200)
3779                        _exit(EXIT_FAILURE);
3780
3781                close(listener);
3782
3783                pid1 = fork();
3784                if (pid1 < 0)
3785                        _exit(EXIT_FAILURE);
3786
3787                if (pid1 == 0)
3788                        _exit(EXIT_SUCCESS);
3789
3790                pid2 = fork();
3791                if (pid2 < 0)
3792                        _exit(EXIT_FAILURE);
3793
3794                if (pid2 == 0)
3795                        _exit(EXIT_SUCCESS);
3796
3797                if (pthread_create(&thread, NULL, do_thread, NULL) ||
3798                    pthread_join(thread, NULL))
3799                        _exit(EXIT_FAILURE);
3800
3801                if (pthread_create(&thread, NULL, do_thread, NULL) ||
3802                    pthread_join(thread, NULL))
3803                        _exit(EXIT_FAILURE);
3804
3805                if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
3806                    WEXITSTATUS(status))
3807                        _exit(EXIT_FAILURE);
3808
3809                if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
3810                    WEXITSTATUS(status))
3811                        _exit(EXIT_FAILURE);
3812
3813                exit(EXIT_SUCCESS);
3814        }
3815
3816        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3817        EXPECT_EQ(true, WIFEXITED(status));
3818        EXPECT_EQ(0, WEXITSTATUS(status));
3819
3820        /*
3821         * The seccomp filter has become unused so we should be notified once
3822         * the kernel gets around to cleaning up task struct.
3823         */
3824        pollfd.fd = 200;
3825        pollfd.events = POLLHUP;
3826
3827        EXPECT_GT(poll(&pollfd, 1, 2000), 0);
3828        EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
3829}
3830
3831TEST(user_notification_addfd)
3832{
3833        pid_t pid;
3834        long ret;
3835        int status, listener, memfd, fd;
3836        struct seccomp_notif_addfd addfd = {};
3837        struct seccomp_notif_addfd_small small = {};
3838        struct seccomp_notif_addfd_big big = {};
3839        struct seccomp_notif req = {};
3840        struct seccomp_notif_resp resp = {};
3841        /* 100 ms */
3842        struct timespec delay = { .tv_nsec = 100000000 };
3843
3844        memfd = memfd_create("test", 0);
3845        ASSERT_GE(memfd, 0);
3846
3847        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3848        ASSERT_EQ(0, ret) {
3849                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3850        }
3851
3852        /* Check that the basic notification machinery works */
3853        listener = user_notif_syscall(__NR_getppid,
3854                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3855        ASSERT_GE(listener, 0);
3856
3857        pid = fork();
3858        ASSERT_GE(pid, 0);
3859
3860        if (pid == 0) {
3861                if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
3862                        exit(1);
3863                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3864        }
3865
3866        ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3867
3868        addfd.srcfd = memfd;
3869        addfd.newfd = 0;
3870        addfd.id = req.id;
3871        addfd.flags = 0x0;
3872
3873        /* Verify bad newfd_flags cannot be set */
3874        addfd.newfd_flags = ~O_CLOEXEC;
3875        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
3876        EXPECT_EQ(errno, EINVAL);
3877        addfd.newfd_flags = O_CLOEXEC;
3878
3879        /* Verify bad flags cannot be set */
3880        addfd.flags = 0xff;
3881        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
3882        EXPECT_EQ(errno, EINVAL);
3883        addfd.flags = 0;
3884
3885        /* Verify that remote_fd cannot be set without setting flags */
3886        addfd.newfd = 1;
3887        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
3888        EXPECT_EQ(errno, EINVAL);
3889        addfd.newfd = 0;
3890
3891        /* Verify small size cannot be set */
3892        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
3893        EXPECT_EQ(errno, EINVAL);
3894
3895        /* Verify we can't send bits filled in unknown buffer area */
3896        memset(&big, 0xAA, sizeof(big));
3897        big.addfd = addfd;
3898        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
3899        EXPECT_EQ(errno, E2BIG);
3900
3901
3902        /* Verify we can set an arbitrary remote fd */
3903        fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
3904        /*
3905         * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd),
3906         * 4(listener), so the newly allocated fd should be 5.
3907         */
3908        EXPECT_EQ(fd, 5);
3909        EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
3910
3911        /* Verify we can set an arbitrary remote fd with large size */
3912        memset(&big, 0x0, sizeof(big));
3913        big.addfd = addfd;
3914        fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
3915        EXPECT_EQ(fd, 6);
3916
3917        /* Verify we can set a specific remote fd */
3918        addfd.newfd = 42;
3919        addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
3920        fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
3921        EXPECT_EQ(fd, 42);
3922        EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
3923
3924        /* Resume syscall */
3925        resp.id = req.id;
3926        resp.error = 0;
3927        resp.val = USER_NOTIF_MAGIC;
3928        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3929
3930        /*
3931         * This sets the ID of the ADD FD to the last request plus 1. The
3932         * notification ID increments 1 per notification.
3933         */
3934        addfd.id = req.id + 1;
3935
3936        /* This spins until the underlying notification is generated */
3937        while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
3938               errno != -EINPROGRESS)
3939                nanosleep(&delay, NULL);
3940
3941        memset(&req, 0, sizeof(req));
3942        ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3943        ASSERT_EQ(addfd.id, req.id);
3944
3945        resp.id = req.id;
3946        resp.error = 0;
3947        resp.val = USER_NOTIF_MAGIC;
3948        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3949
3950        /* Wait for child to finish. */
3951        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3952        EXPECT_EQ(true, WIFEXITED(status));
3953        EXPECT_EQ(0, WEXITSTATUS(status));
3954
3955        close(memfd);
3956}
3957
3958TEST(user_notification_addfd_rlimit)
3959{
3960        pid_t pid;
3961        long ret;
3962        int status, listener, memfd;
3963        struct seccomp_notif_addfd addfd = {};
3964        struct seccomp_notif req = {};
3965        struct seccomp_notif_resp resp = {};
3966        const struct rlimit lim = {
3967                .rlim_cur       = 0,
3968                .rlim_max       = 0,
3969        };
3970
3971        memfd = memfd_create("test", 0);
3972        ASSERT_GE(memfd, 0);
3973
3974        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3975        ASSERT_EQ(0, ret) {
3976                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3977        }
3978
3979        /* Check that the basic notification machinery works */
3980        listener = user_notif_syscall(__NR_getppid,
3981                                      SECCOMP_FILTER_FLAG_NEW_LISTENER);
3982        ASSERT_GE(listener, 0);
3983
3984        pid = fork();
3985        ASSERT_GE(pid, 0);
3986
3987        if (pid == 0)
3988                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3989
3990
3991        ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3992
3993        ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
3994
3995        addfd.srcfd = memfd;
3996        addfd.newfd_flags = O_CLOEXEC;
3997        addfd.newfd = 0;
3998        addfd.id = req.id;
3999        addfd.flags = 0;
4000
4001        /* Should probably spot check /proc/sys/fs/file-nr */
4002        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4003        EXPECT_EQ(errno, EMFILE);
4004
4005        addfd.newfd = 100;
4006        addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
4007        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
4008        EXPECT_EQ(errno, EBADF);
4009
4010        resp.id = req.id;
4011        resp.error = 0;
4012        resp.val = USER_NOTIF_MAGIC;
4013
4014        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
4015
4016        /* Wait for child to finish. */
4017        EXPECT_EQ(waitpid(pid, &status, 0), pid);
4018        EXPECT_EQ(true, WIFEXITED(status));
4019        EXPECT_EQ(0, WEXITSTATUS(status));
4020
4021        close(memfd);
4022}
4023
/*
 * TODO:
 * - expand NNP testing
 * - better arch-specific TRACE and TRAP handlers.
 * - endianness checking when appropriate
 * - 64-bit arg prodding
 * - arch value testing (x86 modes especially)
 * - verify that FILTER_FLAG_LOG filters generate log messages
 * - verify that RET_LOG generates log messages
 */
4034
4035TEST_HARNESS_MAIN
4036