/* linux/tools/testing/selftests/seccomp/seccomp_bpf.c */
   1/*
   2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   3 * Use of this source code is governed by the GPLv2 license.
   4 *
   5 * Test code for seccomp bpf.
   6 */
   7
   8#include <sys/types.h>
   9
  10/*
  11 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
  12 * we need to use the kernel's siginfo.h file and trick glibc
  13 * into accepting it.
  14 */
  15#if !__GLIBC_PREREQ(2, 26)
  16# include <asm/siginfo.h>
  17# define __have_siginfo_t 1
  18# define __have_sigval_t 1
  19# define __have_sigevent_t 1
  20#endif
  21
  22#include <errno.h>
  23#include <linux/filter.h>
  24#include <sys/prctl.h>
  25#include <sys/ptrace.h>
  26#include <sys/user.h>
  27#include <linux/prctl.h>
  28#include <linux/ptrace.h>
  29#include <linux/seccomp.h>
  30#include <pthread.h>
  31#include <semaphore.h>
  32#include <signal.h>
  33#include <stddef.h>
  34#include <stdbool.h>
  35#include <string.h>
  36#include <time.h>
  37#include <linux/elf.h>
  38#include <sys/uio.h>
  39#include <sys/utsname.h>
  40#include <sys/fcntl.h>
  41#include <sys/mman.h>
  42#include <sys/times.h>
  43
  44#define _GNU_SOURCE
  45#include <unistd.h>
  46#include <sys/syscall.h>
  47
  48#include "../kselftest_harness.h"
  49
  50#ifndef PR_SET_PTRACER
  51# define PR_SET_PTRACER 0x59616d61
  52#endif
  53
  54#ifndef PR_SET_NO_NEW_PRIVS
  55#define PR_SET_NO_NEW_PRIVS 38
  56#define PR_GET_NO_NEW_PRIVS 39
  57#endif
  58
  59#ifndef PR_SECCOMP_EXT
  60#define PR_SECCOMP_EXT 43
  61#endif
  62
  63#ifndef SECCOMP_EXT_ACT
  64#define SECCOMP_EXT_ACT 1
  65#endif
  66
  67#ifndef SECCOMP_EXT_ACT_TSYNC
  68#define SECCOMP_EXT_ACT_TSYNC 1
  69#endif
  70
  71#ifndef SECCOMP_MODE_STRICT
  72#define SECCOMP_MODE_STRICT 1
  73#endif
  74
  75#ifndef SECCOMP_MODE_FILTER
  76#define SECCOMP_MODE_FILTER 2
  77#endif
  78
  79#ifndef SECCOMP_RET_ALLOW
  80struct seccomp_data {
  81        int nr;
  82        __u32 arch;
  83        __u64 instruction_pointer;
  84        __u64 args[6];
  85};
  86#endif
  87
  88#ifndef SECCOMP_RET_KILL_PROCESS
  89#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
  90#define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
  91#endif
  92#ifndef SECCOMP_RET_KILL
  93#define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
  94#define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
  95#define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
  96#define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
  97#define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
  98#endif
  99#ifndef SECCOMP_RET_LOG
 100#define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
 101#endif
 102
 103#ifndef __NR_seccomp
 104# if defined(__i386__)
 105#  define __NR_seccomp 354
 106# elif defined(__x86_64__)
 107#  define __NR_seccomp 317
 108# elif defined(__arm__)
 109#  define __NR_seccomp 383
 110# elif defined(__aarch64__)
 111#  define __NR_seccomp 277
 112# elif defined(__hppa__)
 113#  define __NR_seccomp 338
 114# elif defined(__powerpc__)
 115#  define __NR_seccomp 358
 116# elif defined(__s390__)
 117#  define __NR_seccomp 348
 118# else
 119#  warning "seccomp syscall number unknown for this architecture"
 120#  define __NR_seccomp 0xffff
 121# endif
 122#endif
 123
 124#ifndef SECCOMP_SET_MODE_STRICT
 125#define SECCOMP_SET_MODE_STRICT 0
 126#endif
 127
 128#ifndef SECCOMP_SET_MODE_FILTER
 129#define SECCOMP_SET_MODE_FILTER 1
 130#endif
 131
 132#ifndef SECCOMP_GET_ACTION_AVAIL
 133#define SECCOMP_GET_ACTION_AVAIL 2
 134#endif
 135
 136#ifndef SECCOMP_FILTER_FLAG_TSYNC
 137#define SECCOMP_FILTER_FLAG_TSYNC 1
 138#endif
 139
 140#ifndef SECCOMP_FILTER_FLAG_LOG
 141#define SECCOMP_FILTER_FLAG_LOG 2
 142#endif
 143
 144#ifndef PTRACE_SECCOMP_GET_METADATA
 145#define PTRACE_SECCOMP_GET_METADATA     0x420d
 146
 147struct seccomp_metadata {
 148        __u64 filter_off;       /* Input: which filter */
 149        __u64 flags;             /* Output: filter's flags */
 150};
 151#endif
 152
 153#ifndef seccomp
 154int seccomp(unsigned int op, unsigned int flags, void *args)
 155{
 156        errno = 0;
 157        return syscall(__NR_seccomp, op, flags, args);
 158}
 159#endif
 160
 161#if __BYTE_ORDER == __LITTLE_ENDIAN
 162#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
 163#elif __BYTE_ORDER == __BIG_ENDIAN
 164#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
 165#else
 166#error "wut? Unknown __BYTE_ORDER?!"
 167#endif
 168
 169#define SIBLING_EXIT_UNKILLED   0xbadbeef
 170#define SIBLING_EXIT_FAILURE    0xbadface
 171#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 172
 173TEST(mode_strict_support)
 174{
 175        long ret;
 176
 177        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 178        ASSERT_EQ(0, ret) {
 179                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 180        }
 181        syscall(__NR_exit, 0);
 182}
 183
 184TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 185{
 186        long ret;
 187
 188        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 189        ASSERT_EQ(0, ret) {
 190                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 191        }
 192        syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 193                NULL, NULL, NULL);
 194        EXPECT_FALSE(true) {
 195                TH_LOG("Unreachable!");
 196        }
 197}
 198
 199/* Note! This doesn't test no new privs behavior */
 200TEST(no_new_privs_support)
 201{
 202        long ret;
 203
 204        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 205        EXPECT_EQ(0, ret) {
 206                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 207        }
 208}
 209
 210/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 211TEST(mode_filter_support)
 212{
 213        long ret;
 214
 215        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 216        ASSERT_EQ(0, ret) {
 217                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 218        }
 219        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 220        EXPECT_EQ(-1, ret);
 221        EXPECT_EQ(EFAULT, errno) {
 222                TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 223        }
 224}
 225
 226TEST(mode_filter_without_nnp)
 227{
 228        struct sock_filter filter[] = {
 229                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 230        };
 231        struct sock_fprog prog = {
 232                .len = (unsigned short)ARRAY_SIZE(filter),
 233                .filter = filter,
 234        };
 235        long ret;
 236
 237        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 238        ASSERT_LE(0, ret) {
 239                TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 240        }
 241        errno = 0;
 242        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 243        /* Succeeds with CAP_SYS_ADMIN, fails without */
 244        /* TODO(wad) check caps not euid */
 245        if (geteuid()) {
 246                EXPECT_EQ(-1, ret);
 247                EXPECT_EQ(EACCES, errno);
 248        } else {
 249                EXPECT_EQ(0, ret);
 250        }
 251}
 252
 253#define MAX_INSNS_PER_PATH 32768
 254
 255TEST(filter_size_limits)
 256{
 257        int i;
 258        int count = BPF_MAXINSNS + 1;
 259        struct sock_filter allow[] = {
 260                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 261        };
 262        struct sock_filter *filter;
 263        struct sock_fprog prog = { };
 264        long ret;
 265
 266        filter = calloc(count, sizeof(*filter));
 267        ASSERT_NE(NULL, filter);
 268
 269        for (i = 0; i < count; i++)
 270                filter[i] = allow[0];
 271
 272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 273        ASSERT_EQ(0, ret);
 274
 275        prog.filter = filter;
 276        prog.len = count;
 277
 278        /* Too many filter instructions in a single filter. */
 279        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 280        ASSERT_NE(0, ret) {
 281                TH_LOG("Installing %d insn filter was allowed", prog.len);
 282        }
 283
 284        /* One less is okay, though. */
 285        prog.len -= 1;
 286        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 287        ASSERT_EQ(0, ret) {
 288                TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 289        }
 290}
 291
 292TEST(filter_chain_limits)
 293{
 294        int i;
 295        int count = BPF_MAXINSNS;
 296        struct sock_filter allow[] = {
 297                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 298        };
 299        struct sock_filter *filter;
 300        struct sock_fprog prog = { };
 301        long ret;
 302
 303        filter = calloc(count, sizeof(*filter));
 304        ASSERT_NE(NULL, filter);
 305
 306        for (i = 0; i < count; i++)
 307                filter[i] = allow[0];
 308
 309        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 310        ASSERT_EQ(0, ret);
 311
 312        prog.filter = filter;
 313        prog.len = 1;
 314
 315        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 316        ASSERT_EQ(0, ret);
 317
 318        prog.len = count;
 319
 320        /* Too many total filter instructions. */
 321        for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 322                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 323                if (ret != 0)
 324                        break;
 325        }
 326        ASSERT_NE(0, ret) {
 327                TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 328                       i, count, i * (count + 4));
 329        }
 330}
 331
 332TEST(mode_filter_cannot_move_to_strict)
 333{
 334        struct sock_filter filter[] = {
 335                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 336        };
 337        struct sock_fprog prog = {
 338                .len = (unsigned short)ARRAY_SIZE(filter),
 339                .filter = filter,
 340        };
 341        long ret;
 342
 343        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 344        ASSERT_EQ(0, ret);
 345
 346        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 347        ASSERT_EQ(0, ret);
 348
 349        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 350        EXPECT_EQ(-1, ret);
 351        EXPECT_EQ(EINVAL, errno);
 352}
 353
 354
 355TEST(mode_filter_get_seccomp)
 356{
 357        struct sock_filter filter[] = {
 358                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 359        };
 360        struct sock_fprog prog = {
 361                .len = (unsigned short)ARRAY_SIZE(filter),
 362                .filter = filter,
 363        };
 364        long ret;
 365
 366        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 367        ASSERT_EQ(0, ret);
 368
 369        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 370        EXPECT_EQ(0, ret);
 371
 372        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 373        ASSERT_EQ(0, ret);
 374
 375        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 376        EXPECT_EQ(2, ret);
 377}
 378
 379
 380TEST(ALLOW_all)
 381{
 382        struct sock_filter filter[] = {
 383                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 384        };
 385        struct sock_fprog prog = {
 386                .len = (unsigned short)ARRAY_SIZE(filter),
 387                .filter = filter,
 388        };
 389        long ret;
 390
 391        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 392        ASSERT_EQ(0, ret);
 393
 394        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 395        ASSERT_EQ(0, ret);
 396}
 397
 398TEST(empty_prog)
 399{
 400        struct sock_filter filter[] = {
 401        };
 402        struct sock_fprog prog = {
 403                .len = (unsigned short)ARRAY_SIZE(filter),
 404                .filter = filter,
 405        };
 406        long ret;
 407
 408        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 409        ASSERT_EQ(0, ret);
 410
 411        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 412        EXPECT_EQ(-1, ret);
 413        EXPECT_EQ(EINVAL, errno);
 414}
 415
 416TEST(log_all)
 417{
 418        struct sock_filter filter[] = {
 419                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
 420        };
 421        struct sock_fprog prog = {
 422                .len = (unsigned short)ARRAY_SIZE(filter),
 423                .filter = filter,
 424        };
 425        long ret;
 426        pid_t parent = getppid();
 427
 428        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 429        ASSERT_EQ(0, ret);
 430
 431        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 432        ASSERT_EQ(0, ret);
 433
 434        /* getppid() should succeed and be logged (no check for logging) */
 435        EXPECT_EQ(parent, syscall(__NR_getppid));
 436}
 437
 438TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 439{
 440        struct sock_filter filter[] = {
 441                BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 442        };
 443        struct sock_fprog prog = {
 444                .len = (unsigned short)ARRAY_SIZE(filter),
 445                .filter = filter,
 446        };
 447        long ret;
 448
 449        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 450        ASSERT_EQ(0, ret);
 451
 452        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 453        ASSERT_EQ(0, ret);
 454        EXPECT_EQ(0, syscall(__NR_getpid)) {
 455                TH_LOG("getpid() shouldn't ever return");
 456        }
 457}
 458
 459/* return code >= 0x80000000 is unused. */
 460TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 461{
 462        struct sock_filter filter[] = {
 463                BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 464        };
 465        struct sock_fprog prog = {
 466                .len = (unsigned short)ARRAY_SIZE(filter),
 467                .filter = filter,
 468        };
 469        long ret;
 470
 471        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 472        ASSERT_EQ(0, ret);
 473
 474        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 475        ASSERT_EQ(0, ret);
 476        EXPECT_EQ(0, syscall(__NR_getpid)) {
 477                TH_LOG("getpid() shouldn't ever return");
 478        }
 479}
 480
 481TEST_SIGNAL(KILL_all, SIGSYS)
 482{
 483        struct sock_filter filter[] = {
 484                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 485        };
 486        struct sock_fprog prog = {
 487                .len = (unsigned short)ARRAY_SIZE(filter),
 488                .filter = filter,
 489        };
 490        long ret;
 491
 492        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 493        ASSERT_EQ(0, ret);
 494
 495        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 496        ASSERT_EQ(0, ret);
 497}
 498
 499TEST_SIGNAL(KILL_one, SIGSYS)
 500{
 501        struct sock_filter filter[] = {
 502                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 503                        offsetof(struct seccomp_data, nr)),
 504                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 505                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 506                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 507        };
 508        struct sock_fprog prog = {
 509                .len = (unsigned short)ARRAY_SIZE(filter),
 510                .filter = filter,
 511        };
 512        long ret;
 513        pid_t parent = getppid();
 514
 515        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 516        ASSERT_EQ(0, ret);
 517
 518        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 519        ASSERT_EQ(0, ret);
 520
 521        EXPECT_EQ(parent, syscall(__NR_getppid));
 522        /* getpid() should never return. */
 523        EXPECT_EQ(0, syscall(__NR_getpid));
 524}
 525
 526TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 527{
 528        void *fatal_address;
 529        struct sock_filter filter[] = {
 530                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 531                        offsetof(struct seccomp_data, nr)),
 532                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 533                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 534                /* Only both with lower 32-bit for now. */
 535                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 536                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 537                        (unsigned long)&fatal_address, 0, 1),
 538                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 539                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 540        };
 541        struct sock_fprog prog = {
 542                .len = (unsigned short)ARRAY_SIZE(filter),
 543                .filter = filter,
 544        };
 545        long ret;
 546        pid_t parent = getppid();
 547        struct tms timebuf;
 548        clock_t clock = times(&timebuf);
 549
 550        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 551        ASSERT_EQ(0, ret);
 552
 553        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 554        ASSERT_EQ(0, ret);
 555
 556        EXPECT_EQ(parent, syscall(__NR_getppid));
 557        EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 558        /* times() should never return. */
 559        EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 560}
 561
 562TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 563{
 564#ifndef __NR_mmap2
 565        int sysno = __NR_mmap;
 566#else
 567        int sysno = __NR_mmap2;
 568#endif
 569        struct sock_filter filter[] = {
 570                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 571                        offsetof(struct seccomp_data, nr)),
 572                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 573                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 574                /* Only both with lower 32-bit for now. */
 575                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 576                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 577                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 578                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 579        };
 580        struct sock_fprog prog = {
 581                .len = (unsigned short)ARRAY_SIZE(filter),
 582                .filter = filter,
 583        };
 584        long ret;
 585        pid_t parent = getppid();
 586        int fd;
 587        void *map1, *map2;
 588        int page_size = sysconf(_SC_PAGESIZE);
 589
 590        ASSERT_LT(0, page_size);
 591
 592        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 593        ASSERT_EQ(0, ret);
 594
 595        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 596        ASSERT_EQ(0, ret);
 597
 598        fd = open("/dev/zero", O_RDONLY);
 599        ASSERT_NE(-1, fd);
 600
 601        EXPECT_EQ(parent, syscall(__NR_getppid));
 602        map1 = (void *)syscall(sysno,
 603                NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 604        EXPECT_NE(MAP_FAILED, map1);
 605        /* mmap2() should never return. */
 606        map2 = (void *)syscall(sysno,
 607                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 608        EXPECT_EQ(MAP_FAILED, map2);
 609
 610        /* The test failed, so clean up the resources. */
 611        munmap(map1, page_size);
 612        munmap(map2, page_size);
 613        close(fd);
 614}
 615
 616/* This is a thread task to die via seccomp filter violation. */
 617void *kill_thread(void *data)
 618{
 619        bool die = (bool)data;
 620
 621        if (die) {
 622                prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 623                return (void *)SIBLING_EXIT_FAILURE;
 624        }
 625
 626        return (void *)SIBLING_EXIT_UNKILLED;
 627}
 628
 629/* Prepare a thread that will kill itself or both of us. */
 630void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 631{
 632        pthread_t thread;
 633        void *status;
 634        /* Kill only when calling __NR_prctl. */
 635        struct sock_filter filter_thread[] = {
 636                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 637                        offsetof(struct seccomp_data, nr)),
 638                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 639                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
 640                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 641        };
 642        struct sock_fprog prog_thread = {
 643                .len = (unsigned short)ARRAY_SIZE(filter_thread),
 644                .filter = filter_thread,
 645        };
 646        struct sock_filter filter_process[] = {
 647                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 648                        offsetof(struct seccomp_data, nr)),
 649                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 650                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
 651                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 652        };
 653        struct sock_fprog prog_process = {
 654                .len = (unsigned short)ARRAY_SIZE(filter_process),
 655                .filter = filter_process,
 656        };
 657
 658        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
 659                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 660        }
 661
 662        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
 663                             kill_process ? &prog_process : &prog_thread));
 664
 665        /*
 666         * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 667         * flag cannot be downgraded by a new filter.
 668         */
 669        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 670
 671        /* Start a thread that will exit immediately. */
 672        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
 673        ASSERT_EQ(0, pthread_join(thread, &status));
 674        ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
 675
 676        /* Start a thread that will die immediately. */
 677        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
 678        ASSERT_EQ(0, pthread_join(thread, &status));
 679        ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
 680
 681        /*
 682         * If we get here, only the spawned thread died. Let the parent know
 683         * the whole process didn't die (i.e. this thread, the spawner,
 684         * stayed running).
 685         */
 686        exit(42);
 687}
 688
 689TEST(KILL_thread)
 690{
 691        int status;
 692        pid_t child_pid;
 693
 694        child_pid = fork();
 695        ASSERT_LE(0, child_pid);
 696        if (child_pid == 0) {
 697                kill_thread_or_group(_metadata, false);
 698                _exit(38);
 699        }
 700
 701        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 702
 703        /* If only the thread was killed, we'll see exit 42. */
 704        ASSERT_TRUE(WIFEXITED(status));
 705        ASSERT_EQ(42, WEXITSTATUS(status));
 706}
 707
 708TEST(KILL_process)
 709{
 710        int status;
 711        pid_t child_pid;
 712
 713        child_pid = fork();
 714        ASSERT_LE(0, child_pid);
 715        if (child_pid == 0) {
 716                kill_thread_or_group(_metadata, true);
 717                _exit(38);
 718        }
 719
 720        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 721
 722        /* If the entire process was killed, we'll see SIGSYS. */
 723        ASSERT_TRUE(WIFSIGNALED(status));
 724        ASSERT_EQ(SIGSYS, WTERMSIG(status));
 725}
 726
 727/* TODO(wad) add 64-bit versus 32-bit arg tests. */
 728TEST(arg_out_of_range)
 729{
 730        struct sock_filter filter[] = {
 731                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 732                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 733        };
 734        struct sock_fprog prog = {
 735                .len = (unsigned short)ARRAY_SIZE(filter),
 736                .filter = filter,
 737        };
 738        long ret;
 739
 740        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 741        ASSERT_EQ(0, ret);
 742
 743        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 744        EXPECT_EQ(-1, ret);
 745        EXPECT_EQ(EINVAL, errno);
 746}
 747
 748#define ERRNO_FILTER(name, errno)                                       \
 749        struct sock_filter _read_filter_##name[] = {                    \
 750                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,                          \
 751                        offsetof(struct seccomp_data, nr)),             \
 752                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),       \
 753                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),     \
 754                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),             \
 755        };                                                              \
 756        struct sock_fprog prog_##name = {                               \
 757                .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
 758                .filter = _read_filter_##name,                          \
 759        }
 760
 761/* Make sure basic errno values are correctly passed through a filter. */
 762TEST(ERRNO_valid)
 763{
 764        ERRNO_FILTER(valid, E2BIG);
 765        long ret;
 766        pid_t parent = getppid();
 767
 768        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 769        ASSERT_EQ(0, ret);
 770
 771        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 772        ASSERT_EQ(0, ret);
 773
 774        EXPECT_EQ(parent, syscall(__NR_getppid));
 775        EXPECT_EQ(-1, read(0, NULL, 0));
 776        EXPECT_EQ(E2BIG, errno);
 777}
 778
 779/* Make sure an errno of zero is correctly handled by the arch code. */
 780TEST(ERRNO_zero)
 781{
 782        ERRNO_FILTER(zero, 0);
 783        long ret;
 784        pid_t parent = getppid();
 785
 786        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 787        ASSERT_EQ(0, ret);
 788
 789        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 790        ASSERT_EQ(0, ret);
 791
 792        EXPECT_EQ(parent, syscall(__NR_getppid));
 793        /* "errno" of 0 is ok. */
 794        EXPECT_EQ(0, read(0, NULL, 0));
 795}
 796
 797/*
 798 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 799 * This tests that the errno value gets capped correctly, fixed by
 800 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 801 */
 802TEST(ERRNO_capped)
 803{
 804        ERRNO_FILTER(capped, 4096);
 805        long ret;
 806        pid_t parent = getppid();
 807
 808        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 809        ASSERT_EQ(0, ret);
 810
 811        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 812        ASSERT_EQ(0, ret);
 813
 814        EXPECT_EQ(parent, syscall(__NR_getppid));
 815        EXPECT_EQ(-1, read(0, NULL, 0));
 816        EXPECT_EQ(4095, errno);
 817}
 818
 819/*
 820 * Filters are processed in reverse order: last applied is executed first.
 821 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 822 * SECCOMP_RET_DATA mask results will follow the most recently applied
 823 * matching filter return (and not the lowest or highest value).
 824 */
 825TEST(ERRNO_order)
 826{
 827        ERRNO_FILTER(first,  11);
 828        ERRNO_FILTER(second, 13);
 829        ERRNO_FILTER(third,  12);
 830        long ret;
 831        pid_t parent = getppid();
 832
 833        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 834        ASSERT_EQ(0, ret);
 835
 836        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
 837        ASSERT_EQ(0, ret);
 838
 839        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
 840        ASSERT_EQ(0, ret);
 841
 842        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
 843        ASSERT_EQ(0, ret);
 844
 845        EXPECT_EQ(parent, syscall(__NR_getppid));
 846        EXPECT_EQ(-1, read(0, NULL, 0));
 847        EXPECT_EQ(12, errno);
 848}
 849
 850FIXTURE_DATA(TRAP) {
 851        struct sock_fprog prog;
 852};
 853
 854FIXTURE_SETUP(TRAP)
 855{
 856        struct sock_filter filter[] = {
 857                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 858                        offsetof(struct seccomp_data, nr)),
 859                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 860                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 861                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 862        };
 863
 864        memset(&self->prog, 0, sizeof(self->prog));
 865        self->prog.filter = malloc(sizeof(filter));
 866        ASSERT_NE(NULL, self->prog.filter);
 867        memcpy(self->prog.filter, filter, sizeof(filter));
 868        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
 869}
 870
 871FIXTURE_TEARDOWN(TRAP)
 872{
 873        if (self->prog.filter)
 874                free(self->prog.filter);
 875}
 876
 877TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
 878{
 879        long ret;
 880
 881        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 882        ASSERT_EQ(0, ret);
 883
 884        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 885        ASSERT_EQ(0, ret);
 886        syscall(__NR_getpid);
 887}
 888
 889/* Ensure that SIGSYS overrides SIG_IGN */
 890TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 891{
 892        long ret;
 893
 894        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 895        ASSERT_EQ(0, ret);
 896
 897        signal(SIGSYS, SIG_IGN);
 898
 899        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 900        ASSERT_EQ(0, ret);
 901        syscall(__NR_getpid);
 902}
 903
 904static siginfo_t TRAP_info;
 905static volatile int TRAP_nr;
 906static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 907{
 908        memcpy(&TRAP_info, info, sizeof(TRAP_info));
 909        TRAP_nr = nr;
 910}
 911
 912TEST_F(TRAP, handler)
 913{
 914        int ret, test;
 915        struct sigaction act;
 916        sigset_t mask;
 917
 918        memset(&act, 0, sizeof(act));
 919        sigemptyset(&mask);
 920        sigaddset(&mask, SIGSYS);
 921
 922        act.sa_sigaction = &TRAP_action;
 923        act.sa_flags = SA_SIGINFO;
 924        ret = sigaction(SIGSYS, &act, NULL);
 925        ASSERT_EQ(0, ret) {
 926                TH_LOG("sigaction failed");
 927        }
 928        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
 929        ASSERT_EQ(0, ret) {
 930                TH_LOG("sigprocmask failed");
 931        }
 932
 933        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 934        ASSERT_EQ(0, ret);
 935        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 936        ASSERT_EQ(0, ret);
 937        TRAP_nr = 0;
 938        memset(&TRAP_info, 0, sizeof(TRAP_info));
 939        /* Expect the registers to be rolled back. (nr = error) may vary
 940         * based on arch. */
 941        ret = syscall(__NR_getpid);
 942        /* Silence gcc warning about volatile. */
 943        test = TRAP_nr;
 944        EXPECT_EQ(SIGSYS, test);
 945        struct local_sigsys {
 946                void *_call_addr;       /* calling user insn */
 947                int _syscall;           /* triggering system call number */
 948                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
 949        } *sigsys = (struct local_sigsys *)
 950#ifdef si_syscall
 951                &(TRAP_info.si_call_addr);
 952#else
 953                &TRAP_info.si_pid;
 954#endif
 955        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
 956        /* Make sure arch is non-zero. */
 957        EXPECT_NE(0, sigsys->_arch);
 958        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
 959}
 960
 961FIXTURE_DATA(precedence) {
 962        struct sock_fprog allow;
 963        struct sock_fprog log;
 964        struct sock_fprog trace;
 965        struct sock_fprog error;
 966        struct sock_fprog trap;
 967        struct sock_fprog kill;
 968};
 969
 970FIXTURE_SETUP(precedence)
 971{
 972        struct sock_filter allow_insns[] = {
 973                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 974        };
 975        struct sock_filter log_insns[] = {
 976                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 977                        offsetof(struct seccomp_data, nr)),
 978                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 979                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 980                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
 981        };
 982        struct sock_filter trace_insns[] = {
 983                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 984                        offsetof(struct seccomp_data, nr)),
 985                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 986                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 987                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
 988        };
 989        struct sock_filter error_insns[] = {
 990                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 991                        offsetof(struct seccomp_data, nr)),
 992                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 993                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 994                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
 995        };
 996        struct sock_filter trap_insns[] = {
 997                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 998                        offsetof(struct seccomp_data, nr)),
 999                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1000                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1001                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1002        };
1003        struct sock_filter kill_insns[] = {
1004                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1005                        offsetof(struct seccomp_data, nr)),
1006                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1007                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1008                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1009        };
1010
1011        memset(self, 0, sizeof(*self));
1012#define FILTER_ALLOC(_x) \
1013        self->_x.filter = malloc(sizeof(_x##_insns)); \
1014        ASSERT_NE(NULL, self->_x.filter); \
1015        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1016        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1017        FILTER_ALLOC(allow);
1018        FILTER_ALLOC(log);
1019        FILTER_ALLOC(trace);
1020        FILTER_ALLOC(error);
1021        FILTER_ALLOC(trap);
1022        FILTER_ALLOC(kill);
1023}
1024
1025FIXTURE_TEARDOWN(precedence)
1026{
1027#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1028        FILTER_FREE(allow);
1029        FILTER_FREE(log);
1030        FILTER_FREE(trace);
1031        FILTER_FREE(error);
1032        FILTER_FREE(trap);
1033        FILTER_FREE(kill);
1034}
1035
1036TEST_F(precedence, allow_ok)
1037{
1038        pid_t parent, res = 0;
1039        long ret;
1040
1041        parent = getppid();
1042        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1043        ASSERT_EQ(0, ret);
1044
1045        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1046        ASSERT_EQ(0, ret);
1047        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1048        ASSERT_EQ(0, ret);
1049        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1050        ASSERT_EQ(0, ret);
1051        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1052        ASSERT_EQ(0, ret);
1053        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1054        ASSERT_EQ(0, ret);
1055        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1056        ASSERT_EQ(0, ret);
1057        /* Should work just fine. */
1058        res = syscall(__NR_getppid);
1059        EXPECT_EQ(parent, res);
1060}
1061
1062TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1063{
1064        pid_t parent, res = 0;
1065        long ret;
1066
1067        parent = getppid();
1068        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1069        ASSERT_EQ(0, ret);
1070
1071        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1072        ASSERT_EQ(0, ret);
1073        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1074        ASSERT_EQ(0, ret);
1075        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1076        ASSERT_EQ(0, ret);
1077        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1078        ASSERT_EQ(0, ret);
1079        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1080        ASSERT_EQ(0, ret);
1081        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1082        ASSERT_EQ(0, ret);
1083        /* Should work just fine. */
1084        res = syscall(__NR_getppid);
1085        EXPECT_EQ(parent, res);
1086        /* getpid() should never return. */
1087        res = syscall(__NR_getpid);
1088        EXPECT_EQ(0, res);
1089}
1090
1091TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1092{
1093        pid_t parent;
1094        long ret;
1095
1096        parent = getppid();
1097        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1098        ASSERT_EQ(0, ret);
1099
1100        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1101        ASSERT_EQ(0, ret);
1102        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1103        ASSERT_EQ(0, ret);
1104        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1105        ASSERT_EQ(0, ret);
1106        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1107        ASSERT_EQ(0, ret);
1108        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1109        ASSERT_EQ(0, ret);
1110        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1111        ASSERT_EQ(0, ret);
1112        /* Should work just fine. */
1113        EXPECT_EQ(parent, syscall(__NR_getppid));
1114        /* getpid() should never return. */
1115        EXPECT_EQ(0, syscall(__NR_getpid));
1116}
1117
1118TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1119{
1120        pid_t parent;
1121        long ret;
1122
1123        parent = getppid();
1124        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1125        ASSERT_EQ(0, ret);
1126
1127        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1128        ASSERT_EQ(0, ret);
1129        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1130        ASSERT_EQ(0, ret);
1131        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1132        ASSERT_EQ(0, ret);
1133        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1134        ASSERT_EQ(0, ret);
1135        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1136        ASSERT_EQ(0, ret);
1137        /* Should work just fine. */
1138        EXPECT_EQ(parent, syscall(__NR_getppid));
1139        /* getpid() should never return. */
1140        EXPECT_EQ(0, syscall(__NR_getpid));
1141}
1142
1143TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1144{
1145        pid_t parent;
1146        long ret;
1147
1148        parent = getppid();
1149        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1150        ASSERT_EQ(0, ret);
1151
1152        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1153        ASSERT_EQ(0, ret);
1154        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1155        ASSERT_EQ(0, ret);
1156        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1157        ASSERT_EQ(0, ret);
1158        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1159        ASSERT_EQ(0, ret);
1160        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1161        ASSERT_EQ(0, ret);
1162        /* Should work just fine. */
1163        EXPECT_EQ(parent, syscall(__NR_getppid));
1164        /* getpid() should never return. */
1165        EXPECT_EQ(0, syscall(__NR_getpid));
1166}
1167
1168TEST_F(precedence, errno_is_third)
1169{
1170        pid_t parent;
1171        long ret;
1172
1173        parent = getppid();
1174        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1175        ASSERT_EQ(0, ret);
1176
1177        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1178        ASSERT_EQ(0, ret);
1179        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1180        ASSERT_EQ(0, ret);
1181        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1182        ASSERT_EQ(0, ret);
1183        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1184        ASSERT_EQ(0, ret);
1185        /* Should work just fine. */
1186        EXPECT_EQ(parent, syscall(__NR_getppid));
1187        EXPECT_EQ(0, syscall(__NR_getpid));
1188}
1189
1190TEST_F(precedence, errno_is_third_in_any_order)
1191{
1192        pid_t parent;
1193        long ret;
1194
1195        parent = getppid();
1196        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1197        ASSERT_EQ(0, ret);
1198
1199        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1200        ASSERT_EQ(0, ret);
1201        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1202        ASSERT_EQ(0, ret);
1203        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1204        ASSERT_EQ(0, ret);
1205        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1206        ASSERT_EQ(0, ret);
1207        /* Should work just fine. */
1208        EXPECT_EQ(parent, syscall(__NR_getppid));
1209        EXPECT_EQ(0, syscall(__NR_getpid));
1210}
1211
1212TEST_F(precedence, trace_is_fourth)
1213{
1214        pid_t parent;
1215        long ret;
1216
1217        parent = getppid();
1218        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1219        ASSERT_EQ(0, ret);
1220
1221        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1222        ASSERT_EQ(0, ret);
1223        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1224        ASSERT_EQ(0, ret);
1225        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1226        ASSERT_EQ(0, ret);
1227        /* Should work just fine. */
1228        EXPECT_EQ(parent, syscall(__NR_getppid));
1229        /* No ptracer */
1230        EXPECT_EQ(-1, syscall(__NR_getpid));
1231}
1232
1233TEST_F(precedence, trace_is_fourth_in_any_order)
1234{
1235        pid_t parent;
1236        long ret;
1237
1238        parent = getppid();
1239        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1240        ASSERT_EQ(0, ret);
1241
1242        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1243        ASSERT_EQ(0, ret);
1244        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1245        ASSERT_EQ(0, ret);
1246        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1247        ASSERT_EQ(0, ret);
1248        /* Should work just fine. */
1249        EXPECT_EQ(parent, syscall(__NR_getppid));
1250        /* No ptracer */
1251        EXPECT_EQ(-1, syscall(__NR_getpid));
1252}
1253
1254TEST_F(precedence, log_is_fifth)
1255{
1256        pid_t mypid, parent;
1257        long ret;
1258
1259        mypid = getpid();
1260        parent = getppid();
1261        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1262        ASSERT_EQ(0, ret);
1263
1264        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1265        ASSERT_EQ(0, ret);
1266        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1267        ASSERT_EQ(0, ret);
1268        /* Should work just fine. */
1269        EXPECT_EQ(parent, syscall(__NR_getppid));
1270        /* Should also work just fine */
1271        EXPECT_EQ(mypid, syscall(__NR_getpid));
1272}
1273
1274TEST_F(precedence, log_is_fifth_in_any_order)
1275{
1276        pid_t mypid, parent;
1277        long ret;
1278
1279        mypid = getpid();
1280        parent = getppid();
1281        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1282        ASSERT_EQ(0, ret);
1283
1284        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1285        ASSERT_EQ(0, ret);
1286        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1287        ASSERT_EQ(0, ret);
1288        /* Should work just fine. */
1289        EXPECT_EQ(parent, syscall(__NR_getppid));
1290        /* Should also work just fine */
1291        EXPECT_EQ(mypid, syscall(__NR_getpid));
1292}
1293
/* Fallback for headers that predate the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above bit 15 of a wait() status equal
 * PTRACE_EVENT_SECCOMP.  The argument is parenthesized so that compound
 * expressions (e.g. "a | b") are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer's event loop.  It is cleared from a signal
 * handler, so it is declared volatile sig_atomic_t, the type C allows
 * a handler to write to.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: request that the tracer loop stop. @sig is unused. */
void tracer_stop(int sig)
{
        (void)sig;
        tracer_running = false;
}
1313
1314typedef void tracer_func_t(struct __test_metadata *_metadata,
1315                           pid_t tracee, int status, void *args);
1316
1317void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1318            tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1319{
1320        int ret = -1;
1321        struct sigaction action = {
1322                .sa_handler = tracer_stop,
1323        };
1324
1325        /* Allow external shutdown. */
1326        tracer_running = true;
1327        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1328
1329        errno = 0;
1330        while (ret == -1 && errno != EINVAL)
1331                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1332        ASSERT_EQ(0, ret) {
1333                kill(tracee, SIGKILL);
1334        }
1335        /* Wait for attach stop */
1336        wait(NULL);
1337
1338        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1339                                                      PTRACE_O_TRACESYSGOOD :
1340                                                      PTRACE_O_TRACESECCOMP);
1341        ASSERT_EQ(0, ret) {
1342                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1343                kill(tracee, SIGKILL);
1344        }
1345        ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1346                     tracee, NULL, 0);
1347        ASSERT_EQ(0, ret);
1348
1349        /* Unblock the tracee */
1350        ASSERT_EQ(1, write(fd, "A", 1));
1351        ASSERT_EQ(0, close(fd));
1352
1353        /* Run until we're shut down. Must assert to stop execution. */
1354        while (tracer_running) {
1355                int status;
1356
1357                if (wait(&status) != tracee)
1358                        continue;
1359                if (WIFSIGNALED(status) || WIFEXITED(status))
1360                        /* Child is dead. Time to go. */
1361                        return;
1362
1363                /* Check if this is a seccomp event. */
1364                ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1365
1366                tracer_func(_metadata, tracee, status, args);
1367
1368                ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1369                             tracee, NULL, 0);
1370                ASSERT_EQ(0, ret);
1371        }
1372        /* Directly report the status of our test harness results. */
1373        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1374}
1375
1376/* Common tracer setup/teardown functions. */
1377void cont_handler(int num)
1378{ }
1379pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1380                          tracer_func_t func, void *args, bool ptrace_syscall)
1381{
1382        char sync;
1383        int pipefd[2];
1384        pid_t tracer_pid;
1385        pid_t tracee = getpid();
1386
1387        /* Setup a pipe for clean synchronization. */
1388        ASSERT_EQ(0, pipe(pipefd));
1389
1390        /* Fork a child which we'll promote to tracer */
1391        tracer_pid = fork();
1392        ASSERT_LE(0, tracer_pid);
1393        signal(SIGALRM, cont_handler);
1394        if (tracer_pid == 0) {
1395                close(pipefd[0]);
1396                start_tracer(_metadata, pipefd[1], tracee, func, args,
1397                             ptrace_syscall);
1398                syscall(__NR_exit, 0);
1399        }
1400        close(pipefd[1]);
1401        prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1402        read(pipefd[0], &sync, 1);
1403        close(pipefd[0]);
1404
1405        return tracer_pid;
1406}
1407void teardown_trace_fixture(struct __test_metadata *_metadata,
1408                            pid_t tracer)
1409{
1410        if (tracer) {
1411                int status;
1412                /*
1413                 * Extract the exit code from the other process and
1414                 * adopt it for ourselves in case its asserts failed.
1415                 */
1416                ASSERT_EQ(0, kill(tracer, SIGUSR1));
1417                ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1418                if (WEXITSTATUS(status))
1419                        _metadata->passed = 0;
1420        }
1421}
1422
1423/* "poke" tracer arguments and function. */
1424struct tracer_args_poke_t {
1425        unsigned long poke_addr;
1426};
1427
1428void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1429                 void *args)
1430{
1431        int ret;
1432        unsigned long msg;
1433        struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1434
1435        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1436        EXPECT_EQ(0, ret);
1437        /* If this fails, don't try to recover. */
1438        ASSERT_EQ(0x1001, msg) {
1439                kill(tracee, SIGKILL);
1440        }
1441        /*
1442         * Poke in the message.
1443         * Registers are not touched to try to keep this relatively arch
1444         * agnostic.
1445         */
1446        ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1447        EXPECT_EQ(0, ret);
1448}
1449
1450FIXTURE_DATA(TRACE_poke) {
1451        struct sock_fprog prog;
1452        pid_t tracer;
1453        long poked;
1454        struct tracer_args_poke_t tracer_args;
1455};
1456
1457FIXTURE_SETUP(TRACE_poke)
1458{
1459        struct sock_filter filter[] = {
1460                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1461                        offsetof(struct seccomp_data, nr)),
1462                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1463                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1464                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1465        };
1466
1467        self->poked = 0;
1468        memset(&self->prog, 0, sizeof(self->prog));
1469        self->prog.filter = malloc(sizeof(filter));
1470        ASSERT_NE(NULL, self->prog.filter);
1471        memcpy(self->prog.filter, filter, sizeof(filter));
1472        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1473
1474        /* Set up tracer args. */
1475        self->tracer_args.poke_addr = (unsigned long)&self->poked;
1476
1477        /* Launch tracer. */
1478        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1479                                           &self->tracer_args, false);
1480}
1481
1482FIXTURE_TEARDOWN(TRACE_poke)
1483{
1484        teardown_trace_fixture(_metadata, self->tracer);
1485        if (self->prog.filter)
1486                free(self->prog.filter);
1487}
1488
1489TEST_F(TRACE_poke, read_has_side_effects)
1490{
1491        ssize_t ret;
1492
1493        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1494        ASSERT_EQ(0, ret);
1495
1496        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1497        ASSERT_EQ(0, ret);
1498
1499        EXPECT_EQ(0, self->poked);
1500        ret = read(-1, NULL, 0);
1501        EXPECT_EQ(-1, ret);
1502        EXPECT_EQ(0x1001, self->poked);
1503}
1504
1505TEST_F(TRACE_poke, getpid_runs_normally)
1506{
1507        long ret;
1508
1509        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1510        ASSERT_EQ(0, ret);
1511
1512        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1513        ASSERT_EQ(0, ret);
1514
1515        EXPECT_EQ(0, self->poked);
1516        EXPECT_NE(0, syscall(__NR_getpid));
1517        EXPECT_EQ(0, self->poked);
1518}
1519
/*
 * Per-architecture register access:
 *   ARCH_REGS    type used to hold the tracee's registers
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 */
#if defined(__x86_64__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            orig_rax
# define SYSCALL_RET            rax
#elif defined(__i386__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            orig_eax
# define SYSCALL_RET            eax
#elif defined(__arm__)
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            ARM_r7
# define SYSCALL_RET            ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS              struct user_pt_regs
# define SYSCALL_NUM            regs[8]
# define SYSCALL_RET            regs[0]
#elif defined(__hppa__)
# define ARCH_REGS              struct user_regs_struct
# define SYSCALL_NUM            gr[20]
# define SYSCALL_RET            gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            gpr[0]
# define SYSCALL_RET            gpr[3]
#elif defined(__s390__)
# define ARCH_REGS              s390_regs
# define SYSCALL_NUM            gprs[2]
# define SYSCALL_RET            gprs[2]
#elif defined(__mips__)
# define ARCH_REGS              struct pt_regs
# define SYSCALL_NUM            regs[2]
# define SYSCALL_SYSCALL_NUM    regs[4]
# define SYSCALL_RET            regs[2]
/* Syscall number and return value live in the same register here. */
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* When the syscall return can't be changed, stub out the tests for it. */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(val, action)
#endif

/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1571
1572/* Architecture-specific syscall fetching routine. */
1573int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1574{
1575        ARCH_REGS regs;
1576#ifdef HAVE_GETREGS
1577        EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1578                TH_LOG("PTRACE_GETREGS failed");
1579                return -1;
1580        }
1581#else
1582        struct iovec iov;
1583
1584        iov.iov_base = &regs;
1585        iov.iov_len = sizeof(regs);
1586        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1587                TH_LOG("PTRACE_GETREGSET failed");
1588                return -1;
1589        }
1590#endif
1591
1592#if defined(__mips__)
1593        if (regs.SYSCALL_NUM == __NR_O32_Linux)
1594                return regs.SYSCALL_SYSCALL_NUM;
1595#endif
1596        return regs.SYSCALL_NUM;
1597}
1598
1599/* Architecture-specific syscall changing routine. */
1600void change_syscall(struct __test_metadata *_metadata,
1601                    pid_t tracee, int syscall)
1602{
1603        int ret;
1604        ARCH_REGS regs;
1605#ifdef HAVE_GETREGS
1606        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1607#else
1608        struct iovec iov;
1609        iov.iov_base = &regs;
1610        iov.iov_len = sizeof(regs);
1611        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1612#endif
1613        EXPECT_EQ(0, ret) {}
1614
1615#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1616    defined(__s390__) || defined(__hppa__)
1617        {
1618                regs.SYSCALL_NUM = syscall;
1619        }
1620#elif defined(__mips__)
1621        {
1622                if (regs.SYSCALL_NUM == __NR_O32_Linux)
1623                        regs.SYSCALL_SYSCALL_NUM = syscall;
1624                else
1625                        regs.SYSCALL_NUM = syscall;
1626        }
1627
1628#elif defined(__arm__)
1629# ifndef PTRACE_SET_SYSCALL
1630#  define PTRACE_SET_SYSCALL   23
1631# endif
1632        {
1633                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1634                EXPECT_EQ(0, ret);
1635        }
1636
1637#elif defined(__aarch64__)
1638# ifndef NT_ARM_SYSTEM_CALL
1639#  define NT_ARM_SYSTEM_CALL 0x404
1640# endif
1641        {
1642                iov.iov_base = &syscall;
1643                iov.iov_len = sizeof(syscall);
1644                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1645                             &iov);
1646                EXPECT_EQ(0, ret);
1647        }
1648
1649#else
1650        ASSERT_EQ(1, 0) {
1651                TH_LOG("How is the syscall changed on this architecture?");
1652        }
1653#endif
1654
1655        /* If syscall is skipped, change return value. */
1656        if (syscall == -1)
1657#ifdef SYSCALL_NUM_RET_SHARE_REG
1658                TH_LOG("Can't modify syscall return on this architecture");
1659#else
1660                regs.SYSCALL_RET = EPERM;
1661#endif
1662
1663#ifdef HAVE_GETREGS
1664        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1665#else
1666        iov.iov_base = &regs;
1667        iov.iov_len = sizeof(regs);
1668        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1669#endif
1670        EXPECT_EQ(0, ret);
1671}
1672
1673void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1674                    int status, void *args)
1675{
1676        int ret;
1677        unsigned long msg;
1678
1679        /* Make sure we got the right message. */
1680        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1681        EXPECT_EQ(0, ret);
1682
1683        /* Validate and take action on expected syscalls. */
1684        switch (msg) {
1685        case 0x1002:
1686                /* change getpid to getppid. */
1687                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1688                change_syscall(_metadata, tracee, __NR_getppid);
1689                break;
1690        case 0x1003:
1691                /* skip gettid. */
1692                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1693                change_syscall(_metadata, tracee, -1);
1694                break;
1695        case 0x1004:
1696                /* do nothing (allow getppid) */
1697                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1698                break;
1699        default:
1700                EXPECT_EQ(0, msg) {
1701                        TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1702                        kill(tracee, SIGKILL);
1703                }
1704        }
1705
1706}
1707
1708void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1709                   int status, void *args)
1710{
1711        int ret, nr;
1712        unsigned long msg;
1713        static bool entry;
1714
1715        /* Make sure we got an empty message. */
1716        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1717        EXPECT_EQ(0, ret);
1718        EXPECT_EQ(0, msg);
1719
1720        /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1721        entry = !entry;
1722        if (!entry)
1723                return;
1724
1725        nr = get_syscall(_metadata, tracee);
1726
1727        if (nr == __NR_getpid)
1728                change_syscall(_metadata, tracee, __NR_getppid);
1729        if (nr == __NR_openat)
1730                change_syscall(_metadata, tracee, -1);
1731}
1732
1733FIXTURE_DATA(TRACE_syscall) {
1734        struct sock_fprog prog;
1735        pid_t tracer, mytid, mypid, parent;
1736};
1737
1738FIXTURE_SETUP(TRACE_syscall)
1739{
1740        struct sock_filter filter[] = {
1741                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1742                        offsetof(struct seccomp_data, nr)),
1743                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1744                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1745                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1746                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1747                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1748                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1749                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1750        };
1751
1752        memset(&self->prog, 0, sizeof(self->prog));
1753        self->prog.filter = malloc(sizeof(filter));
1754        ASSERT_NE(NULL, self->prog.filter);
1755        memcpy(self->prog.filter, filter, sizeof(filter));
1756        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1757
1758        /* Prepare some testable syscall results. */
1759        self->mytid = syscall(__NR_gettid);
1760        ASSERT_GT(self->mytid, 0);
1761        ASSERT_NE(self->mytid, 1) {
1762                TH_LOG("Running this test as init is not supported. :)");
1763        }
1764
1765        self->mypid = getpid();
1766        ASSERT_GT(self->mypid, 0);
1767        ASSERT_EQ(self->mytid, self->mypid);
1768
1769        self->parent = getppid();
1770        ASSERT_GT(self->parent, 0);
1771        ASSERT_NE(self->parent, self->mypid);
1772
1773        /* Launch tracer. */
1774        self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1775                                           false);
1776}
1777
1778FIXTURE_TEARDOWN(TRACE_syscall)
1779{
1780        teardown_trace_fixture(_metadata, self->tracer);
1781        if (self->prog.filter)
1782                free(self->prog.filter);
1783}
1784
1785TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1786{
1787        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1788        teardown_trace_fixture(_metadata, self->tracer);
1789        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1790                                           true);
1791
1792        /* Tracer will redirect getpid to getppid. */
1793        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1794}
1795
1796TEST_F(TRACE_syscall, ptrace_syscall_dropped)
1797{
1798        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1799        teardown_trace_fixture(_metadata, self->tracer);
1800        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1801                                           true);
1802
1803        /* Tracer should skip the open syscall, resulting in EPERM. */
1804        EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat));
1805}
1806
1807TEST_F(TRACE_syscall, syscall_allowed)
1808{
1809        long ret;
1810
1811        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1812        ASSERT_EQ(0, ret);
1813
1814        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1815        ASSERT_EQ(0, ret);
1816
1817        /* getppid works as expected (no changes). */
1818        EXPECT_EQ(self->parent, syscall(__NR_getppid));
1819        EXPECT_NE(self->mypid, syscall(__NR_getppid));
1820}
1821
1822TEST_F(TRACE_syscall, syscall_redirected)
1823{
1824        long ret;
1825
1826        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1827        ASSERT_EQ(0, ret);
1828
1829        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1830        ASSERT_EQ(0, ret);
1831
1832        /* getpid has been redirected to getppid as expected. */
1833        EXPECT_EQ(self->parent, syscall(__NR_getpid));
1834        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1835}
1836
1837TEST_F(TRACE_syscall, syscall_dropped)
1838{
1839        long ret;
1840
1841        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1842        ASSERT_EQ(0, ret);
1843
1844        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1845        ASSERT_EQ(0, ret);
1846
1847        /* gettid has been skipped and an altered return value stored. */
1848        EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
1849        EXPECT_NE(self->mytid, syscall(__NR_gettid));
1850}
1851
1852TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1853{
1854        struct sock_filter filter[] = {
1855                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1856                        offsetof(struct seccomp_data, nr)),
1857                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1858                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1859                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1860        };
1861        struct sock_fprog prog = {
1862                .len = (unsigned short)ARRAY_SIZE(filter),
1863                .filter = filter,
1864        };
1865        long ret;
1866
1867        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1868        ASSERT_EQ(0, ret);
1869
1870        /* Install fixture filter. */
1871        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1872        ASSERT_EQ(0, ret);
1873
1874        /* Install "errno on getppid" filter. */
1875        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1876        ASSERT_EQ(0, ret);
1877
1878        /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1879        errno = 0;
1880        EXPECT_EQ(-1, syscall(__NR_getpid));
1881        EXPECT_EQ(EPERM, errno);
1882}
1883
1884TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1885{
1886        struct sock_filter filter[] = {
1887                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1888                        offsetof(struct seccomp_data, nr)),
1889                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1890                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1891                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1892        };
1893        struct sock_fprog prog = {
1894                .len = (unsigned short)ARRAY_SIZE(filter),
1895                .filter = filter,
1896        };
1897        long ret;
1898
1899        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1900        ASSERT_EQ(0, ret);
1901
1902        /* Install fixture filter. */
1903        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1904        ASSERT_EQ(0, ret);
1905
1906        /* Install "death on getppid" filter. */
1907        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1908        ASSERT_EQ(0, ret);
1909
1910        /* Tracer will redirect getpid to getppid, and we should die. */
1911        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1912}
1913
1914TEST_F(TRACE_syscall, skip_after_ptrace)
1915{
1916        struct sock_filter filter[] = {
1917                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1918                        offsetof(struct seccomp_data, nr)),
1919                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1920                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1921                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1922        };
1923        struct sock_fprog prog = {
1924                .len = (unsigned short)ARRAY_SIZE(filter),
1925                .filter = filter,
1926        };
1927        long ret;
1928
1929        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1930        teardown_trace_fixture(_metadata, self->tracer);
1931        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1932                                           true);
1933
1934        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1935        ASSERT_EQ(0, ret);
1936
1937        /* Install "errno on getppid" filter. */
1938        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1939        ASSERT_EQ(0, ret);
1940
1941        /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1942        EXPECT_EQ(-1, syscall(__NR_getpid));
1943        EXPECT_EQ(EPERM, errno);
1944}
1945
1946TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
1947{
1948        struct sock_filter filter[] = {
1949                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1950                        offsetof(struct seccomp_data, nr)),
1951                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1952                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1953                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1954        };
1955        struct sock_fprog prog = {
1956                .len = (unsigned short)ARRAY_SIZE(filter),
1957                .filter = filter,
1958        };
1959        long ret;
1960
1961        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1962        teardown_trace_fixture(_metadata, self->tracer);
1963        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1964                                           true);
1965
1966        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1967        ASSERT_EQ(0, ret);
1968
1969        /* Install "death on getppid" filter. */
1970        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1971        ASSERT_EQ(0, ret);
1972
1973        /* Tracer will redirect getpid to getppid, and we should die. */
1974        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1975}
1976
1977TEST(seccomp_syscall)
1978{
1979        struct sock_filter filter[] = {
1980                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1981        };
1982        struct sock_fprog prog = {
1983                .len = (unsigned short)ARRAY_SIZE(filter),
1984                .filter = filter,
1985        };
1986        long ret;
1987
1988        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1989        ASSERT_EQ(0, ret) {
1990                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1991        }
1992
1993        /* Reject insane operation. */
1994        ret = seccomp(-1, 0, &prog);
1995        ASSERT_NE(ENOSYS, errno) {
1996                TH_LOG("Kernel does not support seccomp syscall!");
1997        }
1998        EXPECT_EQ(EINVAL, errno) {
1999                TH_LOG("Did not reject crazy op value!");
2000        }
2001
2002        /* Reject strict with flags or pointer. */
2003        ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2004        EXPECT_EQ(EINVAL, errno) {
2005                TH_LOG("Did not reject mode strict with flags!");
2006        }
2007        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2008        EXPECT_EQ(EINVAL, errno) {
2009                TH_LOG("Did not reject mode strict with uargs!");
2010        }
2011
2012        /* Reject insane args for filter. */
2013        ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2014        EXPECT_EQ(EINVAL, errno) {
2015                TH_LOG("Did not reject crazy filter flags!");
2016        }
2017        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2018        EXPECT_EQ(EFAULT, errno) {
2019                TH_LOG("Did not reject NULL filter!");
2020        }
2021
2022        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2023        EXPECT_EQ(0, errno) {
2024                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2025                        strerror(errno));
2026        }
2027}
2028
2029TEST(seccomp_syscall_mode_lock)
2030{
2031        struct sock_filter filter[] = {
2032                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2033        };
2034        struct sock_fprog prog = {
2035                .len = (unsigned short)ARRAY_SIZE(filter),
2036                .filter = filter,
2037        };
2038        long ret;
2039
2040        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2041        ASSERT_EQ(0, ret) {
2042                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2043        }
2044
2045        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2046        ASSERT_NE(ENOSYS, errno) {
2047                TH_LOG("Kernel does not support seccomp syscall!");
2048        }
2049        EXPECT_EQ(0, ret) {
2050                TH_LOG("Could not install filter!");
2051        }
2052
2053        /* Make sure neither entry point will switch to strict. */
2054        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2055        EXPECT_EQ(EINVAL, errno) {
2056                TH_LOG("Switched to mode strict!");
2057        }
2058
2059        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2060        EXPECT_EQ(EINVAL, errno) {
2061                TH_LOG("Switched to mode strict!");
2062        }
2063}
2064
2065/*
2066 * Test detection of known and unknown filter flags. Userspace needs to be able
2067 * to check if a filter flag is supported by the current kernel and a good way
2068 * of doing that is by attempting to enter filter mode, with the flag bit in
2069 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2070 * that the flag is valid and EINVAL indicates that the flag is invalid.
2071 */
2072TEST(detect_seccomp_filter_flags)
2073{
2074        unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2075                                 SECCOMP_FILTER_FLAG_LOG };
2076        unsigned int flag, all_flags;
2077        int i;
2078        long ret;
2079
2080        /* Test detection of known-good filter flags */
2081        for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2082                flag = flags[i];
2083                ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2084                ASSERT_NE(ENOSYS, errno) {
2085                        TH_LOG("Kernel does not support seccomp syscall!");
2086                }
2087                EXPECT_EQ(-1, ret);
2088                EXPECT_EQ(EFAULT, errno) {
2089                        TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2090                               flag);
2091                }
2092
2093                all_flags |= flag;
2094        }
2095
2096        /* Test detection of all known-good filter flags */
2097        ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
2098        EXPECT_EQ(-1, ret);
2099        EXPECT_EQ(EFAULT, errno) {
2100                TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2101                       all_flags);
2102        }
2103
2104        /* Test detection of an unknown filter flag */
2105        flag = -1;
2106        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2107        EXPECT_EQ(-1, ret);
2108        EXPECT_EQ(EINVAL, errno) {
2109                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2110                       flag);
2111        }
2112
2113        /*
2114         * Test detection of an unknown filter flag that may simply need to be
2115         * added to this test
2116         */
2117        flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2118        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2119        EXPECT_EQ(-1, ret);
2120        EXPECT_EQ(EINVAL, errno) {
2121                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2122                       flag);
2123        }
2124}
2125
2126TEST(TSYNC_first)
2127{
2128        struct sock_filter filter[] = {
2129                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2130        };
2131        struct sock_fprog prog = {
2132                .len = (unsigned short)ARRAY_SIZE(filter),
2133                .filter = filter,
2134        };
2135        long ret;
2136
2137        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2138        ASSERT_EQ(0, ret) {
2139                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2140        }
2141
2142        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2143                      &prog);
2144        ASSERT_NE(ENOSYS, errno) {
2145                TH_LOG("Kernel does not support seccomp syscall!");
2146        }
2147        EXPECT_EQ(0, ret) {
2148                TH_LOG("Could not install initial filter with TSYNC!");
2149        }
2150}
2151
/* Number of sibling thread slots kept by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2
/* Per-thread state handed to tsync_sibling() as its start argument. */
struct tsync_sibling {
        /* pthread handle filled in by pthread_create(); PTHREAD_JOIN zeroes it after a successful join. */
        pthread_t tid;
        /* Kernel thread id, stored by the thread itself from gettid. */
        pid_t system_tid;
        /* Posted by the thread once it has handled its optional diverge step. */
        sem_t *started;
        /* Condition variable (and its mutex below) the thread waits on. */
        pthread_cond_t *cond;
        pthread_mutex_t *mutex;
        /* Non-zero: the thread installs prog on itself via prctl() before waiting. */
        int diverge;
        /* How many condition wake-ups the thread consumes before moving on. */
        int num_waits;
        /* Filter program installed when diverge is set. */
        struct sock_fprog *prog;
        /* Harness metadata pointer assigned by the fixture setup. */
        struct __test_metadata *metadata;
};
2164
/*
 * Bionic does not allow joining a thread that has already been joined.
 * After a successful join this macro therefore zeroes the caller's tid,
 * which lets fixture teardown skip that slot. A failed join is only
 * logged; the tid is left in place and teardown will kill the thread
 * directly.
 *
 * tid must be a modifiable lvalue; status is passed to pthread_join().
 */
#define PTHREAD_JOIN(tid, status)                                       \
        do {                                                            \
                int _join_rc = pthread_join((tid), (status));           \
                if (!_join_rc) {                                        \
                        (tid) = 0;                                      \
                } else {                                                \
                        TH_LOG("pthread_join of tid %u failed: %d\n",   \
                                (unsigned int)(tid), _join_rc);         \
                }                                                       \
        } while (0)
2181
2182FIXTURE_DATA(TSYNC) {
2183        struct sock_fprog root_prog, apply_prog;
2184        struct tsync_sibling sibling[TSYNC_SIBLINGS];
2185        sem_t started;
2186        pthread_cond_t cond;
2187        pthread_mutex_t mutex;
2188        int sibling_count;
2189};
2190
2191FIXTURE_SETUP(TSYNC)
2192{
2193        struct sock_filter root_filter[] = {
2194                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2195        };
2196        struct sock_filter apply_filter[] = {
2197                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2198                        offsetof(struct seccomp_data, nr)),
2199                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2200                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2201                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2202        };
2203
2204        memset(&self->root_prog, 0, sizeof(self->root_prog));
2205        memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2206        memset(&self->sibling, 0, sizeof(self->sibling));
2207        self->root_prog.filter = malloc(sizeof(root_filter));
2208        ASSERT_NE(NULL, self->root_prog.filter);
2209        memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2210        self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2211
2212        self->apply_prog.filter = malloc(sizeof(apply_filter));
2213        ASSERT_NE(NULL, self->apply_prog.filter);
2214        memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2215        self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2216
2217        self->sibling_count = 0;
2218        pthread_mutex_init(&self->mutex, NULL);
2219        pthread_cond_init(&self->cond, NULL);
2220        sem_init(&self->started, 0, 0);
2221        self->sibling[0].tid = 0;
2222        self->sibling[0].cond = &self->cond;
2223        self->sibling[0].started = &self->started;
2224        self->sibling[0].mutex = &self->mutex;
2225        self->sibling[0].diverge = 0;
2226        self->sibling[0].num_waits = 1;
2227        self->sibling[0].prog = &self->root_prog;
2228        self->sibling[0].metadata = _metadata;
2229        self->sibling[1].tid = 0;
2230        self->sibling[1].cond = &self->cond;
2231        self->sibling[1].started = &self->started;
2232        self->sibling[1].mutex = &self->mutex;
2233        self->sibling[1].diverge = 0;
2234        self->sibling[1].prog = &self->root_prog;
2235        self->sibling[1].num_waits = 1;
2236        self->sibling[1].metadata = _metadata;
2237}
2238
2239FIXTURE_TEARDOWN(TSYNC)
2240{
2241        int sib = 0;
2242
2243        if (self->root_prog.filter)
2244                free(self->root_prog.filter);
2245        if (self->apply_prog.filter)
2246                free(self->apply_prog.filter);
2247
2248        for ( ; sib < self->sibling_count; ++sib) {
2249                struct tsync_sibling *s = &self->sibling[sib];
2250
2251                if (!s->tid)
2252                        continue;
2253                /*
2254                 * If a thread is still running, it may be stuck, so hit
2255                 * it over the head really hard.
2256                 */
2257                pthread_kill(s->tid, 9);
2258        }
2259        pthread_mutex_destroy(&self->mutex);
2260        pthread_cond_destroy(&self->cond);
2261        sem_destroy(&self->started);
2262}
2263
2264void *tsync_sibling(void *data)
2265{
2266        long ret = 0;
2267        struct tsync_sibling *me = data;
2268
2269        me->system_tid = syscall(__NR_gettid);
2270
2271        pthread_mutex_lock(me->mutex);
2272        if (me->diverge) {
2273                /* Just re-apply the root prog to fork the tree */
2274                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2275                                me->prog, 0, 0);
2276        }
2277        sem_post(me->started);
2278        /* Return outside of started so parent notices failures. */
2279        if (ret) {
2280                pthread_mutex_unlock(me->mutex);
2281                return (void *)SIBLING_EXIT_FAILURE;
2282        }
2283        do {
2284                pthread_cond_wait(me->cond, me->mutex);
2285                me->num_waits = me->num_waits - 1;
2286        } while (me->num_waits);
2287        pthread_mutex_unlock(me->mutex);
2288
2289        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2290        if (!ret)
2291                return (void *)SIBLING_EXIT_NEWPRIVS;
2292        read(0, NULL, 0);
2293        return (void *)SIBLING_EXIT_UNKILLED;
2294}
2295
2296void tsync_start_sibling(struct tsync_sibling *sibling)
2297{
2298        pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2299}
2300
2301TEST_F(TSYNC, siblings_fail_prctl)
2302{
2303        long ret;
2304        void *status;
2305        struct sock_filter filter[] = {
2306                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2307                        offsetof(struct seccomp_data, nr)),
2308                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2309                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2310                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2311        };
2312        struct sock_fprog prog = {
2313                .len = (unsigned short)ARRAY_SIZE(filter),
2314                .filter = filter,
2315        };
2316
2317        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2318                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2319        }
2320
2321        /* Check prctl failure detection by requesting sib 0 diverge. */
2322        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2323        ASSERT_NE(ENOSYS, errno) {
2324                TH_LOG("Kernel does not support seccomp syscall!");
2325        }
2326        ASSERT_EQ(0, ret) {
2327                TH_LOG("setting filter failed");
2328        }
2329
2330        self->sibling[0].diverge = 1;
2331        tsync_start_sibling(&self->sibling[0]);
2332        tsync_start_sibling(&self->sibling[1]);
2333
2334        while (self->sibling_count < TSYNC_SIBLINGS) {
2335                sem_wait(&self->started);
2336                self->sibling_count++;
2337        }
2338
2339        /* Signal the threads to clean up*/
2340        pthread_mutex_lock(&self->mutex);
2341        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2342                TH_LOG("cond broadcast non-zero");
2343        }
2344        pthread_mutex_unlock(&self->mutex);
2345
2346        /* Ensure diverging sibling failed to call prctl. */
2347        PTHREAD_JOIN(self->sibling[0].tid, &status);
2348        EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2349        PTHREAD_JOIN(self->sibling[1].tid, &status);
2350        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2351}
2352
2353TEST_F(TSYNC, two_siblings_with_ancestor)
2354{
2355        long ret;
2356        void *status;
2357
2358        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2359                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2360        }
2361
2362        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2363        ASSERT_NE(ENOSYS, errno) {
2364                TH_LOG("Kernel does not support seccomp syscall!");
2365        }
2366        ASSERT_EQ(0, ret) {
2367                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2368        }
2369        tsync_start_sibling(&self->sibling[0]);
2370        tsync_start_sibling(&self->sibling[1]);
2371
2372        while (self->sibling_count < TSYNC_SIBLINGS) {
2373                sem_wait(&self->started);
2374                self->sibling_count++;
2375        }
2376
2377        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2378                      &self->apply_prog);
2379        ASSERT_EQ(0, ret) {
2380                TH_LOG("Could install filter on all threads!");
2381        }
2382        /* Tell the siblings to test the policy */
2383        pthread_mutex_lock(&self->mutex);
2384        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2385                TH_LOG("cond broadcast non-zero");
2386        }
2387        pthread_mutex_unlock(&self->mutex);
2388        /* Ensure they are both killed and don't exit cleanly. */
2389        PTHREAD_JOIN(self->sibling[0].tid, &status);
2390        EXPECT_EQ(0x0, (long)status);
2391        PTHREAD_JOIN(self->sibling[1].tid, &status);
2392        EXPECT_EQ(0x0, (long)status);
2393}
2394
2395TEST_F(TSYNC, two_sibling_want_nnp)
2396{
2397        void *status;
2398
2399        /* start siblings before any prctl() operations */
2400        tsync_start_sibling(&self->sibling[0]);
2401        tsync_start_sibling(&self->sibling[1]);
2402        while (self->sibling_count < TSYNC_SIBLINGS) {
2403                sem_wait(&self->started);
2404                self->sibling_count++;
2405        }
2406
2407        /* Tell the siblings to test no policy */
2408        pthread_mutex_lock(&self->mutex);
2409        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2410                TH_LOG("cond broadcast non-zero");
2411        }
2412        pthread_mutex_unlock(&self->mutex);
2413
2414        /* Ensure they are both upset about lacking nnp. */
2415        PTHREAD_JOIN(self->sibling[0].tid, &status);
2416        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2417        PTHREAD_JOIN(self->sibling[1].tid, &status);
2418        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2419}
2420
2421TEST_F(TSYNC, two_siblings_with_no_filter)
2422{
2423        long ret;
2424        void *status;
2425
2426        /* start siblings before any prctl() operations */
2427        tsync_start_sibling(&self->sibling[0]);
2428        tsync_start_sibling(&self->sibling[1]);
2429        while (self->sibling_count < TSYNC_SIBLINGS) {
2430                sem_wait(&self->started);
2431                self->sibling_count++;
2432        }
2433
2434        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2435                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2436        }
2437
2438        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2439                      &self->apply_prog);
2440        ASSERT_NE(ENOSYS, errno) {
2441                TH_LOG("Kernel does not support seccomp syscall!");
2442        }
2443        ASSERT_EQ(0, ret) {
2444                TH_LOG("Could install filter on all threads!");
2445        }
2446
2447        /* Tell the siblings to test the policy */
2448        pthread_mutex_lock(&self->mutex);
2449        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2450                TH_LOG("cond broadcast non-zero");
2451        }
2452        pthread_mutex_unlock(&self->mutex);
2453
2454        /* Ensure they are both killed and don't exit cleanly. */
2455        PTHREAD_JOIN(self->sibling[0].tid, &status);
2456        EXPECT_EQ(0x0, (long)status);
2457        PTHREAD_JOIN(self->sibling[1].tid, &status);
2458        EXPECT_EQ(0x0, (long)status);
2459}
2460
2461TEST_F(TSYNC, two_siblings_with_one_divergence)
2462{
2463        long ret;
2464        void *status;
2465
2466        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2467                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2468        }
2469
2470        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2471        ASSERT_NE(ENOSYS, errno) {
2472                TH_LOG("Kernel does not support seccomp syscall!");
2473        }
2474        ASSERT_EQ(0, ret) {
2475                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2476        }
2477        self->sibling[0].diverge = 1;
2478        tsync_start_sibling(&self->sibling[0]);
2479        tsync_start_sibling(&self->sibling[1]);
2480
2481        while (self->sibling_count < TSYNC_SIBLINGS) {
2482                sem_wait(&self->started);
2483                self->sibling_count++;
2484        }
2485
2486        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2487                      &self->apply_prog);
2488        ASSERT_EQ(self->sibling[0].system_tid, ret) {
2489                TH_LOG("Did not fail on diverged sibling.");
2490        }
2491
2492        /* Wake the threads */
2493        pthread_mutex_lock(&self->mutex);
2494        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2495                TH_LOG("cond broadcast non-zero");
2496        }
2497        pthread_mutex_unlock(&self->mutex);
2498
2499        /* Ensure they are both unkilled. */
2500        PTHREAD_JOIN(self->sibling[0].tid, &status);
2501        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2502        PTHREAD_JOIN(self->sibling[1].tid, &status);
2503        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2504}
2505
2506TEST_F(TSYNC, two_siblings_not_under_filter)
2507{
2508        long ret, sib;
2509        void *status;
2510
2511        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2512                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2513        }
2514
2515        /*
2516         * Sibling 0 will have its own seccomp policy
2517         * and Sibling 1 will not be under seccomp at
2518         * all. Sibling 1 will enter seccomp and 0
2519         * will cause failure.
2520         */
2521        self->sibling[0].diverge = 1;
2522        tsync_start_sibling(&self->sibling[0]);
2523        tsync_start_sibling(&self->sibling[1]);
2524
2525        while (self->sibling_count < TSYNC_SIBLINGS) {
2526                sem_wait(&self->started);
2527                self->sibling_count++;
2528        }
2529
2530        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2531        ASSERT_NE(ENOSYS, errno) {
2532                TH_LOG("Kernel does not support seccomp syscall!");
2533        }
2534        ASSERT_EQ(0, ret) {
2535                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2536        }
2537
2538        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2539                      &self->apply_prog);
2540        ASSERT_EQ(ret, self->sibling[0].system_tid) {
2541                TH_LOG("Did not fail on diverged sibling.");
2542        }
2543        sib = 1;
2544        if (ret == self->sibling[0].system_tid)
2545                sib = 0;
2546
2547        pthread_mutex_lock(&self->mutex);
2548
2549        /* Increment the other siblings num_waits so we can clean up
2550         * the one we just saw.
2551         */
2552        self->sibling[!sib].num_waits += 1;
2553
2554        /* Signal the thread to clean up*/
2555        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2556                TH_LOG("cond broadcast non-zero");
2557        }
2558        pthread_mutex_unlock(&self->mutex);
2559        PTHREAD_JOIN(self->sibling[sib].tid, &status);
2560        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2561        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2562        while (!kill(self->sibling[sib].system_tid, 0))
2563                sleep(0.1);
2564        /* Switch to the remaining sibling */
2565        sib = !sib;
2566
2567        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2568                      &self->apply_prog);
2569        ASSERT_EQ(0, ret) {
2570                TH_LOG("Expected the remaining sibling to sync");
2571        };
2572
2573        pthread_mutex_lock(&self->mutex);
2574
2575        /* If remaining sibling didn't have a chance to wake up during
2576         * the first broadcast, manually reduce the num_waits now.
2577         */
2578        if (self->sibling[sib].num_waits > 1)
2579                self->sibling[sib].num_waits = 1;
2580        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2581                TH_LOG("cond broadcast non-zero");
2582        }
2583        pthread_mutex_unlock(&self->mutex);
2584        PTHREAD_JOIN(self->sibling[sib].tid, &status);
2585        EXPECT_EQ(0, (long)status);
2586        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2587        while (!kill(self->sibling[sib].system_tid, 0))
2588                sleep(0.1);
2589
2590        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2591                      &self->apply_prog);
2592        ASSERT_EQ(0, ret);  /* just us chickens */
2593}
2594
2595/* Make sure restarted syscalls are seen directly as "restart_syscall". */
2596TEST(syscall_restart)
2597{
2598        long ret;
2599        unsigned long msg;
2600        pid_t child_pid;
2601        int pipefd[2];
2602        int status;
2603        siginfo_t info = { };
2604        struct sock_filter filter[] = {
2605                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2606                         offsetof(struct seccomp_data, nr)),
2607
2608#ifdef __NR_sigreturn
2609                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2610#endif
2611                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2612                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2613                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2614                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2615                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2616
2617                /* Allow __NR_write for easy logging. */
2618                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2619                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2620                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2621                /* The nanosleep jump target. */
2622                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2623                /* The restart_syscall jump target. */
2624                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2625        };
2626        struct sock_fprog prog = {
2627                .len = (unsigned short)ARRAY_SIZE(filter),
2628                .filter = filter,
2629        };
2630#if defined(__arm__)
2631        struct utsname utsbuf;
2632#endif
2633
2634        ASSERT_EQ(0, pipe(pipefd));
2635
2636        child_pid = fork();
2637        ASSERT_LE(0, child_pid);
2638        if (child_pid == 0) {
2639                /* Child uses EXPECT not ASSERT to deliver status correctly. */
2640                char buf = ' ';
2641                struct timespec timeout = { };
2642
2643                /* Attach parent as tracer and stop. */
2644                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2645                EXPECT_EQ(0, raise(SIGSTOP));
2646
2647                EXPECT_EQ(0, close(pipefd[1]));
2648
2649                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2650                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2651                }
2652
2653                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2654                EXPECT_EQ(0, ret) {
2655                        TH_LOG("Failed to install filter!");
2656                }
2657
2658                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2659                        TH_LOG("Failed to read() sync from parent");
2660                }
2661                EXPECT_EQ('.', buf) {
2662                        TH_LOG("Failed to get sync data from read()");
2663                }
2664
2665                /* Start nanosleep to be interrupted. */
2666                timeout.tv_sec = 1;
2667                errno = 0;
2668                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2669                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2670                }
2671
2672                /* Read final sync from parent. */
2673                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2674                        TH_LOG("Failed final read() from parent");
2675                }
2676                EXPECT_EQ('!', buf) {
2677                        TH_LOG("Failed to get final data from read()");
2678                }
2679
2680                /* Directly report the status of our test harness results. */
2681                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2682                                                     : EXIT_FAILURE);
2683        }
2684        EXPECT_EQ(0, close(pipefd[0]));
2685
2686        /* Attach to child, setup options, and release. */
2687        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2688        ASSERT_EQ(true, WIFSTOPPED(status));
2689        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2690                            PTRACE_O_TRACESECCOMP));
2691        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2692        ASSERT_EQ(1, write(pipefd[1], ".", 1));
2693
2694        /* Wait for nanosleep() to start. */
2695        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2696        ASSERT_EQ(true, WIFSTOPPED(status));
2697        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2698        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2699        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2700        ASSERT_EQ(0x100, msg);
2701        EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2702
2703        /* Might as well check siginfo for sanity while we're here. */
2704        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2705        ASSERT_EQ(SIGTRAP, info.si_signo);
2706        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2707        EXPECT_EQ(0, info.si_errno);
2708        EXPECT_EQ(getuid(), info.si_uid);
2709        /* Verify signal delivery came from child (seccomp-triggered). */
2710        EXPECT_EQ(child_pid, info.si_pid);
2711
2712        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2713        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2714        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2715        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2716        ASSERT_EQ(true, WIFSTOPPED(status));
2717        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2718        /* Verify signal delivery came from parent now. */
2719        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2720        EXPECT_EQ(getpid(), info.si_pid);
2721
2722        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2723        ASSERT_EQ(0, kill(child_pid, SIGCONT));
2724        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2725        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2726        ASSERT_EQ(true, WIFSTOPPED(status));
2727        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2728        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2729
2730        /* Wait for restart_syscall() to start. */
2731        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2732        ASSERT_EQ(true, WIFSTOPPED(status));
2733        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2734        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2735        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2736
2737        ASSERT_EQ(0x200, msg);
2738        ret = get_syscall(_metadata, child_pid);
2739#if defined(__arm__)
2740        /*
2741         * FIXME:
2742         * - native ARM registers do NOT expose true syscall.
2743         * - compat ARM registers on ARM64 DO expose true syscall.
2744         */
2745        ASSERT_EQ(0, uname(&utsbuf));
2746        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2747                EXPECT_EQ(__NR_nanosleep, ret);
2748        } else
2749#endif
2750        {
2751                EXPECT_EQ(__NR_restart_syscall, ret);
2752        }
2753
2754        /* Write again to end test. */
2755        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2756        ASSERT_EQ(1, write(pipefd[1], "!", 1));
2757        EXPECT_EQ(0, close(pipefd[1]));
2758
2759        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2760        if (WIFSIGNALED(status) || WEXITSTATUS(status))
2761                _metadata->passed = 0;
2762}
2763
2764TEST_SIGNAL(filter_flag_log, SIGSYS)
2765{
2766        struct sock_filter allow_filter[] = {
2767                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2768        };
2769        struct sock_filter kill_filter[] = {
2770                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2771                        offsetof(struct seccomp_data, nr)),
2772                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2773                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2774                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2775        };
2776        struct sock_fprog allow_prog = {
2777                .len = (unsigned short)ARRAY_SIZE(allow_filter),
2778                .filter = allow_filter,
2779        };
2780        struct sock_fprog kill_prog = {
2781                .len = (unsigned short)ARRAY_SIZE(kill_filter),
2782                .filter = kill_filter,
2783        };
2784        long ret;
2785        pid_t parent = getppid();
2786
2787        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2788        ASSERT_EQ(0, ret);
2789
2790        /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2791        ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2792                      &allow_prog);
2793        ASSERT_NE(ENOSYS, errno) {
2794                TH_LOG("Kernel does not support seccomp syscall!");
2795        }
2796        EXPECT_NE(0, ret) {
2797                TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2798        }
2799        EXPECT_EQ(EINVAL, errno) {
2800                TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2801        }
2802
2803        /* Verify that a simple, permissive filter can be added with no flags */
2804        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2805        EXPECT_EQ(0, ret);
2806
2807        /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2808        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2809                      &allow_prog);
2810        ASSERT_NE(EINVAL, errno) {
2811                TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2812        }
2813        EXPECT_EQ(0, ret);
2814
2815        /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2816        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2817                      &kill_prog);
2818        EXPECT_EQ(0, ret);
2819
2820        EXPECT_EQ(parent, syscall(__NR_getppid));
2821        /* getpid() should never return. */
2822        EXPECT_EQ(0, syscall(__NR_getpid));
2823}
2824
2825TEST(get_action_avail)
2826{
2827        __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2828                            SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2829                            SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2830        __u32 unknown_action = 0x10000000U;
2831        int i;
2832        long ret;
2833
2834        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2835        ASSERT_NE(ENOSYS, errno) {
2836                TH_LOG("Kernel does not support seccomp syscall!");
2837        }
2838        ASSERT_NE(EINVAL, errno) {
2839                TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2840        }
2841        EXPECT_EQ(ret, 0);
2842
2843        for (i = 0; i < ARRAY_SIZE(actions); i++) {
2844                ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2845                EXPECT_EQ(ret, 0) {
2846                        TH_LOG("Expected action (0x%X) not available!",
2847                               actions[i]);
2848                }
2849        }
2850
2851        /* Check that an unknown action is handled properly (EOPNOTSUPP) */
2852        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2853        EXPECT_EQ(ret, -1);
2854        EXPECT_EQ(errno, EOPNOTSUPP);
2855}
2856
2857TEST(get_metadata)
2858{
2859        pid_t pid;
2860        int pipefd[2];
2861        char buf;
2862        struct seccomp_metadata md;
2863
2864        ASSERT_EQ(0, pipe(pipefd));
2865
2866        pid = fork();
2867        ASSERT_GE(pid, 0);
2868        if (pid == 0) {
2869                struct sock_filter filter[] = {
2870                        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2871                };
2872                struct sock_fprog prog = {
2873                        .len = (unsigned short)ARRAY_SIZE(filter),
2874                        .filter = filter,
2875                };
2876
2877                /* one with log, one without */
2878                ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
2879                                     SECCOMP_FILTER_FLAG_LOG, &prog));
2880                ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
2881
2882                ASSERT_EQ(0, close(pipefd[0]));
2883                ASSERT_EQ(1, write(pipefd[1], "1", 1));
2884                ASSERT_EQ(0, close(pipefd[1]));
2885
2886                while (1)
2887                        sleep(100);
2888        }
2889
2890        ASSERT_EQ(0, close(pipefd[1]));
2891        ASSERT_EQ(1, read(pipefd[0], &buf, 1));
2892
2893        ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
2894        ASSERT_EQ(pid, waitpid(pid, NULL, 0));
2895
2896        md.filter_off = 0;
2897        ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
2898        EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
2899        EXPECT_EQ(md.filter_off, 0);
2900
2901        md.filter_off = 1;
2902        ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
2903        EXPECT_EQ(md.flags, 0);
2904        EXPECT_EQ(md.filter_off, 1);
2905
2906        ASSERT_EQ(0, kill(pid, SIGKILL));
2907}
2908
/*
 * TODO:
 * - add microbenchmarks
 * - expand NNP testing
 * - better arch-specific TRACE and TRAP handlers.
 * - endianness checking when appropriate
 * - 64-bit arg prodding
 * - arch value testing (x86 modes especially)
 * - verify that FILTER_FLAG_LOG filters generate log messages
 * - verify that RET_LOG generates log messages
 * - ...
 */
2921
2922TEST_HARNESS_MAIN
2923