linux/tools/testing/selftests/seccomp/seccomp_bpf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   4 *
   5 * Test code for seccomp bpf.
   6 */
   7
   8#define _GNU_SOURCE
   9#include <sys/types.h>
  10
  11/*
  12 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
  13 * we need to use the kernel's siginfo.h file and trick glibc
  14 * into accepting it.
  15 */
  16#if !__GLIBC_PREREQ(2, 26)
  17# include <asm/siginfo.h>
  18# define __have_siginfo_t 1
  19# define __have_sigval_t 1
  20# define __have_sigevent_t 1
  21#endif
  22
  23#include <errno.h>
  24#include <linux/filter.h>
  25#include <sys/prctl.h>
  26#include <sys/ptrace.h>
  27#include <sys/user.h>
  28#include <linux/prctl.h>
  29#include <linux/ptrace.h>
  30#include <linux/seccomp.h>
  31#include <pthread.h>
  32#include <semaphore.h>
  33#include <signal.h>
  34#include <stddef.h>
  35#include <stdbool.h>
  36#include <string.h>
  37#include <time.h>
  38#include <linux/elf.h>
  39#include <sys/uio.h>
  40#include <sys/utsname.h>
  41#include <sys/fcntl.h>
  42#include <sys/mman.h>
  43#include <sys/times.h>
  44#include <sys/socket.h>
  45#include <sys/ioctl.h>
  46
  47#include <unistd.h>
  48#include <sys/syscall.h>
  49#include <poll.h>
  50
  51#include "../kselftest_harness.h"
  52
/*
 * Fallback definitions for prctl() options and seccomp modes. Each value is
 * defined here only when the headers included above have not already
 * provided it.
 */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
#endif

/* prctl() options used by the tests to set/query the no_new_privs bit. */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
#endif

#ifndef PR_SECCOMP_EXT
#define PR_SECCOMP_EXT 43
#endif

#ifndef SECCOMP_EXT_ACT
#define SECCOMP_EXT_ACT 1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
#define SECCOMP_EXT_ACT_TSYNC 1
#endif

/* Mode values passed as the second argument of prctl(PR_SET_SECCOMP, ...). */
#ifndef SECCOMP_MODE_STRICT
#define SECCOMP_MODE_STRICT 1
#endif

#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#endif
  81
/*
 * Fallback for headers that lack seccomp filter definitions: the record
 * layout that the BPF programs in this file index with offsetof().
 */
#ifndef SECCOMP_RET_ALLOW
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif

/*
 * Filter return actions. Each group is only defined when the headers do not
 * already provide its first member.
 */
#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
#define SECCOMP_RET_KILL_THREAD  0x00000000U /* kill the thread */
#endif
#ifndef SECCOMP_RET_KILL
/* SECCOMP_RET_KILL is an alias for the thread-killing action. */
#define SECCOMP_RET_KILL         SECCOMP_RET_KILL_THREAD
#define SECCOMP_RET_TRAP         0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO        0x00050000U /* returns an errno */
#define SECCOMP_RET_TRACE        0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW        0x7fff0000U /* allow */
#endif
#ifndef SECCOMP_RET_LOG
#define SECCOMP_RET_LOG          0x7ffc0000U /* allow after logging */
#endif
 105
/*
 * Per-architecture number of the seccomp() syscall, for toolchains whose
 * headers do not define __NR_seccomp. On an architecture not listed here a
 * compile-time warning is emitted and the placeholder 0xffff is used.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Operation codes passed as the first argument of seccomp(). */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_GET_ACTION_AVAIL
#define SECCOMP_GET_ACTION_AVAIL 2
#endif

#ifndef SECCOMP_GET_NOTIF_SIZES
#define SECCOMP_GET_NOTIF_SIZES 3
#endif

/* Single-bit SECCOMP_FILTER_FLAG_* values. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif

#ifndef SECCOMP_FILTER_FLAG_LOG
#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif

#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
 154
/*
 * Fallback for the PTRACE_SECCOMP_GET_METADATA request and the structure it
 * exchanges, for headers that do not define them.
 */
#ifndef PTRACE_SECCOMP_GET_METADATA
#define PTRACE_SECCOMP_GET_METADATA     0x420d

struct seccomp_metadata {
	__u64 filter_off;       /* Input: which filter */
	__u64 flags;             /* Output: filter's flags */
};
#endif

/*
 * Fallback for the user-notification interface: the listener flag, the
 * USER_NOTIF return action, the notification-fd ioctl numbers and the
 * structures they carry. All of it is keyed on
 * SECCOMP_FILTER_FLAG_NEW_LISTENER being absent from the headers.
 */
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER        (1UL << 3)

#define SECCOMP_RET_USER_NOTIF 0x7fc00000U

/* ioctl number builders sharing the '!' magic. */
#define SECCOMP_IOC_MAGIC               '!'
#define SECCOMP_IO(nr)                  _IO(SECCOMP_IOC_MAGIC, nr)
#define SECCOMP_IOR(nr, type)           _IOR(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOW(nr, type)           _IOW(SECCOMP_IOC_MAGIC, nr, type)
#define SECCOMP_IOWR(nr, type)          _IOWR(SECCOMP_IOC_MAGIC, nr, type)

/* Flags for seccomp notification fd ioctl. */
#define SECCOMP_IOCTL_NOTIF_RECV        SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND        SECCOMP_IOWR(1, \
						struct seccomp_notif_resp)
#define SECCOMP_IOCTL_NOTIF_ID_VALID    SECCOMP_IOR(2, __u64)

/* Payload of SECCOMP_IOCTL_NOTIF_RECV. */
struct seccomp_notif {
	__u64 id;
	__u32 pid;
	__u32 flags;
	struct seccomp_data data;
};

/* Payload of SECCOMP_IOCTL_NOTIF_SEND. */
struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};

/* One 16-bit size field per notification-related structure. */
struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
#endif

/* Fallback ptrace event-message values for syscall entry and exit. */
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY   1
#define PTRACE_EVENTMSG_SYSCALL_EXIT    2
#endif
 206
 207#ifndef seccomp
 208int seccomp(unsigned int op, unsigned int flags, void *args)
 209{
 210        errno = 0;
 211        return syscall(__NR_seccomp, op, flags, args);
 212}
 213#endif
 214
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data at which a 32-bit
 * BPF load finds the low-order word of 64-bit syscall argument _n. On
 * big-endian machines that word is the second half of the slot, hence the
 * extra sizeof(__u32).
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
#error "wut? Unknown __BYTE_ORDER?!"
#endif

/* Sentinel values used as thread return values by the sibling-thread tests. */
#define SIBLING_EXIT_UNKILLED   0xbadbeef
#define SIBLING_EXIT_FAILURE    0xbadface
#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 226
 227TEST(mode_strict_support)
 228{
 229        long ret;
 230
 231        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 232        ASSERT_EQ(0, ret) {
 233                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 234        }
 235        syscall(__NR_exit, 0);
 236}
 237
 238TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 239{
 240        long ret;
 241
 242        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 243        ASSERT_EQ(0, ret) {
 244                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 245        }
 246        syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 247                NULL, NULL, NULL);
 248        EXPECT_FALSE(true) {
 249                TH_LOG("Unreachable!");
 250        }
 251}
 252
 253/* Note! This doesn't test no new privs behavior */
 254TEST(no_new_privs_support)
 255{
 256        long ret;
 257
 258        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 259        EXPECT_EQ(0, ret) {
 260                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 261        }
 262}
 263
 264/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 265TEST(mode_filter_support)
 266{
 267        long ret;
 268
 269        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 270        ASSERT_EQ(0, ret) {
 271                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 272        }
 273        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 274        EXPECT_EQ(-1, ret);
 275        EXPECT_EQ(EFAULT, errno) {
 276                TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 277        }
 278}
 279
 280TEST(mode_filter_without_nnp)
 281{
 282        struct sock_filter filter[] = {
 283                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 284        };
 285        struct sock_fprog prog = {
 286                .len = (unsigned short)ARRAY_SIZE(filter),
 287                .filter = filter,
 288        };
 289        long ret;
 290
 291        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 292        ASSERT_LE(0, ret) {
 293                TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 294        }
 295        errno = 0;
 296        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 297        /* Succeeds with CAP_SYS_ADMIN, fails without */
 298        /* TODO(wad) check caps not euid */
 299        if (geteuid()) {
 300                EXPECT_EQ(-1, ret);
 301                EXPECT_EQ(EACCES, errno);
 302        } else {
 303                EXPECT_EQ(0, ret);
 304        }
 305}
 306
/* Upper bound on the install attempts made by filter_chain_limits below. */
#define MAX_INSNS_PER_PATH 32768
 308
 309TEST(filter_size_limits)
 310{
 311        int i;
 312        int count = BPF_MAXINSNS + 1;
 313        struct sock_filter allow[] = {
 314                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 315        };
 316        struct sock_filter *filter;
 317        struct sock_fprog prog = { };
 318        long ret;
 319
 320        filter = calloc(count, sizeof(*filter));
 321        ASSERT_NE(NULL, filter);
 322
 323        for (i = 0; i < count; i++)
 324                filter[i] = allow[0];
 325
 326        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 327        ASSERT_EQ(0, ret);
 328
 329        prog.filter = filter;
 330        prog.len = count;
 331
 332        /* Too many filter instructions in a single filter. */
 333        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 334        ASSERT_NE(0, ret) {
 335                TH_LOG("Installing %d insn filter was allowed", prog.len);
 336        }
 337
 338        /* One less is okay, though. */
 339        prog.len -= 1;
 340        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 341        ASSERT_EQ(0, ret) {
 342                TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 343        }
 344}
 345
 346TEST(filter_chain_limits)
 347{
 348        int i;
 349        int count = BPF_MAXINSNS;
 350        struct sock_filter allow[] = {
 351                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 352        };
 353        struct sock_filter *filter;
 354        struct sock_fprog prog = { };
 355        long ret;
 356
 357        filter = calloc(count, sizeof(*filter));
 358        ASSERT_NE(NULL, filter);
 359
 360        for (i = 0; i < count; i++)
 361                filter[i] = allow[0];
 362
 363        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 364        ASSERT_EQ(0, ret);
 365
 366        prog.filter = filter;
 367        prog.len = 1;
 368
 369        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 370        ASSERT_EQ(0, ret);
 371
 372        prog.len = count;
 373
 374        /* Too many total filter instructions. */
 375        for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 376                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 377                if (ret != 0)
 378                        break;
 379        }
 380        ASSERT_NE(0, ret) {
 381                TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 382                       i, count, i * (count + 4));
 383        }
 384}
 385
 386TEST(mode_filter_cannot_move_to_strict)
 387{
 388        struct sock_filter filter[] = {
 389                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 390        };
 391        struct sock_fprog prog = {
 392                .len = (unsigned short)ARRAY_SIZE(filter),
 393                .filter = filter,
 394        };
 395        long ret;
 396
 397        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 398        ASSERT_EQ(0, ret);
 399
 400        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 401        ASSERT_EQ(0, ret);
 402
 403        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 404        EXPECT_EQ(-1, ret);
 405        EXPECT_EQ(EINVAL, errno);
 406}
 407
 408
 409TEST(mode_filter_get_seccomp)
 410{
 411        struct sock_filter filter[] = {
 412                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 413        };
 414        struct sock_fprog prog = {
 415                .len = (unsigned short)ARRAY_SIZE(filter),
 416                .filter = filter,
 417        };
 418        long ret;
 419
 420        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 421        ASSERT_EQ(0, ret);
 422
 423        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 424        EXPECT_EQ(0, ret);
 425
 426        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 427        ASSERT_EQ(0, ret);
 428
 429        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 430        EXPECT_EQ(2, ret);
 431}
 432
 433
 434TEST(ALLOW_all)
 435{
 436        struct sock_filter filter[] = {
 437                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 438        };
 439        struct sock_fprog prog = {
 440                .len = (unsigned short)ARRAY_SIZE(filter),
 441                .filter = filter,
 442        };
 443        long ret;
 444
 445        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 446        ASSERT_EQ(0, ret);
 447
 448        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 449        ASSERT_EQ(0, ret);
 450}
 451
 452TEST(empty_prog)
 453{
 454        struct sock_filter filter[] = {
 455        };
 456        struct sock_fprog prog = {
 457                .len = (unsigned short)ARRAY_SIZE(filter),
 458                .filter = filter,
 459        };
 460        long ret;
 461
 462        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 463        ASSERT_EQ(0, ret);
 464
 465        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 466        EXPECT_EQ(-1, ret);
 467        EXPECT_EQ(EINVAL, errno);
 468}
 469
 470TEST(log_all)
 471{
 472        struct sock_filter filter[] = {
 473                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
 474        };
 475        struct sock_fprog prog = {
 476                .len = (unsigned short)ARRAY_SIZE(filter),
 477                .filter = filter,
 478        };
 479        long ret;
 480        pid_t parent = getppid();
 481
 482        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 483        ASSERT_EQ(0, ret);
 484
 485        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 486        ASSERT_EQ(0, ret);
 487
 488        /* getppid() should succeed and be logged (no check for logging) */
 489        EXPECT_EQ(parent, syscall(__NR_getppid));
 490}
 491
 492TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 493{
 494        struct sock_filter filter[] = {
 495                BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 496        };
 497        struct sock_fprog prog = {
 498                .len = (unsigned short)ARRAY_SIZE(filter),
 499                .filter = filter,
 500        };
 501        long ret;
 502
 503        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 504        ASSERT_EQ(0, ret);
 505
 506        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 507        ASSERT_EQ(0, ret);
 508        EXPECT_EQ(0, syscall(__NR_getpid)) {
 509                TH_LOG("getpid() shouldn't ever return");
 510        }
 511}
 512
 513/* return code >= 0x80000000 is unused. */
 514TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 515{
 516        struct sock_filter filter[] = {
 517                BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 518        };
 519        struct sock_fprog prog = {
 520                .len = (unsigned short)ARRAY_SIZE(filter),
 521                .filter = filter,
 522        };
 523        long ret;
 524
 525        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 526        ASSERT_EQ(0, ret);
 527
 528        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 529        ASSERT_EQ(0, ret);
 530        EXPECT_EQ(0, syscall(__NR_getpid)) {
 531                TH_LOG("getpid() shouldn't ever return");
 532        }
 533}
 534
 535TEST_SIGNAL(KILL_all, SIGSYS)
 536{
 537        struct sock_filter filter[] = {
 538                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 539        };
 540        struct sock_fprog prog = {
 541                .len = (unsigned short)ARRAY_SIZE(filter),
 542                .filter = filter,
 543        };
 544        long ret;
 545
 546        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 547        ASSERT_EQ(0, ret);
 548
 549        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 550        ASSERT_EQ(0, ret);
 551}
 552
 553TEST_SIGNAL(KILL_one, SIGSYS)
 554{
 555        struct sock_filter filter[] = {
 556                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 557                        offsetof(struct seccomp_data, nr)),
 558                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 559                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 560                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 561        };
 562        struct sock_fprog prog = {
 563                .len = (unsigned short)ARRAY_SIZE(filter),
 564                .filter = filter,
 565        };
 566        long ret;
 567        pid_t parent = getppid();
 568
 569        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 570        ASSERT_EQ(0, ret);
 571
 572        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 573        ASSERT_EQ(0, ret);
 574
 575        EXPECT_EQ(parent, syscall(__NR_getppid));
 576        /* getpid() should never return. */
 577        EXPECT_EQ(0, syscall(__NR_getpid));
 578}
 579
 580TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 581{
 582        void *fatal_address;
 583        struct sock_filter filter[] = {
 584                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 585                        offsetof(struct seccomp_data, nr)),
 586                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 587                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 588                /* Only both with lower 32-bit for now. */
 589                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 590                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 591                        (unsigned long)&fatal_address, 0, 1),
 592                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 593                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 594        };
 595        struct sock_fprog prog = {
 596                .len = (unsigned short)ARRAY_SIZE(filter),
 597                .filter = filter,
 598        };
 599        long ret;
 600        pid_t parent = getppid();
 601        struct tms timebuf;
 602        clock_t clock = times(&timebuf);
 603
 604        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 605        ASSERT_EQ(0, ret);
 606
 607        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 608        ASSERT_EQ(0, ret);
 609
 610        EXPECT_EQ(parent, syscall(__NR_getppid));
 611        EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 612        /* times() should never return. */
 613        EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 614}
 615
 616TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 617{
 618#ifndef __NR_mmap2
 619        int sysno = __NR_mmap;
 620#else
 621        int sysno = __NR_mmap2;
 622#endif
 623        struct sock_filter filter[] = {
 624                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 625                        offsetof(struct seccomp_data, nr)),
 626                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 627                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 628                /* Only both with lower 32-bit for now. */
 629                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 630                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 631                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 632                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 633        };
 634        struct sock_fprog prog = {
 635                .len = (unsigned short)ARRAY_SIZE(filter),
 636                .filter = filter,
 637        };
 638        long ret;
 639        pid_t parent = getppid();
 640        int fd;
 641        void *map1, *map2;
 642        int page_size = sysconf(_SC_PAGESIZE);
 643
 644        ASSERT_LT(0, page_size);
 645
 646        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 647        ASSERT_EQ(0, ret);
 648
 649        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 650        ASSERT_EQ(0, ret);
 651
 652        fd = open("/dev/zero", O_RDONLY);
 653        ASSERT_NE(-1, fd);
 654
 655        EXPECT_EQ(parent, syscall(__NR_getppid));
 656        map1 = (void *)syscall(sysno,
 657                NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 658        EXPECT_NE(MAP_FAILED, map1);
 659        /* mmap2() should never return. */
 660        map2 = (void *)syscall(sysno,
 661                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 662        EXPECT_EQ(MAP_FAILED, map2);
 663
 664        /* The test failed, so clean up the resources. */
 665        munmap(map1, page_size);
 666        munmap(map2, page_size);
 667        close(fd);
 668}
 669
 670/* This is a thread task to die via seccomp filter violation. */
 671void *kill_thread(void *data)
 672{
 673        bool die = (bool)data;
 674
 675        if (die) {
 676                prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 677                return (void *)SIBLING_EXIT_FAILURE;
 678        }
 679
 680        return (void *)SIBLING_EXIT_UNKILLED;
 681}
 682
 683/* Prepare a thread that will kill itself or both of us. */
 684void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 685{
 686        pthread_t thread;
 687        void *status;
 688        /* Kill only when calling __NR_prctl. */
 689        struct sock_filter filter_thread[] = {
 690                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 691                        offsetof(struct seccomp_data, nr)),
 692                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 693                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
 694                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 695        };
 696        struct sock_fprog prog_thread = {
 697                .len = (unsigned short)ARRAY_SIZE(filter_thread),
 698                .filter = filter_thread,
 699        };
 700        struct sock_filter filter_process[] = {
 701                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 702                        offsetof(struct seccomp_data, nr)),
 703                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
 704                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
 705                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 706        };
 707        struct sock_fprog prog_process = {
 708                .len = (unsigned short)ARRAY_SIZE(filter_process),
 709                .filter = filter_process,
 710        };
 711
 712        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
 713                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 714        }
 715
 716        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
 717                             kill_process ? &prog_process : &prog_thread));
 718
 719        /*
 720         * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 721         * flag cannot be downgraded by a new filter.
 722         */
 723        ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 724
 725        /* Start a thread that will exit immediately. */
 726        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
 727        ASSERT_EQ(0, pthread_join(thread, &status));
 728        ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
 729
 730        /* Start a thread that will die immediately. */
 731        ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
 732        ASSERT_EQ(0, pthread_join(thread, &status));
 733        ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
 734
 735        /*
 736         * If we get here, only the spawned thread died. Let the parent know
 737         * the whole process didn't die (i.e. this thread, the spawner,
 738         * stayed running).
 739         */
 740        exit(42);
 741}
 742
 743TEST(KILL_thread)
 744{
 745        int status;
 746        pid_t child_pid;
 747
 748        child_pid = fork();
 749        ASSERT_LE(0, child_pid);
 750        if (child_pid == 0) {
 751                kill_thread_or_group(_metadata, false);
 752                _exit(38);
 753        }
 754
 755        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 756
 757        /* If only the thread was killed, we'll see exit 42. */
 758        ASSERT_TRUE(WIFEXITED(status));
 759        ASSERT_EQ(42, WEXITSTATUS(status));
 760}
 761
 762TEST(KILL_process)
 763{
 764        int status;
 765        pid_t child_pid;
 766
 767        child_pid = fork();
 768        ASSERT_LE(0, child_pid);
 769        if (child_pid == 0) {
 770                kill_thread_or_group(_metadata, true);
 771                _exit(38);
 772        }
 773
 774        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
 775
 776        /* If the entire process was killed, we'll see SIGSYS. */
 777        ASSERT_TRUE(WIFSIGNALED(status));
 778        ASSERT_EQ(SIGSYS, WTERMSIG(status));
 779}
 780
 781/* TODO(wad) add 64-bit versus 32-bit arg tests. */
 782TEST(arg_out_of_range)
 783{
 784        struct sock_filter filter[] = {
 785                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 786                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 787        };
 788        struct sock_fprog prog = {
 789                .len = (unsigned short)ARRAY_SIZE(filter),
 790                .filter = filter,
 791        };
 792        long ret;
 793
 794        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 795        ASSERT_EQ(0, ret);
 796
 797        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 798        EXPECT_EQ(-1, ret);
 799        EXPECT_EQ(EINVAL, errno);
 800}
 801
 802#define ERRNO_FILTER(name, errno)                                       \
 803        struct sock_filter _read_filter_##name[] = {                    \
 804                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,                          \
 805                        offsetof(struct seccomp_data, nr)),             \
 806                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),       \
 807                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),     \
 808                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),             \
 809        };                                                              \
 810        struct sock_fprog prog_##name = {                               \
 811                .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
 812                .filter = _read_filter_##name,                          \
 813        }
 814
 815/* Make sure basic errno values are correctly passed through a filter. */
 816TEST(ERRNO_valid)
 817{
 818        ERRNO_FILTER(valid, E2BIG);
 819        long ret;
 820        pid_t parent = getppid();
 821
 822        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 823        ASSERT_EQ(0, ret);
 824
 825        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 826        ASSERT_EQ(0, ret);
 827
 828        EXPECT_EQ(parent, syscall(__NR_getppid));
 829        EXPECT_EQ(-1, read(0, NULL, 0));
 830        EXPECT_EQ(E2BIG, errno);
 831}
 832
 833/* Make sure an errno of zero is correctly handled by the arch code. */
 834TEST(ERRNO_zero)
 835{
 836        ERRNO_FILTER(zero, 0);
 837        long ret;
 838        pid_t parent = getppid();
 839
 840        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 841        ASSERT_EQ(0, ret);
 842
 843        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 844        ASSERT_EQ(0, ret);
 845
 846        EXPECT_EQ(parent, syscall(__NR_getppid));
 847        /* "errno" of 0 is ok. */
 848        EXPECT_EQ(0, read(0, NULL, 0));
 849}
 850
 851/*
 852 * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
 853 * This tests that the errno value gets capped correctly, fixed by
 854 * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
 855 */
 856TEST(ERRNO_capped)
 857{
 858        ERRNO_FILTER(capped, 4096);
 859        long ret;
 860        pid_t parent = getppid();
 861
 862        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 863        ASSERT_EQ(0, ret);
 864
 865        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 866        ASSERT_EQ(0, ret);
 867
 868        EXPECT_EQ(parent, syscall(__NR_getppid));
 869        EXPECT_EQ(-1, read(0, NULL, 0));
 870        EXPECT_EQ(4095, errno);
 871}
 872
 873/*
 874 * Filters are processed in reverse order: last applied is executed first.
 875 * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
 876 * SECCOMP_RET_DATA mask results will follow the most recently applied
 877 * matching filter return (and not the lowest or highest value).
 878 */
 879TEST(ERRNO_order)
 880{
 881        ERRNO_FILTER(first,  11);
 882        ERRNO_FILTER(second, 13);
 883        ERRNO_FILTER(third,  12);
 884        long ret;
 885        pid_t parent = getppid();
 886
 887        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 888        ASSERT_EQ(0, ret);
 889
 890        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
 891        ASSERT_EQ(0, ret);
 892
 893        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
 894        ASSERT_EQ(0, ret);
 895
 896        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
 897        ASSERT_EQ(0, ret);
 898
 899        EXPECT_EQ(parent, syscall(__NR_getppid));
 900        EXPECT_EQ(-1, read(0, NULL, 0));
 901        EXPECT_EQ(12, errno);
 902}
 903
/* Per-test state of the TRAP fixture: the filter program under test. */
FIXTURE_DATA(TRAP) {
	struct sock_fprog prog;
};
 907
 908FIXTURE_SETUP(TRAP)
 909{
 910        struct sock_filter filter[] = {
 911                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 912                        offsetof(struct seccomp_data, nr)),
 913                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 914                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 915                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 916        };
 917
 918        memset(&self->prog, 0, sizeof(self->prog));
 919        self->prog.filter = malloc(sizeof(filter));
 920        ASSERT_NE(NULL, self->prog.filter);
 921        memcpy(self->prog.filter, filter, sizeof(filter));
 922        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
 923}
 924
 925FIXTURE_TEARDOWN(TRAP)
 926{
 927        if (self->prog.filter)
 928                free(self->prog.filter);
 929}
 930
 931TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
 932{
 933        long ret;
 934
 935        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 936        ASSERT_EQ(0, ret);
 937
 938        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 939        ASSERT_EQ(0, ret);
 940        syscall(__NR_getpid);
 941}
 942
 943/* Ensure that SIGSYS overrides SIG_IGN */
 944TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 945{
 946        long ret;
 947
 948        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 949        ASSERT_EQ(0, ret);
 950
 951        signal(SIGSYS, SIG_IGN);
 952
 953        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 954        ASSERT_EQ(0, ret);
 955        syscall(__NR_getpid);
 956}
 957
 958static siginfo_t TRAP_info;
 959static volatile int TRAP_nr;
 960static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 961{
 962        memcpy(&TRAP_info, info, sizeof(TRAP_info));
 963        TRAP_nr = nr;
 964}
 965
/*
 * Install a SIGSYS handler, arm the TRAP filter, call getpid() and check
 * that the handler ran and that the recorded siginfo describes the
 * trapped syscall (number, architecture, calling address).
 */
TEST_F(TRAP, handler)
{
        int ret, test;
        struct sigaction act;
        sigset_t mask;

        memset(&act, 0, sizeof(act));
        sigemptyset(&mask);
        sigaddset(&mask, SIGSYS);

        /* Route SIGSYS to TRAP_action with the siginfo payload. */
        act.sa_sigaction = &TRAP_action;
        act.sa_flags = SA_SIGINFO;
        ret = sigaction(SIGSYS, &act, NULL);
        ASSERT_EQ(0, ret) {
                TH_LOG("sigaction failed");
        }
        /* Make sure SIGSYS is not blocked in this thread. */
        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
        ASSERT_EQ(0, ret) {
                TH_LOG("sigprocmask failed");
        }

        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        ASSERT_EQ(0, ret);
        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
        ASSERT_EQ(0, ret);
        /* Clear the handler's outputs so stale data cannot satisfy the checks. */
        TRAP_nr = 0;
        memset(&TRAP_info, 0, sizeof(TRAP_info));
        /* Expect the registers to be rolled back. (nr = error) may vary
         * based on arch. */
        ret = syscall(__NR_getpid);
        /* Silence gcc warning about volatile. */
        test = TRAP_nr;
        EXPECT_EQ(SIGSYS, test);
        /*
         * Local view of the SIGSYS fields of siginfo_t.  When the libc
         * headers provide si_syscall the overlay starts at si_call_addr;
         * otherwise it is placed at si_pid (presumably the start of the
         * same union -- TODO confirm for the target libc).
         */
        struct local_sigsys {
                void *_call_addr;       /* calling user insn */
                int _syscall;           /* triggering system call number */
                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
        } *sigsys = (struct local_sigsys *)
#ifdef si_syscall
                &(TRAP_info.si_call_addr);
#else
                &TRAP_info.si_pid;
#endif
        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
        /* Make sure arch is non-zero. */
        EXPECT_NE(0, sigsys->_arch);
        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}
1014
/*
 * Per-test state for the precedence fixture: one filter program per
 * seccomp return action.  Each instruction array is heap-allocated in
 * FIXTURE_SETUP and released in FIXTURE_TEARDOWN.
 */
FIXTURE_DATA(precedence) {
        struct sock_fprog allow;
        struct sock_fprog log;
        struct sock_fprog trace;
        struct sock_fprog error;
        struct sock_fprog trap;
        struct sock_fprog kill;
};
1023
1024FIXTURE_SETUP(precedence)
1025{
1026        struct sock_filter allow_insns[] = {
1027                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1028        };
1029        struct sock_filter log_insns[] = {
1030                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1031                        offsetof(struct seccomp_data, nr)),
1032                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1033                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1034                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1035        };
1036        struct sock_filter trace_insns[] = {
1037                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1038                        offsetof(struct seccomp_data, nr)),
1039                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1040                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1041                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1042        };
1043        struct sock_filter error_insns[] = {
1044                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1045                        offsetof(struct seccomp_data, nr)),
1046                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1047                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1048                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1049        };
1050        struct sock_filter trap_insns[] = {
1051                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1052                        offsetof(struct seccomp_data, nr)),
1053                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1054                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1055                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1056        };
1057        struct sock_filter kill_insns[] = {
1058                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1059                        offsetof(struct seccomp_data, nr)),
1060                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1061                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1062                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1063        };
1064
1065        memset(self, 0, sizeof(*self));
1066#define FILTER_ALLOC(_x) \
1067        self->_x.filter = malloc(sizeof(_x##_insns)); \
1068        ASSERT_NE(NULL, self->_x.filter); \
1069        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1070        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1071        FILTER_ALLOC(allow);
1072        FILTER_ALLOC(log);
1073        FILTER_ALLOC(trace);
1074        FILTER_ALLOC(error);
1075        FILTER_ALLOC(trap);
1076        FILTER_ALLOC(kill);
1077}
1078
1079FIXTURE_TEARDOWN(precedence)
1080{
1081#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1082        FILTER_FREE(allow);
1083        FILTER_FREE(log);
1084        FILTER_FREE(trace);
1085        FILTER_FREE(error);
1086        FILTER_FREE(trap);
1087        FILTER_FREE(kill);
1088}
1089
1090TEST_F(precedence, allow_ok)
1091{
1092        pid_t parent, res = 0;
1093        long ret;
1094
1095        parent = getppid();
1096        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1097        ASSERT_EQ(0, ret);
1098
1099        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1100        ASSERT_EQ(0, ret);
1101        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1102        ASSERT_EQ(0, ret);
1103        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1104        ASSERT_EQ(0, ret);
1105        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1106        ASSERT_EQ(0, ret);
1107        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1108        ASSERT_EQ(0, ret);
1109        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1110        ASSERT_EQ(0, ret);
1111        /* Should work just fine. */
1112        res = syscall(__NR_getppid);
1113        EXPECT_EQ(parent, res);
1114}
1115
1116TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
1117{
1118        pid_t parent, res = 0;
1119        long ret;
1120
1121        parent = getppid();
1122        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1123        ASSERT_EQ(0, ret);
1124
1125        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1126        ASSERT_EQ(0, ret);
1127        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1128        ASSERT_EQ(0, ret);
1129        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1130        ASSERT_EQ(0, ret);
1131        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1132        ASSERT_EQ(0, ret);
1133        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1134        ASSERT_EQ(0, ret);
1135        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1136        ASSERT_EQ(0, ret);
1137        /* Should work just fine. */
1138        res = syscall(__NR_getppid);
1139        EXPECT_EQ(parent, res);
1140        /* getpid() should never return. */
1141        res = syscall(__NR_getpid);
1142        EXPECT_EQ(0, res);
1143}
1144
1145TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
1146{
1147        pid_t parent;
1148        long ret;
1149
1150        parent = getppid();
1151        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1152        ASSERT_EQ(0, ret);
1153
1154        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1155        ASSERT_EQ(0, ret);
1156        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1157        ASSERT_EQ(0, ret);
1158        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1159        ASSERT_EQ(0, ret);
1160        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1161        ASSERT_EQ(0, ret);
1162        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1163        ASSERT_EQ(0, ret);
1164        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1165        ASSERT_EQ(0, ret);
1166        /* Should work just fine. */
1167        EXPECT_EQ(parent, syscall(__NR_getppid));
1168        /* getpid() should never return. */
1169        EXPECT_EQ(0, syscall(__NR_getpid));
1170}
1171
1172TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
1173{
1174        pid_t parent;
1175        long ret;
1176
1177        parent = getppid();
1178        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1179        ASSERT_EQ(0, ret);
1180
1181        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1182        ASSERT_EQ(0, ret);
1183        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1184        ASSERT_EQ(0, ret);
1185        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1186        ASSERT_EQ(0, ret);
1187        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1188        ASSERT_EQ(0, ret);
1189        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1190        ASSERT_EQ(0, ret);
1191        /* Should work just fine. */
1192        EXPECT_EQ(parent, syscall(__NR_getppid));
1193        /* getpid() should never return. */
1194        EXPECT_EQ(0, syscall(__NR_getpid));
1195}
1196
1197TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
1198{
1199        pid_t parent;
1200        long ret;
1201
1202        parent = getppid();
1203        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1204        ASSERT_EQ(0, ret);
1205
1206        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1207        ASSERT_EQ(0, ret);
1208        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1209        ASSERT_EQ(0, ret);
1210        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1211        ASSERT_EQ(0, ret);
1212        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1213        ASSERT_EQ(0, ret);
1214        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1215        ASSERT_EQ(0, ret);
1216        /* Should work just fine. */
1217        EXPECT_EQ(parent, syscall(__NR_getppid));
1218        /* getpid() should never return. */
1219        EXPECT_EQ(0, syscall(__NR_getpid));
1220}
1221
1222TEST_F(precedence, errno_is_third)
1223{
1224        pid_t parent;
1225        long ret;
1226
1227        parent = getppid();
1228        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1229        ASSERT_EQ(0, ret);
1230
1231        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1232        ASSERT_EQ(0, ret);
1233        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1234        ASSERT_EQ(0, ret);
1235        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1236        ASSERT_EQ(0, ret);
1237        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1238        ASSERT_EQ(0, ret);
1239        /* Should work just fine. */
1240        EXPECT_EQ(parent, syscall(__NR_getppid));
1241        EXPECT_EQ(0, syscall(__NR_getpid));
1242}
1243
1244TEST_F(precedence, errno_is_third_in_any_order)
1245{
1246        pid_t parent;
1247        long ret;
1248
1249        parent = getppid();
1250        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1251        ASSERT_EQ(0, ret);
1252
1253        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1254        ASSERT_EQ(0, ret);
1255        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1256        ASSERT_EQ(0, ret);
1257        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1258        ASSERT_EQ(0, ret);
1259        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1260        ASSERT_EQ(0, ret);
1261        /* Should work just fine. */
1262        EXPECT_EQ(parent, syscall(__NR_getppid));
1263        EXPECT_EQ(0, syscall(__NR_getpid));
1264}
1265
1266TEST_F(precedence, trace_is_fourth)
1267{
1268        pid_t parent;
1269        long ret;
1270
1271        parent = getppid();
1272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1273        ASSERT_EQ(0, ret);
1274
1275        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1276        ASSERT_EQ(0, ret);
1277        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1278        ASSERT_EQ(0, ret);
1279        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1280        ASSERT_EQ(0, ret);
1281        /* Should work just fine. */
1282        EXPECT_EQ(parent, syscall(__NR_getppid));
1283        /* No ptracer */
1284        EXPECT_EQ(-1, syscall(__NR_getpid));
1285}
1286
1287TEST_F(precedence, trace_is_fourth_in_any_order)
1288{
1289        pid_t parent;
1290        long ret;
1291
1292        parent = getppid();
1293        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1294        ASSERT_EQ(0, ret);
1295
1296        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1297        ASSERT_EQ(0, ret);
1298        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1299        ASSERT_EQ(0, ret);
1300        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1301        ASSERT_EQ(0, ret);
1302        /* Should work just fine. */
1303        EXPECT_EQ(parent, syscall(__NR_getppid));
1304        /* No ptracer */
1305        EXPECT_EQ(-1, syscall(__NR_getpid));
1306}
1307
1308TEST_F(precedence, log_is_fifth)
1309{
1310        pid_t mypid, parent;
1311        long ret;
1312
1313        mypid = getpid();
1314        parent = getppid();
1315        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1316        ASSERT_EQ(0, ret);
1317
1318        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1319        ASSERT_EQ(0, ret);
1320        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1321        ASSERT_EQ(0, ret);
1322        /* Should work just fine. */
1323        EXPECT_EQ(parent, syscall(__NR_getppid));
1324        /* Should also work just fine */
1325        EXPECT_EQ(mypid, syscall(__NR_getpid));
1326}
1327
1328TEST_F(precedence, log_is_fifth_in_any_order)
1329{
1330        pid_t mypid, parent;
1331        long ret;
1332
1333        mypid = getpid();
1334        parent = getppid();
1335        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1336        ASSERT_EQ(0, ret);
1337
1338        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1339        ASSERT_EQ(0, ret);
1340        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1341        ASSERT_EQ(0, ret);
1342        /* Should work just fine. */
1343        EXPECT_EQ(parent, syscall(__NR_getppid));
1344        /* Should also work just fine */
1345        EXPECT_EQ(mypid, syscall(__NR_getpid));
1346}
1347
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when a wait() status carries PTRACE_EVENT_SECCOMP in the bits
 * above bit 15.  The argument is parenthesized so that expressions
 * such as "a | b" are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop in start_tracer().  It is cleared from a
 * signal handler, so it is declared volatile sig_atomic_t, the type
 * that may portably be written from a handler.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: ask the tracer loop to stop.  sig is unused. */
void tracer_stop(int sig)
{
        tracer_running = false;
}
1367
1368typedef void tracer_func_t(struct __test_metadata *_metadata,
1369                           pid_t tracee, int status, void *args);
1370
1371void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1372            tracer_func_t tracer_func, void *args, bool ptrace_syscall)
1373{
1374        int ret = -1;
1375        struct sigaction action = {
1376                .sa_handler = tracer_stop,
1377        };
1378
1379        /* Allow external shutdown. */
1380        tracer_running = true;
1381        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1382
1383        errno = 0;
1384        while (ret == -1 && errno != EINVAL)
1385                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1386        ASSERT_EQ(0, ret) {
1387                kill(tracee, SIGKILL);
1388        }
1389        /* Wait for attach stop */
1390        wait(NULL);
1391
1392        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
1393                                                      PTRACE_O_TRACESYSGOOD :
1394                                                      PTRACE_O_TRACESECCOMP);
1395        ASSERT_EQ(0, ret) {
1396                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1397                kill(tracee, SIGKILL);
1398        }
1399        ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1400                     tracee, NULL, 0);
1401        ASSERT_EQ(0, ret);
1402
1403        /* Unblock the tracee */
1404        ASSERT_EQ(1, write(fd, "A", 1));
1405        ASSERT_EQ(0, close(fd));
1406
1407        /* Run until we're shut down. Must assert to stop execution. */
1408        while (tracer_running) {
1409                int status;
1410
1411                if (wait(&status) != tracee)
1412                        continue;
1413                if (WIFSIGNALED(status) || WIFEXITED(status))
1414                        /* Child is dead. Time to go. */
1415                        return;
1416
1417                /* Check if this is a seccomp event. */
1418                ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
1419
1420                tracer_func(_metadata, tracee, status, args);
1421
1422                ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
1423                             tracee, NULL, 0);
1424                ASSERT_EQ(0, ret);
1425        }
1426        /* Directly report the status of our test harness results. */
1427        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1428}
1429
1430/* Common tracer setup/teardown functions. */
1431void cont_handler(int num)
1432{ }
1433pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1434                          tracer_func_t func, void *args, bool ptrace_syscall)
1435{
1436        char sync;
1437        int pipefd[2];
1438        pid_t tracer_pid;
1439        pid_t tracee = getpid();
1440
1441        /* Setup a pipe for clean synchronization. */
1442        ASSERT_EQ(0, pipe(pipefd));
1443
1444        /* Fork a child which we'll promote to tracer */
1445        tracer_pid = fork();
1446        ASSERT_LE(0, tracer_pid);
1447        signal(SIGALRM, cont_handler);
1448        if (tracer_pid == 0) {
1449                close(pipefd[0]);
1450                start_tracer(_metadata, pipefd[1], tracee, func, args,
1451                             ptrace_syscall);
1452                syscall(__NR_exit, 0);
1453        }
1454        close(pipefd[1]);
1455        prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1456        read(pipefd[0], &sync, 1);
1457        close(pipefd[0]);
1458
1459        return tracer_pid;
1460}
1461void teardown_trace_fixture(struct __test_metadata *_metadata,
1462                            pid_t tracer)
1463{
1464        if (tracer) {
1465                int status;
1466                /*
1467                 * Extract the exit code from the other process and
1468                 * adopt it for ourselves in case its asserts failed.
1469                 */
1470                ASSERT_EQ(0, kill(tracer, SIGUSR1));
1471                ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1472                if (WEXITSTATUS(status))
1473                        _metadata->passed = 0;
1474        }
1475}
1476
1477/* "poke" tracer arguments and function. */
1478struct tracer_args_poke_t {
1479        unsigned long poke_addr;
1480};
1481
1482void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1483                 void *args)
1484{
1485        int ret;
1486        unsigned long msg;
1487        struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1488
1489        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1490        EXPECT_EQ(0, ret);
1491        /* If this fails, don't try to recover. */
1492        ASSERT_EQ(0x1001, msg) {
1493                kill(tracee, SIGKILL);
1494        }
1495        /*
1496         * Poke in the message.
1497         * Registers are not touched to try to keep this relatively arch
1498         * agnostic.
1499         */
1500        ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1501        EXPECT_EQ(0, ret);
1502}
1503
/* Per-test state for the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
        /* Filter program; instruction array is heap-allocated in FIXTURE_SETUP. */
        struct sock_fprog prog;
        /* Pid returned by setup_trace_fixture(). */
        pid_t tracer;
        /* Target word whose address is handed to the tracer as poke_addr. */
        long poked;
        /* Arguments passed to tracer_poke(). */
        struct tracer_args_poke_t tracer_args;
};
1510
1511FIXTURE_SETUP(TRACE_poke)
1512{
1513        struct sock_filter filter[] = {
1514                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1515                        offsetof(struct seccomp_data, nr)),
1516                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1517                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1518                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1519        };
1520
1521        self->poked = 0;
1522        memset(&self->prog, 0, sizeof(self->prog));
1523        self->prog.filter = malloc(sizeof(filter));
1524        ASSERT_NE(NULL, self->prog.filter);
1525        memcpy(self->prog.filter, filter, sizeof(filter));
1526        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1527
1528        /* Set up tracer args. */
1529        self->tracer_args.poke_addr = (unsigned long)&self->poked;
1530
1531        /* Launch tracer. */
1532        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1533                                           &self->tracer_args, false);
1534}
1535
1536FIXTURE_TEARDOWN(TRACE_poke)
1537{
1538        teardown_trace_fixture(_metadata, self->tracer);
1539        if (self->prog.filter)
1540                free(self->prog.filter);
1541}
1542
1543TEST_F(TRACE_poke, read_has_side_effects)
1544{
1545        ssize_t ret;
1546
1547        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1548        ASSERT_EQ(0, ret);
1549
1550        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1551        ASSERT_EQ(0, ret);
1552
1553        EXPECT_EQ(0, self->poked);
1554        ret = read(-1, NULL, 0);
1555        EXPECT_EQ(-1, ret);
1556        EXPECT_EQ(0x1001, self->poked);
1557}
1558
1559TEST_F(TRACE_poke, getpid_runs_normally)
1560{
1561        long ret;
1562
1563        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1564        ASSERT_EQ(0, ret);
1565
1566        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1567        ASSERT_EQ(0, ret);
1568
1569        EXPECT_EQ(0, self->poked);
1570        EXPECT_NE(0, syscall(__NR_getpid));
1571        EXPECT_EQ(0, self->poked);
1572}
1573
/*
 * Per-architecture register access:
 *   ARCH_REGS    register-set type fetched through ptrace
 *   SYSCALL_NUM  member holding the syscall number
 *   SYSCALL_RET  member holding the syscall return value
 * Architectures where SYSCALL_NUM and SYSCALL_RET name the same
 * register also define SYSCALL_NUM_RET_SHARE_REG: the number and the
 * return value cannot both be rewritten in one register update.
 */
#if defined(__x86_64__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_rax
# define SYSCALL_RET    rax
#elif defined(__i386__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_eax
# define SYSCALL_RET    eax
#elif defined(__arm__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    ARM_r7
# define SYSCALL_RET    ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    regs[8]
# define SYSCALL_RET    regs[0]
#elif defined(__hppa__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    gr[20]
# define SYSCALL_RET    gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[0]
# define SYSCALL_RET    gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/* gprs[2] serves as both number and return value, as on mips below. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET    regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1611
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate the syscall expression
 * "action" and check its result against "val".  A negative val means
 * "fails with errno -val"; a non-negative val is the expected return.
 */
/* When the syscall return can't be changed, stub out the tests for it. */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)     EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)             \
        do {                                            \
                errno = 0;                              \
                /* (val) so compound arguments compare as a whole. */ \
                if ((val) < 0) {                        \
                        EXPECT_EQ(-1, action);          \
                        EXPECT_EQ(-(val), errno);       \
                } else {                                \
                        EXPECT_EQ(val, action);         \
                }                                       \
        } while (0)
#endif
1627
/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 * When HAVE_GETREGS is not defined, get_syscall() and change_syscall()
 * use PTRACE_GETREGSET/PTRACE_SETREGSET with NT_PRSTATUS instead.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1634
1635/* Architecture-specific syscall fetching routine. */
1636int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1637{
1638        ARCH_REGS regs;
1639#ifdef HAVE_GETREGS
1640        EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1641                TH_LOG("PTRACE_GETREGS failed");
1642                return -1;
1643        }
1644#else
1645        struct iovec iov;
1646
1647        iov.iov_base = &regs;
1648        iov.iov_len = sizeof(regs);
1649        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1650                TH_LOG("PTRACE_GETREGSET failed");
1651                return -1;
1652        }
1653#endif
1654
1655#if defined(__mips__)
1656        if (regs.SYSCALL_NUM == __NR_O32_Linux)
1657                return regs.SYSCALL_SYSCALL_NUM;
1658#endif
1659        return regs.SYSCALL_NUM;
1660}
1661
1662/* Architecture-specific syscall changing routine. */
1663void change_syscall(struct __test_metadata *_metadata,
1664                    pid_t tracee, int syscall, int result)
1665{
1666        int ret;
1667        ARCH_REGS regs;
1668#ifdef HAVE_GETREGS
1669        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1670#else
1671        struct iovec iov;
1672        iov.iov_base = &regs;
1673        iov.iov_len = sizeof(regs);
1674        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1675#endif
1676        EXPECT_EQ(0, ret) {}
1677
1678#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1679    defined(__s390__) || defined(__hppa__)
1680        {
1681                regs.SYSCALL_NUM = syscall;
1682        }
1683#elif defined(__mips__)
1684        {
1685                if (regs.SYSCALL_NUM == __NR_O32_Linux)
1686                        regs.SYSCALL_SYSCALL_NUM = syscall;
1687                else
1688                        regs.SYSCALL_NUM = syscall;
1689        }
1690
1691#elif defined(__arm__)
1692# ifndef PTRACE_SET_SYSCALL
1693#  define PTRACE_SET_SYSCALL   23
1694# endif
1695        {
1696                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1697                EXPECT_EQ(0, ret);
1698        }
1699
1700#elif defined(__aarch64__)
1701# ifndef NT_ARM_SYSTEM_CALL
1702#  define NT_ARM_SYSTEM_CALL 0x404
1703# endif
1704        {
1705                iov.iov_base = &syscall;
1706                iov.iov_len = sizeof(syscall);
1707                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1708                             &iov);
1709                EXPECT_EQ(0, ret);
1710        }
1711
1712#else
1713        ASSERT_EQ(1, 0) {
1714                TH_LOG("How is the syscall changed on this architecture?");
1715        }
1716#endif
1717
1718        /* If syscall is skipped, change return value. */
1719        if (syscall == -1)
1720#ifdef SYSCALL_NUM_RET_SHARE_REG
1721                TH_LOG("Can't modify syscall return on this architecture");
1722#else
1723                regs.SYSCALL_RET = result;
1724#endif
1725
1726#ifdef HAVE_GETREGS
1727        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1728#else
1729        iov.iov_base = &regs;
1730        iov.iov_len = sizeof(regs);
1731        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1732#endif
1733        EXPECT_EQ(0, ret);
1734}
1735
1736void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1737                    int status, void *args)
1738{
1739        int ret;
1740        unsigned long msg;
1741
1742        /* Make sure we got the right message. */
1743        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1744        EXPECT_EQ(0, ret);
1745
1746        /* Validate and take action on expected syscalls. */
1747        switch (msg) {
1748        case 0x1002:
1749                /* change getpid to getppid. */
1750                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1751                change_syscall(_metadata, tracee, __NR_getppid, 0);
1752                break;
1753        case 0x1003:
1754                /* skip gettid with valid return code. */
1755                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1756                change_syscall(_metadata, tracee, -1, 45000);
1757                break;
1758        case 0x1004:
1759                /* skip openat with error. */
1760                EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1761                change_syscall(_metadata, tracee, -1, -ESRCH);
1762                break;
1763        case 0x1005:
1764                /* do nothing (allow getppid) */
1765                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1766                break;
1767        default:
1768                EXPECT_EQ(0, msg) {
1769                        TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1770                        kill(tracee, SIGKILL);
1771                }
1772        }
1773
1774}
1775
1776void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1777                   int status, void *args)
1778{
1779        int ret, nr;
1780        unsigned long msg;
1781        static bool entry;
1782
1783        /*
1784         * The traditional way to tell PTRACE_SYSCALL entry/exit
1785         * is by counting.
1786         */
1787        entry = !entry;
1788
1789        /* Make sure we got an appropriate message. */
1790        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1791        EXPECT_EQ(0, ret);
1792        EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1793                        : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1794
1795        if (!entry)
1796                return;
1797
1798        nr = get_syscall(_metadata, tracee);
1799
1800        if (nr == __NR_getpid)
1801                change_syscall(_metadata, tracee, __NR_getppid, 0);
1802        if (nr == __NR_gettid)
1803                change_syscall(_metadata, tracee, -1, 45000);
1804        if (nr == __NR_openat)
1805                change_syscall(_metadata, tracee, -1, -ESRCH);
1806}
1807
/* Per-test state for the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
        /* Filter program; instruction array is heap-allocated in FIXTURE_SETUP. */
        struct sock_fprog prog;
        /* Tracer pid, plus this process's tid, pid and parent pid captured in setup. */
        pid_t tracer, mytid, mypid, parent;
};
1812
1813FIXTURE_SETUP(TRACE_syscall)
1814{
1815        struct sock_filter filter[] = {
1816                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1817                        offsetof(struct seccomp_data, nr)),
1818                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1819                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1820                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1821                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1822                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1823                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1824                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1825                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1826                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1827        };
1828
1829        memset(&self->prog, 0, sizeof(self->prog));
1830        self->prog.filter = malloc(sizeof(filter));
1831        ASSERT_NE(NULL, self->prog.filter);
1832        memcpy(self->prog.filter, filter, sizeof(filter));
1833        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1834
1835        /* Prepare some testable syscall results. */
1836        self->mytid = syscall(__NR_gettid);
1837        ASSERT_GT(self->mytid, 0);
1838        ASSERT_NE(self->mytid, 1) {
1839                TH_LOG("Running this test as init is not supported. :)");
1840        }
1841
1842        self->mypid = getpid();
1843        ASSERT_GT(self->mypid, 0);
1844        ASSERT_EQ(self->mytid, self->mypid);
1845
1846        self->parent = getppid();
1847        ASSERT_GT(self->parent, 0);
1848        ASSERT_NE(self->parent, self->mypid);
1849
1850        /* Launch tracer. */
1851        self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1852                                           false);
1853}
1854
1855FIXTURE_TEARDOWN(TRACE_syscall)
1856{
1857        teardown_trace_fixture(_metadata, self->tracer);
1858        if (self->prog.filter)
1859                free(self->prog.filter);
1860}
1861
1862TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1863{
1864        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1865        teardown_trace_fixture(_metadata, self->tracer);
1866        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1867                                           true);
1868
1869        /* Tracer will redirect getpid to getppid. */
1870        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1871}
1872
1873TEST_F(TRACE_syscall, ptrace_syscall_errno)
1874{
1875        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1876        teardown_trace_fixture(_metadata, self->tracer);
1877        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1878                                           true);
1879
1880        /* Tracer should skip the open syscall, resulting in ESRCH. */
1881        EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1882}
1883
1884TEST_F(TRACE_syscall, ptrace_syscall_faked)
1885{
1886        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1887        teardown_trace_fixture(_metadata, self->tracer);
1888        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1889                                           true);
1890
1891        /* Tracer should skip the gettid syscall, resulting fake pid. */
1892        EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1893}
1894
1895TEST_F(TRACE_syscall, syscall_allowed)
1896{
1897        long ret;
1898
1899        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1900        ASSERT_EQ(0, ret);
1901
1902        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1903        ASSERT_EQ(0, ret);
1904
1905        /* getppid works as expected (no changes). */
1906        EXPECT_EQ(self->parent, syscall(__NR_getppid));
1907        EXPECT_NE(self->mypid, syscall(__NR_getppid));
1908}
1909
1910TEST_F(TRACE_syscall, syscall_redirected)
1911{
1912        long ret;
1913
1914        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1915        ASSERT_EQ(0, ret);
1916
1917        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1918        ASSERT_EQ(0, ret);
1919
1920        /* getpid has been redirected to getppid as expected. */
1921        EXPECT_EQ(self->parent, syscall(__NR_getpid));
1922        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1923}
1924
1925TEST_F(TRACE_syscall, syscall_errno)
1926{
1927        long ret;
1928
1929        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1930        ASSERT_EQ(0, ret);
1931
1932        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1933        ASSERT_EQ(0, ret);
1934
1935        /* openat has been skipped and an errno return. */
1936        EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1937}
1938
1939TEST_F(TRACE_syscall, syscall_faked)
1940{
1941        long ret;
1942
1943        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1944        ASSERT_EQ(0, ret);
1945
1946        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1947        ASSERT_EQ(0, ret);
1948
1949        /* gettid has been skipped and an altered return value stored. */
1950        EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1951}
1952
1953TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1954{
1955        struct sock_filter filter[] = {
1956                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1957                        offsetof(struct seccomp_data, nr)),
1958                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1959                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1960                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1961        };
1962        struct sock_fprog prog = {
1963                .len = (unsigned short)ARRAY_SIZE(filter),
1964                .filter = filter,
1965        };
1966        long ret;
1967
1968        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1969        ASSERT_EQ(0, ret);
1970
1971        /* Install fixture filter. */
1972        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1973        ASSERT_EQ(0, ret);
1974
1975        /* Install "errno on getppid" filter. */
1976        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1977        ASSERT_EQ(0, ret);
1978
1979        /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1980        errno = 0;
1981        EXPECT_EQ(-1, syscall(__NR_getpid));
1982        EXPECT_EQ(EPERM, errno);
1983}
1984
1985TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
1986{
1987        struct sock_filter filter[] = {
1988                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1989                        offsetof(struct seccomp_data, nr)),
1990                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1991                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1992                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1993        };
1994        struct sock_fprog prog = {
1995                .len = (unsigned short)ARRAY_SIZE(filter),
1996                .filter = filter,
1997        };
1998        long ret;
1999
2000        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2001        ASSERT_EQ(0, ret);
2002
2003        /* Install fixture filter. */
2004        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2005        ASSERT_EQ(0, ret);
2006
2007        /* Install "death on getppid" filter. */
2008        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2009        ASSERT_EQ(0, ret);
2010
2011        /* Tracer will redirect getpid to getppid, and we should die. */
2012        EXPECT_NE(self->mypid, syscall(__NR_getpid));
2013}
2014
2015TEST_F(TRACE_syscall, skip_after_ptrace)
2016{
2017        struct sock_filter filter[] = {
2018                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2019                        offsetof(struct seccomp_data, nr)),
2020                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2021                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2022                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2023        };
2024        struct sock_fprog prog = {
2025                .len = (unsigned short)ARRAY_SIZE(filter),
2026                .filter = filter,
2027        };
2028        long ret;
2029
2030        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2031        teardown_trace_fixture(_metadata, self->tracer);
2032        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2033                                           true);
2034
2035        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2036        ASSERT_EQ(0, ret);
2037
2038        /* Install "errno on getppid" filter. */
2039        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2040        ASSERT_EQ(0, ret);
2041
2042        /* Tracer will redirect getpid to getppid, and we should see EPERM. */
2043        EXPECT_EQ(-1, syscall(__NR_getpid));
2044        EXPECT_EQ(EPERM, errno);
2045}
2046
2047TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2048{
2049        struct sock_filter filter[] = {
2050                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2051                        offsetof(struct seccomp_data, nr)),
2052                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2053                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2054                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2055        };
2056        struct sock_fprog prog = {
2057                .len = (unsigned short)ARRAY_SIZE(filter),
2058                .filter = filter,
2059        };
2060        long ret;
2061
2062        /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2063        teardown_trace_fixture(_metadata, self->tracer);
2064        self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2065                                           true);
2066
2067        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2068        ASSERT_EQ(0, ret);
2069
2070        /* Install "death on getppid" filter. */
2071        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2072        ASSERT_EQ(0, ret);
2073
2074        /* Tracer will redirect getpid to getppid, and we should die. */
2075        EXPECT_NE(self->mypid, syscall(__NR_getpid));
2076}
2077
2078TEST(seccomp_syscall)
2079{
2080        struct sock_filter filter[] = {
2081                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2082        };
2083        struct sock_fprog prog = {
2084                .len = (unsigned short)ARRAY_SIZE(filter),
2085                .filter = filter,
2086        };
2087        long ret;
2088
2089        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2090        ASSERT_EQ(0, ret) {
2091                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2092        }
2093
2094        /* Reject insane operation. */
2095        ret = seccomp(-1, 0, &prog);
2096        ASSERT_NE(ENOSYS, errno) {
2097                TH_LOG("Kernel does not support seccomp syscall!");
2098        }
2099        EXPECT_EQ(EINVAL, errno) {
2100                TH_LOG("Did not reject crazy op value!");
2101        }
2102
2103        /* Reject strict with flags or pointer. */
2104        ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2105        EXPECT_EQ(EINVAL, errno) {
2106                TH_LOG("Did not reject mode strict with flags!");
2107        }
2108        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2109        EXPECT_EQ(EINVAL, errno) {
2110                TH_LOG("Did not reject mode strict with uargs!");
2111        }
2112
2113        /* Reject insane args for filter. */
2114        ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2115        EXPECT_EQ(EINVAL, errno) {
2116                TH_LOG("Did not reject crazy filter flags!");
2117        }
2118        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2119        EXPECT_EQ(EFAULT, errno) {
2120                TH_LOG("Did not reject NULL filter!");
2121        }
2122
2123        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2124        EXPECT_EQ(0, errno) {
2125                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2126                        strerror(errno));
2127        }
2128}
2129
2130TEST(seccomp_syscall_mode_lock)
2131{
2132        struct sock_filter filter[] = {
2133                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2134        };
2135        struct sock_fprog prog = {
2136                .len = (unsigned short)ARRAY_SIZE(filter),
2137                .filter = filter,
2138        };
2139        long ret;
2140
2141        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2142        ASSERT_EQ(0, ret) {
2143                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2144        }
2145
2146        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2147        ASSERT_NE(ENOSYS, errno) {
2148                TH_LOG("Kernel does not support seccomp syscall!");
2149        }
2150        EXPECT_EQ(0, ret) {
2151                TH_LOG("Could not install filter!");
2152        }
2153
2154        /* Make sure neither entry point will switch to strict. */
2155        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2156        EXPECT_EQ(EINVAL, errno) {
2157                TH_LOG("Switched to mode strict!");
2158        }
2159
2160        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2161        EXPECT_EQ(EINVAL, errno) {
2162                TH_LOG("Switched to mode strict!");
2163        }
2164}
2165
2166/*
2167 * Test detection of known and unknown filter flags. Userspace needs to be able
2168 * to check if a filter flag is supported by the current kernel and a good way
2169 * of doing that is by attempting to enter filter mode, with the flag bit in
2170 * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2171 * that the flag is valid and EINVAL indicates that the flag is invalid.
2172 */
2173TEST(detect_seccomp_filter_flags)
2174{
2175        unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2176                                 SECCOMP_FILTER_FLAG_LOG,
2177                                 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2178                                 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2179        unsigned int exclusive[] = {
2180                                SECCOMP_FILTER_FLAG_TSYNC,
2181                                SECCOMP_FILTER_FLAG_NEW_LISTENER };
2182        unsigned int flag, all_flags, exclusive_mask;
2183        int i;
2184        long ret;
2185
2186        /* Test detection of individual known-good filter flags */
2187        for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2188                int bits = 0;
2189
2190                flag = flags[i];
2191                /* Make sure the flag is a single bit! */
2192                while (flag) {
2193                        if (flag & 0x1)
2194                                bits ++;
2195                        flag >>= 1;
2196                }
2197                ASSERT_EQ(1, bits);
2198                flag = flags[i];
2199
2200                ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2201                ASSERT_NE(ENOSYS, errno) {
2202                        TH_LOG("Kernel does not support seccomp syscall!");
2203                }
2204                EXPECT_EQ(-1, ret);
2205                EXPECT_EQ(EFAULT, errno) {
2206                        TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2207                               flag);
2208                }
2209
2210                all_flags |= flag;
2211        }
2212
2213        /*
2214         * Test detection of all known-good filter flags combined. But
2215         * for the exclusive flags we need to mask them out and try them
2216         * individually for the "all flags" testing.
2217         */
2218        exclusive_mask = 0;
2219        for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2220                exclusive_mask |= exclusive[i];
2221        for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2222                flag = all_flags & ~exclusive_mask;
2223                flag |= exclusive[i];
2224
2225                ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2226                EXPECT_EQ(-1, ret);
2227                EXPECT_EQ(EFAULT, errno) {
2228                        TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2229                               flag);
2230                }
2231        }
2232
2233        /* Test detection of an unknown filter flags, without exclusives. */
2234        flag = -1;
2235        flag &= ~exclusive_mask;
2236        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2237        EXPECT_EQ(-1, ret);
2238        EXPECT_EQ(EINVAL, errno) {
2239                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2240                       flag);
2241        }
2242
2243        /*
2244         * Test detection of an unknown filter flag that may simply need to be
2245         * added to this test
2246         */
2247        flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2248        ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2249        EXPECT_EQ(-1, ret);
2250        EXPECT_EQ(EINVAL, errno) {
2251                TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2252                       flag);
2253        }
2254}
2255
2256TEST(TSYNC_first)
2257{
2258        struct sock_filter filter[] = {
2259                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2260        };
2261        struct sock_fprog prog = {
2262                .len = (unsigned short)ARRAY_SIZE(filter),
2263                .filter = filter,
2264        };
2265        long ret;
2266
2267        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2268        ASSERT_EQ(0, ret) {
2269                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2270        }
2271
2272        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2273                      &prog);
2274        ASSERT_NE(ENOSYS, errno) {
2275                TH_LOG("Kernel does not support seccomp syscall!");
2276        }
2277        EXPECT_EQ(0, ret) {
2278                TH_LOG("Could not install initial filter with TSYNC!");
2279        }
2280}
2281
/* Number of sibling threads the TSYNC fixture manages. */
#define TSYNC_SIBLINGS 2

/* Per-thread bookkeeping handed to each tsync_sibling() thread. */
struct tsync_sibling {
	/* pthread handle; zero when no thread is outstanding. */
	pthread_t tid;
	/* Kernel thread id, recorded by the thread itself via gettid. */
	pid_t system_tid;
	/* Posted once by the thread after its optional filter install. */
	sem_t *started;
	/* Condition variable and mutex the thread waits on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs 'prog' on itself before waiting. */
	int diverge;
	/* Number of condition-variable wakeups to consume before going on. */
	int num_waits;
	/* Filter program used when 'diverge' is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the owning test. */
	struct __test_metadata *metadata;
};
2294
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * 'tid' must be a modifiable lvalue and is evaluated more than once;
 * 'status' is passed straight through to pthread_join(). Both macro
 * arguments are parenthesized in the expansion so that any expression
 * can be passed safely.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2311
2312FIXTURE_DATA(TSYNC) {
2313        struct sock_fprog root_prog, apply_prog;
2314        struct tsync_sibling sibling[TSYNC_SIBLINGS];
2315        sem_t started;
2316        pthread_cond_t cond;
2317        pthread_mutex_t mutex;
2318        int sibling_count;
2319};
2320
2321FIXTURE_SETUP(TSYNC)
2322{
2323        struct sock_filter root_filter[] = {
2324                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2325        };
2326        struct sock_filter apply_filter[] = {
2327                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2328                        offsetof(struct seccomp_data, nr)),
2329                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2330                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2331                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2332        };
2333
2334        memset(&self->root_prog, 0, sizeof(self->root_prog));
2335        memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2336        memset(&self->sibling, 0, sizeof(self->sibling));
2337        self->root_prog.filter = malloc(sizeof(root_filter));
2338        ASSERT_NE(NULL, self->root_prog.filter);
2339        memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2340        self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2341
2342        self->apply_prog.filter = malloc(sizeof(apply_filter));
2343        ASSERT_NE(NULL, self->apply_prog.filter);
2344        memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2345        self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2346
2347        self->sibling_count = 0;
2348        pthread_mutex_init(&self->mutex, NULL);
2349        pthread_cond_init(&self->cond, NULL);
2350        sem_init(&self->started, 0, 0);
2351        self->sibling[0].tid = 0;
2352        self->sibling[0].cond = &self->cond;
2353        self->sibling[0].started = &self->started;
2354        self->sibling[0].mutex = &self->mutex;
2355        self->sibling[0].diverge = 0;
2356        self->sibling[0].num_waits = 1;
2357        self->sibling[0].prog = &self->root_prog;
2358        self->sibling[0].metadata = _metadata;
2359        self->sibling[1].tid = 0;
2360        self->sibling[1].cond = &self->cond;
2361        self->sibling[1].started = &self->started;
2362        self->sibling[1].mutex = &self->mutex;
2363        self->sibling[1].diverge = 0;
2364        self->sibling[1].prog = &self->root_prog;
2365        self->sibling[1].num_waits = 1;
2366        self->sibling[1].metadata = _metadata;
2367}
2368
2369FIXTURE_TEARDOWN(TSYNC)
2370{
2371        int sib = 0;
2372
2373        if (self->root_prog.filter)
2374                free(self->root_prog.filter);
2375        if (self->apply_prog.filter)
2376                free(self->apply_prog.filter);
2377
2378        for ( ; sib < self->sibling_count; ++sib) {
2379                struct tsync_sibling *s = &self->sibling[sib];
2380
2381                if (!s->tid)
2382                        continue;
2383                /*
2384                 * If a thread is still running, it may be stuck, so hit
2385                 * it over the head really hard.
2386                 */
2387                pthread_kill(s->tid, 9);
2388        }
2389        pthread_mutex_destroy(&self->mutex);
2390        pthread_cond_destroy(&self->cond);
2391        sem_destroy(&self->started);
2392}
2393
2394void *tsync_sibling(void *data)
2395{
2396        long ret = 0;
2397        struct tsync_sibling *me = data;
2398
2399        me->system_tid = syscall(__NR_gettid);
2400
2401        pthread_mutex_lock(me->mutex);
2402        if (me->diverge) {
2403                /* Just re-apply the root prog to fork the tree */
2404                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2405                                me->prog, 0, 0);
2406        }
2407        sem_post(me->started);
2408        /* Return outside of started so parent notices failures. */
2409        if (ret) {
2410                pthread_mutex_unlock(me->mutex);
2411                return (void *)SIBLING_EXIT_FAILURE;
2412        }
2413        do {
2414                pthread_cond_wait(me->cond, me->mutex);
2415                me->num_waits = me->num_waits - 1;
2416        } while (me->num_waits);
2417        pthread_mutex_unlock(me->mutex);
2418
2419        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2420        if (!ret)
2421                return (void *)SIBLING_EXIT_NEWPRIVS;
2422        read(0, NULL, 0);
2423        return (void *)SIBLING_EXIT_UNKILLED;
2424}
2425
2426void tsync_start_sibling(struct tsync_sibling *sibling)
2427{
2428        pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2429}
2430
2431TEST_F(TSYNC, siblings_fail_prctl)
2432{
2433        long ret;
2434        void *status;
2435        struct sock_filter filter[] = {
2436                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2437                        offsetof(struct seccomp_data, nr)),
2438                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2439                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2440                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2441        };
2442        struct sock_fprog prog = {
2443                .len = (unsigned short)ARRAY_SIZE(filter),
2444                .filter = filter,
2445        };
2446
2447        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2448                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2449        }
2450
2451        /* Check prctl failure detection by requesting sib 0 diverge. */
2452        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2453        ASSERT_NE(ENOSYS, errno) {
2454                TH_LOG("Kernel does not support seccomp syscall!");
2455        }
2456        ASSERT_EQ(0, ret) {
2457                TH_LOG("setting filter failed");
2458        }
2459
2460        self->sibling[0].diverge = 1;
2461        tsync_start_sibling(&self->sibling[0]);
2462        tsync_start_sibling(&self->sibling[1]);
2463
2464        while (self->sibling_count < TSYNC_SIBLINGS) {
2465                sem_wait(&self->started);
2466                self->sibling_count++;
2467        }
2468
2469        /* Signal the threads to clean up*/
2470        pthread_mutex_lock(&self->mutex);
2471        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2472                TH_LOG("cond broadcast non-zero");
2473        }
2474        pthread_mutex_unlock(&self->mutex);
2475
2476        /* Ensure diverging sibling failed to call prctl. */
2477        PTHREAD_JOIN(self->sibling[0].tid, &status);
2478        EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2479        PTHREAD_JOIN(self->sibling[1].tid, &status);
2480        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2481}
2482
2483TEST_F(TSYNC, two_siblings_with_ancestor)
2484{
2485        long ret;
2486        void *status;
2487
2488        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2489                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2490        }
2491
2492        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2493        ASSERT_NE(ENOSYS, errno) {
2494                TH_LOG("Kernel does not support seccomp syscall!");
2495        }
2496        ASSERT_EQ(0, ret) {
2497                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2498        }
2499        tsync_start_sibling(&self->sibling[0]);
2500        tsync_start_sibling(&self->sibling[1]);
2501
2502        while (self->sibling_count < TSYNC_SIBLINGS) {
2503                sem_wait(&self->started);
2504                self->sibling_count++;
2505        }
2506
2507        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2508                      &self->apply_prog);
2509        ASSERT_EQ(0, ret) {
2510                TH_LOG("Could install filter on all threads!");
2511        }
2512        /* Tell the siblings to test the policy */
2513        pthread_mutex_lock(&self->mutex);
2514        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2515                TH_LOG("cond broadcast non-zero");
2516        }
2517        pthread_mutex_unlock(&self->mutex);
2518        /* Ensure they are both killed and don't exit cleanly. */
2519        PTHREAD_JOIN(self->sibling[0].tid, &status);
2520        EXPECT_EQ(0x0, (long)status);
2521        PTHREAD_JOIN(self->sibling[1].tid, &status);
2522        EXPECT_EQ(0x0, (long)status);
2523}
2524
2525TEST_F(TSYNC, two_sibling_want_nnp)
2526{
2527        void *status;
2528
2529        /* start siblings before any prctl() operations */
2530        tsync_start_sibling(&self->sibling[0]);
2531        tsync_start_sibling(&self->sibling[1]);
2532        while (self->sibling_count < TSYNC_SIBLINGS) {
2533                sem_wait(&self->started);
2534                self->sibling_count++;
2535        }
2536
2537        /* Tell the siblings to test no policy */
2538        pthread_mutex_lock(&self->mutex);
2539        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2540                TH_LOG("cond broadcast non-zero");
2541        }
2542        pthread_mutex_unlock(&self->mutex);
2543
2544        /* Ensure they are both upset about lacking nnp. */
2545        PTHREAD_JOIN(self->sibling[0].tid, &status);
2546        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2547        PTHREAD_JOIN(self->sibling[1].tid, &status);
2548        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2549}
2550
2551TEST_F(TSYNC, two_siblings_with_no_filter)
2552{
2553        long ret;
2554        void *status;
2555
2556        /* start siblings before any prctl() operations */
2557        tsync_start_sibling(&self->sibling[0]);
2558        tsync_start_sibling(&self->sibling[1]);
2559        while (self->sibling_count < TSYNC_SIBLINGS) {
2560                sem_wait(&self->started);
2561                self->sibling_count++;
2562        }
2563
2564        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2565                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2566        }
2567
2568        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2569                      &self->apply_prog);
2570        ASSERT_NE(ENOSYS, errno) {
2571                TH_LOG("Kernel does not support seccomp syscall!");
2572        }
2573        ASSERT_EQ(0, ret) {
2574                TH_LOG("Could install filter on all threads!");
2575        }
2576
2577        /* Tell the siblings to test the policy */
2578        pthread_mutex_lock(&self->mutex);
2579        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2580                TH_LOG("cond broadcast non-zero");
2581        }
2582        pthread_mutex_unlock(&self->mutex);
2583
2584        /* Ensure they are both killed and don't exit cleanly. */
2585        PTHREAD_JOIN(self->sibling[0].tid, &status);
2586        EXPECT_EQ(0x0, (long)status);
2587        PTHREAD_JOIN(self->sibling[1].tid, &status);
2588        EXPECT_EQ(0x0, (long)status);
2589}
2590
2591TEST_F(TSYNC, two_siblings_with_one_divergence)
2592{
2593        long ret;
2594        void *status;
2595
2596        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2597                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2598        }
2599
2600        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2601        ASSERT_NE(ENOSYS, errno) {
2602                TH_LOG("Kernel does not support seccomp syscall!");
2603        }
2604        ASSERT_EQ(0, ret) {
2605                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2606        }
2607        self->sibling[0].diverge = 1;
2608        tsync_start_sibling(&self->sibling[0]);
2609        tsync_start_sibling(&self->sibling[1]);
2610
2611        while (self->sibling_count < TSYNC_SIBLINGS) {
2612                sem_wait(&self->started);
2613                self->sibling_count++;
2614        }
2615
2616        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2617                      &self->apply_prog);
2618        ASSERT_EQ(self->sibling[0].system_tid, ret) {
2619                TH_LOG("Did not fail on diverged sibling.");
2620        }
2621
2622        /* Wake the threads */
2623        pthread_mutex_lock(&self->mutex);
2624        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2625                TH_LOG("cond broadcast non-zero");
2626        }
2627        pthread_mutex_unlock(&self->mutex);
2628
2629        /* Ensure they are both unkilled. */
2630        PTHREAD_JOIN(self->sibling[0].tid, &status);
2631        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2632        PTHREAD_JOIN(self->sibling[1].tid, &status);
2633        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2634}
2635
/*
 * TSYNC when both siblings were started before the calling thread installed
 * any filter. The first TSYNC attempt is asserted to return sibling 0's tid;
 * after that sibling has been joined and its task is gone, a second attempt
 * is asserted to succeed, and a third one is made once no sibling is left.
 */
TEST_F(TSYNC, two_siblings_not_under_filter)
{
        long ret, sib;
        void *status;
        /* 100ms pause between the kill(tid, 0) polls below. */
        struct timespec delay = { .tv_nsec = 100000000 };

        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }

        /*
         * Sibling 0 will have its own seccomp policy
         * and Sibling 1 will not be under seccomp at
         * all. Sibling 1 will enter seccomp and 0
         * will cause failure.
         */
        self->sibling[0].diverge = 1;
        tsync_start_sibling(&self->sibling[0]);
        tsync_start_sibling(&self->sibling[1]);

        /* Wait on "started" until sibling_count reaches TSYNC_SIBLINGS. */
        while (self->sibling_count < TSYNC_SIBLINGS) {
                sem_wait(&self->started);
                self->sibling_count++;
        }

        /* The root filter goes on this thread only after the siblings run. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
        ASSERT_NE(ENOSYS, errno) {
                TH_LOG("Kernel does not support seccomp syscall!");
        }
        ASSERT_EQ(0, ret) {
                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
        }

        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(ret, self->sibling[0].system_tid) {
                TH_LOG("Did not fail on diverged sibling.");
        }
        /*
         * The assertion above already pins ret to sibling 0's tid, so sib
         * ends up as 0 whenever execution gets this far.
         */
        sib = 1;
        if (ret == self->sibling[0].system_tid)
                sib = 0;

        pthread_mutex_lock(&self->mutex);

        /* Increment the other siblings num_waits so we can clean up
         * the one we just saw.
         * NOTE(review): num_waits is consumed by the sibling thread routine,
         * which is outside this block -- presumably the number of wakeups
         * it waits for before exiting; confirm there.
         */
        self->sibling[!sib].num_waits += 1;

        /* Signal the thread to clean up */
        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
                TH_LOG("cond broadcast non-zero");
        }
        pthread_mutex_unlock(&self->mutex);
        PTHREAD_JOIN(self->sibling[sib].tid, &status);
        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
        /* Poll for actual task death. pthread_join doesn't guarantee it. */
        while (!kill(self->sibling[sib].system_tid, 0))
                nanosleep(&delay, NULL);
        /* Switch to the remaining sibling */
        sib = !sib;

        /* With the first sibling gone, TSYNC is expected to return 0. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(0, ret) {
                TH_LOG("Expected the remaining sibling to sync");
        };

        pthread_mutex_lock(&self->mutex);

        /* If remaining sibling didn't have a chance to wake up during
         * the first broadcast, manually reduce the num_waits now.
         */
        if (self->sibling[sib].num_waits > 1)
                self->sibling[sib].num_waits = 1;
        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
                TH_LOG("cond broadcast non-zero");
        }
        pthread_mutex_unlock(&self->mutex);
        PTHREAD_JOIN(self->sibling[sib].tid, &status);
        /* This sibling is expected to hand back 0 as its exit status. */
        EXPECT_EQ(0, (long)status);
        /* Poll for actual task death. pthread_join doesn't guarantee it. */
        while (!kill(self->sibling[sib].system_tid, 0))
                nanosleep(&delay, NULL);

        /* Both siblings are gone; TSYNC only has this thread to cover. */
        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
                      &self->apply_prog);
        ASSERT_EQ(0, ret);  /* just us chickens */
}
2725
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child makes itself a ptrace tracee, installs the filter below and
 * calls nanosleep(). The parent stops it with SIGSTOP while it sleeps,
 * resumes it with SIGCONT, and then checks which syscall number the next
 * seccomp trace event reports.
 */
TEST(syscall_restart)
{
        long ret;
        unsigned long msg;
        pid_t child_pid;
        int pipefd[2];
        int status;
        siginfo_t info = { };
        /*
         * Jump offsets are relative to the following instruction:
         * sigreturn (when defined), read, exit, rt_sigreturn and write land
         * on RET_ALLOW; nanosleep lands on RET_TRACE|0x100; restart_syscall
         * lands on RET_TRACE|0x200; anything else falls to RET_KILL.
         */
        struct sock_filter filter[] = {
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                         offsetof(struct seccomp_data, nr)),

#ifdef __NR_sigreturn
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

                /* Allow __NR_write for easy logging. */
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
                /* The nanosleep jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
                /* The restart_syscall jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
        };
        struct sock_fprog prog = {
                .len = (unsigned short)ARRAY_SIZE(filter),
                .filter = filter,
        };
#if defined(__arm__)
        struct utsname utsbuf;
#endif

        /* Parent -> child sync channel: '.' starts the sleep, '!' ends it. */
        ASSERT_EQ(0, pipe(pipefd));

        child_pid = fork();
        ASSERT_LE(0, child_pid);
        if (child_pid == 0) {
                /* Child uses EXPECT not ASSERT to deliver status correctly. */
                char buf = ' ';
                struct timespec timeout = { };

                /* Attach parent as tracer and stop. */
                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
                EXPECT_EQ(0, raise(SIGSTOP));

                EXPECT_EQ(0, close(pipefd[1]));

                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
                }

                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
                EXPECT_EQ(0, ret) {
                        TH_LOG("Failed to install filter!");
                }

                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed to read() sync from parent");
                }
                EXPECT_EQ('.', buf) {
                        TH_LOG("Failed to get sync data from read()");
                }

                /* Start nanosleep to be interrupted. */
                timeout.tv_sec = 1;
                errno = 0;
                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
                }

                /* Read final sync from parent. */
                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed final read() from parent");
                }
                EXPECT_EQ('!', buf) {
                        TH_LOG("Failed to get final data from read()");
                }

                /*
                 * Directly report the status of our test harness results.
                 * The raw __NR_exit syscall is one the filter above allows.
                 */
                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
                                                     : EXIT_FAILURE);
        }
        EXPECT_EQ(0, close(pipefd[0]));

        /* Attach to child, setup options, and release. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
                            PTRACE_O_TRACESECCOMP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], ".", 1));

        /* Wait for nanosleep() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
        /* 0x100 is the data value of the filter's nanosleep target. */
        ASSERT_EQ(0x100, msg);
        EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

        /* Might as well check siginfo for sanity while we're here. */
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        ASSERT_EQ(SIGTRAP, info.si_signo);
        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
        EXPECT_EQ(0, info.si_errno);
        EXPECT_EQ(getuid(), info.si_uid);
        /* Verify signal delivery came from child (seccomp-triggered). */
        EXPECT_EQ(child_pid, info.si_pid);

        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        /*
         * There is no siginfo on SIGSTOP any more, so we can't verify
         * signal delivery came from parent now (getpid() == info.si_pid).
         * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
         * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
         */
        EXPECT_EQ(SIGSTOP, info.si_signo);

        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
        ASSERT_EQ(0, kill(child_pid, SIGCONT));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

        /* Wait for restart_syscall() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

        /* 0x200 is the data value of the filter's restart_syscall target. */
        ASSERT_EQ(0x200, msg);
        ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
        /*
         * FIXME:
         * - native ARM registers do NOT expose true syscall.
         * - compat ARM registers on ARM64 DO expose true syscall.
         */
        ASSERT_EQ(0, uname(&utsbuf));
        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
                EXPECT_EQ(__NR_nanosleep, ret);
        } else
#endif
        {
                EXPECT_EQ(__NR_restart_syscall, ret);
        }

        /* Write again to end test. */
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], "!", 1));
        EXPECT_EQ(0, close(pipefd[1]));

        /* A signaled child or a non-zero exit status fails the test. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        if (WIFSIGNALED(status) || WEXITSTATUS(status))
                _metadata->passed = 0;
}
2899
2900TEST_SIGNAL(filter_flag_log, SIGSYS)
2901{
2902        struct sock_filter allow_filter[] = {
2903                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2904        };
2905        struct sock_filter kill_filter[] = {
2906                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2907                        offsetof(struct seccomp_data, nr)),
2908                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2909                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2910                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2911        };
2912        struct sock_fprog allow_prog = {
2913                .len = (unsigned short)ARRAY_SIZE(allow_filter),
2914                .filter = allow_filter,
2915        };
2916        struct sock_fprog kill_prog = {
2917                .len = (unsigned short)ARRAY_SIZE(kill_filter),
2918                .filter = kill_filter,
2919        };
2920        long ret;
2921        pid_t parent = getppid();
2922
2923        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2924        ASSERT_EQ(0, ret);
2925
2926        /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2927        ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2928                      &allow_prog);
2929        ASSERT_NE(ENOSYS, errno) {
2930                TH_LOG("Kernel does not support seccomp syscall!");
2931        }
2932        EXPECT_NE(0, ret) {
2933                TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2934        }
2935        EXPECT_EQ(EINVAL, errno) {
2936                TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2937        }
2938
2939        /* Verify that a simple, permissive filter can be added with no flags */
2940        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2941        EXPECT_EQ(0, ret);
2942
2943        /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2944        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2945                      &allow_prog);
2946        ASSERT_NE(EINVAL, errno) {
2947                TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2948        }
2949        EXPECT_EQ(0, ret);
2950
2951        /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2952        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2953                      &kill_prog);
2954        EXPECT_EQ(0, ret);
2955
2956        EXPECT_EQ(parent, syscall(__NR_getppid));
2957        /* getpid() should never return. */
2958        EXPECT_EQ(0, syscall(__NR_getpid));
2959}
2960
2961TEST(get_action_avail)
2962{
2963        __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2964                            SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2965                            SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2966        __u32 unknown_action = 0x10000000U;
2967        int i;
2968        long ret;
2969
2970        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2971        ASSERT_NE(ENOSYS, errno) {
2972                TH_LOG("Kernel does not support seccomp syscall!");
2973        }
2974        ASSERT_NE(EINVAL, errno) {
2975                TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2976        }
2977        EXPECT_EQ(ret, 0);
2978
2979        for (i = 0; i < ARRAY_SIZE(actions); i++) {
2980                ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2981                EXPECT_EQ(ret, 0) {
2982                        TH_LOG("Expected action (0x%X) not available!",
2983                               actions[i]);
2984                }
2985        }
2986
2987        /* Check that an unknown action is handled properly (EOPNOTSUPP) */
2988        ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2989        EXPECT_EQ(ret, -1);
2990        EXPECT_EQ(errno, EOPNOTSUPP);
2991}
2992
2993TEST(get_metadata)
2994{
2995        pid_t pid;
2996        int pipefd[2];
2997        char buf;
2998        struct seccomp_metadata md;
2999        long ret;
3000
3001        /* Only real root can get metadata. */
3002        if (geteuid()) {
3003                XFAIL(return, "get_metadata requires real root");
3004                return;
3005        }
3006
3007        ASSERT_EQ(0, pipe(pipefd));
3008
3009        pid = fork();
3010        ASSERT_GE(pid, 0);
3011        if (pid == 0) {
3012                struct sock_filter filter[] = {
3013                        BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3014                };
3015                struct sock_fprog prog = {
3016                        .len = (unsigned short)ARRAY_SIZE(filter),
3017                        .filter = filter,
3018                };
3019
3020                /* one with log, one without */
3021                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3022                                     SECCOMP_FILTER_FLAG_LOG, &prog));
3023                EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3024
3025                EXPECT_EQ(0, close(pipefd[0]));
3026                ASSERT_EQ(1, write(pipefd[1], "1", 1));
3027                ASSERT_EQ(0, close(pipefd[1]));
3028
3029                while (1)
3030                        sleep(100);
3031        }
3032
3033        ASSERT_EQ(0, close(pipefd[1]));
3034        ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3035
3036        ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3037        ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3038
3039        /* Past here must not use ASSERT or child process is never killed. */
3040
3041        md.filter_off = 0;
3042        errno = 0;
3043        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3044        EXPECT_EQ(sizeof(md), ret) {
3045                if (errno == EINVAL)
3046                        XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3047        }
3048
3049        EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3050        EXPECT_EQ(md.filter_off, 0);
3051
3052        md.filter_off = 1;
3053        ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3054        EXPECT_EQ(sizeof(md), ret);
3055        EXPECT_EQ(md.flags, 0);
3056        EXPECT_EQ(md.filter_off, 1);
3057
3058skip:
3059        ASSERT_EQ(0, kill(pid, SIGKILL));
3060}
3061
3062static int user_trap_syscall(int nr, unsigned int flags)
3063{
3064        struct sock_filter filter[] = {
3065                BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3066                        offsetof(struct seccomp_data, nr)),
3067                BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3068                BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3069                BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3070        };
3071
3072        struct sock_fprog prog = {
3073                .len = (unsigned short)ARRAY_SIZE(filter),
3074                .filter = filter,
3075        };
3076
3077        return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3078}
3079
/*
 * Value the notification tests hand back through seccomp_notif_resp.val and
 * then compare against the "long" returned by syscall(). It is kept at the
 * INT_MAX value so that it fits a 32-bit long: a wider constant is truncated
 * on 32-bit ABIs and the comparison could never succeed there.
 */
#define USER_NOTIF_MAGIC 0x7fffffffL
3081TEST(user_notification_basic)
3082{
3083        pid_t pid;
3084        long ret;
3085        int status, listener;
3086        struct seccomp_notif req = {};
3087        struct seccomp_notif_resp resp = {};
3088        struct pollfd pollfd;
3089
3090        struct sock_filter filter[] = {
3091                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3092        };
3093        struct sock_fprog prog = {
3094                .len = (unsigned short)ARRAY_SIZE(filter),
3095                .filter = filter,
3096        };
3097
3098        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3099        ASSERT_EQ(0, ret) {
3100                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3101        }
3102
3103        pid = fork();
3104        ASSERT_GE(pid, 0);
3105
3106        /* Check that we get -ENOSYS with no listener attached */
3107        if (pid == 0) {
3108                if (user_trap_syscall(__NR_getppid, 0) < 0)
3109                        exit(1);
3110                ret = syscall(__NR_getppid);
3111                exit(ret >= 0 || errno != ENOSYS);
3112        }
3113
3114        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3115        EXPECT_EQ(true, WIFEXITED(status));
3116        EXPECT_EQ(0, WEXITSTATUS(status));
3117
3118        /* Add some no-op filters for grins. */
3119        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3120        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3121        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3122        EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3123
3124        /* Check that the basic notification machinery works */
3125        listener = user_trap_syscall(__NR_getppid,
3126                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3127        ASSERT_GE(listener, 0);
3128
3129        /* Installing a second listener in the chain should EBUSY */
3130        EXPECT_EQ(user_trap_syscall(__NR_getppid,
3131                                    SECCOMP_FILTER_FLAG_NEW_LISTENER),
3132                  -1);
3133        EXPECT_EQ(errno, EBUSY);
3134
3135        pid = fork();
3136        ASSERT_GE(pid, 0);
3137
3138        if (pid == 0) {
3139                ret = syscall(__NR_getppid);
3140                exit(ret != USER_NOTIF_MAGIC);
3141        }
3142
3143        pollfd.fd = listener;
3144        pollfd.events = POLLIN | POLLOUT;
3145
3146        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3147        EXPECT_EQ(pollfd.revents, POLLIN);
3148
3149        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3150
3151        pollfd.fd = listener;
3152        pollfd.events = POLLIN | POLLOUT;
3153
3154        EXPECT_GT(poll(&pollfd, 1, -1), 0);
3155        EXPECT_EQ(pollfd.revents, POLLOUT);
3156
3157        EXPECT_EQ(req.data.nr,  __NR_getppid);
3158
3159        resp.id = req.id;
3160        resp.error = 0;
3161        resp.val = USER_NOTIF_MAGIC;
3162
3163        /* check that we make sure flags == 0 */
3164        resp.flags = 1;
3165        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3166        EXPECT_EQ(errno, EINVAL);
3167
3168        resp.flags = 0;
3169        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3170
3171        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3172        EXPECT_EQ(true, WIFEXITED(status));
3173        EXPECT_EQ(0, WEXITSTATUS(status));
3174}
3175
3176TEST(user_notification_kill_in_middle)
3177{
3178        pid_t pid;
3179        long ret;
3180        int listener;
3181        struct seccomp_notif req = {};
3182        struct seccomp_notif_resp resp = {};
3183
3184        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3185        ASSERT_EQ(0, ret) {
3186                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3187        }
3188
3189        listener = user_trap_syscall(__NR_getppid,
3190                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3191        ASSERT_GE(listener, 0);
3192
3193        /*
3194         * Check that nothing bad happens when we kill the task in the middle
3195         * of a syscall.
3196         */
3197        pid = fork();
3198        ASSERT_GE(pid, 0);
3199
3200        if (pid == 0) {
3201                ret = syscall(__NR_getppid);
3202                exit(ret != USER_NOTIF_MAGIC);
3203        }
3204
3205        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3206        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3207
3208        EXPECT_EQ(kill(pid, SIGKILL), 0);
3209        EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3210
3211        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3212
3213        resp.id = req.id;
3214        ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3215        EXPECT_EQ(ret, -1);
3216        EXPECT_EQ(errno, ENOENT);
3217}
3218
/* Descriptor signal_handler() reports on; -1 until a test assigns one. */
static int handled = -1;

/*
 * Signal handler: write a single 'c' byte to the "handled" descriptor so the
 * other end can tell the handler ran. A failed or short write is reported
 * with perror().
 */
static void signal_handler(int signal)
{
        ssize_t written = write(handled, "c", 1);

        if (written == 1)
                return;

        perror("write from signal");
}
3226
3227TEST(user_notification_signal)
3228{
3229        pid_t pid;
3230        long ret;
3231        int status, listener, sk_pair[2];
3232        struct seccomp_notif req = {};
3233        struct seccomp_notif_resp resp = {};
3234        char c;
3235
3236        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3237        ASSERT_EQ(0, ret) {
3238                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3239        }
3240
3241        ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3242
3243        listener = user_trap_syscall(__NR_gettid,
3244                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3245        ASSERT_GE(listener, 0);
3246
3247        pid = fork();
3248        ASSERT_GE(pid, 0);
3249
3250        if (pid == 0) {
3251                close(sk_pair[0]);
3252                handled = sk_pair[1];
3253                if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3254                        perror("signal");
3255                        exit(1);
3256                }
3257                /*
3258                 * ERESTARTSYS behavior is a bit hard to test, because we need
3259                 * to rely on a signal that has not yet been handled. Let's at
3260                 * least check that the error code gets propagated through, and
3261                 * hope that it doesn't break when there is actually a signal :)
3262                 */
3263                ret = syscall(__NR_gettid);
3264                exit(!(ret == -1 && errno == 512));
3265        }
3266
3267        close(sk_pair[1]);
3268
3269        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3270
3271        EXPECT_EQ(kill(pid, SIGUSR1), 0);
3272
3273        /*
3274         * Make sure the signal really is delivered, which means we're not
3275         * stuck in the user notification code any more and the notification
3276         * should be dead.
3277         */
3278        EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3279
3280        resp.id = req.id;
3281        resp.error = -EPERM;
3282        resp.val = 0;
3283
3284        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3285        EXPECT_EQ(errno, ENOENT);
3286
3287        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3288
3289        resp.id = req.id;
3290        resp.error = -512; /* -ERESTARTSYS */
3291        resp.val = 0;
3292
3293        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3294
3295        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3296        EXPECT_EQ(true, WIFEXITED(status));
3297        EXPECT_EQ(0, WEXITSTATUS(status));
3298}
3299
3300TEST(user_notification_closed_listener)
3301{
3302        pid_t pid;
3303        long ret;
3304        int status, listener;
3305
3306        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3307        ASSERT_EQ(0, ret) {
3308                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3309        }
3310
3311        listener = user_trap_syscall(__NR_getppid,
3312                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3313        ASSERT_GE(listener, 0);
3314
3315        /*
3316         * Check that we get an ENOSYS when the listener is closed.
3317         */
3318        pid = fork();
3319        ASSERT_GE(pid, 0);
3320        if (pid == 0) {
3321                close(listener);
3322                ret = syscall(__NR_getppid);
3323                exit(ret != -1 && errno != ENOSYS);
3324        }
3325
3326        close(listener);
3327
3328        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3329        EXPECT_EQ(true, WIFEXITED(status));
3330        EXPECT_EQ(0, WEXITSTATUS(status));
3331}
3332
3333/*
3334 * Check that a pid in a child namespace still shows up as valid in ours.
3335 */
3336TEST(user_notification_child_pid_ns)
3337{
3338        pid_t pid;
3339        int status, listener;
3340        struct seccomp_notif req = {};
3341        struct seccomp_notif_resp resp = {};
3342
3343        ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3344
3345        listener = user_trap_syscall(__NR_getppid,
3346                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3347        ASSERT_GE(listener, 0);
3348
3349        pid = fork();
3350        ASSERT_GE(pid, 0);
3351
3352        if (pid == 0)
3353                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3354
3355        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3356        EXPECT_EQ(req.pid, pid);
3357
3358        resp.id = req.id;
3359        resp.error = 0;
3360        resp.val = USER_NOTIF_MAGIC;
3361
3362        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3363
3364        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3365        EXPECT_EQ(true, WIFEXITED(status));
3366        EXPECT_EQ(0, WEXITSTATUS(status));
3367        close(listener);
3368}
3369
3370/*
3371 * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3372 * invalid.
3373 */
3374TEST(user_notification_sibling_pid_ns)
3375{
3376        pid_t pid, pid2;
3377        int status, listener;
3378        struct seccomp_notif req = {};
3379        struct seccomp_notif_resp resp = {};
3380
3381        ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3382                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3383        }
3384
3385        listener = user_trap_syscall(__NR_getppid,
3386                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3387        ASSERT_GE(listener, 0);
3388
3389        pid = fork();
3390        ASSERT_GE(pid, 0);
3391
3392        if (pid == 0) {
3393                ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3394
3395                pid2 = fork();
3396                ASSERT_GE(pid2, 0);
3397
3398                if (pid2 == 0)
3399                        exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3400
3401                EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3402                EXPECT_EQ(true, WIFEXITED(status));
3403                EXPECT_EQ(0, WEXITSTATUS(status));
3404                exit(WEXITSTATUS(status));
3405        }
3406
3407        /* Create the sibling ns, and sibling in it. */
3408        ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3409        ASSERT_EQ(errno, 0);
3410
3411        pid2 = fork();
3412        ASSERT_GE(pid2, 0);
3413
3414        if (pid2 == 0) {
3415                ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3416                /*
3417                 * The pid should be 0, i.e. the task is in some namespace that
3418                 * we can't "see".
3419                 */
3420                EXPECT_EQ(req.pid, 0);
3421
3422                resp.id = req.id;
3423                resp.error = 0;
3424                resp.val = USER_NOTIF_MAGIC;
3425
3426                ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3427                exit(0);
3428        }
3429
3430        close(listener);
3431
3432        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3433        EXPECT_EQ(true, WIFEXITED(status));
3434        EXPECT_EQ(0, WEXITSTATUS(status));
3435
3436        EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3437        EXPECT_EQ(true, WIFEXITED(status));
3438        EXPECT_EQ(0, WEXITSTATUS(status));
3439}
3440
3441TEST(user_notification_fault_recv)
3442{
3443        pid_t pid;
3444        int status, listener;
3445        struct seccomp_notif req = {};
3446        struct seccomp_notif_resp resp = {};
3447
3448        ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3449
3450        listener = user_trap_syscall(__NR_getppid,
3451                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3452        ASSERT_GE(listener, 0);
3453
3454        pid = fork();
3455        ASSERT_GE(pid, 0);
3456
3457        if (pid == 0)
3458                exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3459
3460        /* Do a bad recv() */
3461        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3462        EXPECT_EQ(errno, EFAULT);
3463
3464        /* We should still be able to receive this notification, though. */
3465        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3466        EXPECT_EQ(req.pid, pid);
3467
3468        resp.id = req.id;
3469        resp.error = 0;
3470        resp.val = USER_NOTIF_MAGIC;
3471
3472        EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3473
3474        EXPECT_EQ(waitpid(pid, &status, 0), pid);
3475        EXPECT_EQ(true, WIFEXITED(status));
3476        EXPECT_EQ(0, WEXITSTATUS(status));
3477}
3478
/*
 * SECCOMP_GET_NOTIF_SIZES must succeed and report sizes equal to the
 * sizeof() of the seccomp_notif and seccomp_notif_resp structures this
 * test was compiled against.
 */
TEST(seccomp_get_notif_sizes)
{
        struct seccomp_notif_sizes sizes;

        /* ASSERT: the size checks are meaningless if the call itself failed. */
        ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
        EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
        EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
3487
3488/*
3489 * TODO:
3490 * - add microbenchmarks
3491 * - expand NNP testing
3492 * - better arch-specific TRACE and TRAP handlers.
3493 * - endianness checking when appropriate
3494 * - 64-bit arg prodding
3495 * - arch value testing (x86 modes especially)
3496 * - verify that FILTER_FLAG_LOG filters generate log messages
3497 * - verify that RET_LOG generates log messages
3498 * - ...
3499 */
3500
3501TEST_HARNESS_MAIN
3502