linux/tools/testing/selftests/seccomp/seccomp_bpf.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   3 * Use of this source code is governed by the GPLv2 license.
   4 *
   5 * Test code for seccomp bpf.
   6 */
   7
   8#include <asm/siginfo.h>
   9#define __have_siginfo_t 1
  10#define __have_sigval_t 1
  11#define __have_sigevent_t 1
  12
  13#include <errno.h>
  14#include <linux/filter.h>
  15#include <sys/prctl.h>
  16#include <sys/ptrace.h>
  17#include <sys/types.h>
  18#include <sys/user.h>
  19#include <linux/prctl.h>
  20#include <linux/ptrace.h>
  21#include <linux/seccomp.h>
  22#include <pthread.h>
  23#include <semaphore.h>
  24#include <signal.h>
  25#include <stddef.h>
  26#include <stdbool.h>
  27#include <string.h>
  28#include <time.h>
  29#include <linux/elf.h>
  30#include <sys/uio.h>
  31#include <sys/utsname.h>
  32#include <sys/fcntl.h>
  33#include <sys/mman.h>
  34#include <sys/times.h>
  35
  36#define _GNU_SOURCE
  37#include <unistd.h>
  38#include <sys/syscall.h>
  39
  40#include "test_harness.h"
  41
  42#ifndef PR_SET_PTRACER
  43# define PR_SET_PTRACER 0x59616d61
  44#endif
  45
  46#ifndef PR_SET_NO_NEW_PRIVS
  47#define PR_SET_NO_NEW_PRIVS 38
  48#define PR_GET_NO_NEW_PRIVS 39
  49#endif
  50
  51#ifndef PR_SECCOMP_EXT
  52#define PR_SECCOMP_EXT 43
  53#endif
  54
  55#ifndef SECCOMP_EXT_ACT
  56#define SECCOMP_EXT_ACT 1
  57#endif
  58
  59#ifndef SECCOMP_EXT_ACT_TSYNC
  60#define SECCOMP_EXT_ACT_TSYNC 1
  61#endif
  62
  63#ifndef SECCOMP_MODE_STRICT
  64#define SECCOMP_MODE_STRICT 1
  65#endif
  66
  67#ifndef SECCOMP_MODE_FILTER
  68#define SECCOMP_MODE_FILTER 2
  69#endif
  70
  71#ifndef SECCOMP_RET_KILL
  72#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
  73#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
  74#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
  75#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
  76#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
  77
  78/* Masks for the return value sections. */
  79#define SECCOMP_RET_ACTION      0x7fff0000U
  80#define SECCOMP_RET_DATA        0x0000ffffU
  81
  82struct seccomp_data {
  83        int nr;
  84        __u32 arch;
  85        __u64 instruction_pointer;
  86        __u64 args[6];
  87};
  88#endif
  89
  90#if __BYTE_ORDER == __LITTLE_ENDIAN
  91#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
  92#elif __BYTE_ORDER == __BIG_ENDIAN
  93#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
  94#else
  95#error "wut? Unknown __BYTE_ORDER?!"
  96#endif
  97
  98#define SIBLING_EXIT_UNKILLED   0xbadbeef
  99#define SIBLING_EXIT_FAILURE    0xbadface
 100#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 101
 102TEST(mode_strict_support)
 103{
 104        long ret;
 105
 106        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 107        ASSERT_EQ(0, ret) {
 108                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 109        }
 110        syscall(__NR_exit, 1);
 111}
 112
 113TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 114{
 115        long ret;
 116
 117        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 118        ASSERT_EQ(0, ret) {
 119                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 120        }
 121        syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 122                NULL, NULL, NULL);
 123        EXPECT_FALSE(true) {
 124                TH_LOG("Unreachable!");
 125        }
 126}
 127
 128/* Note! This doesn't test no new privs behavior */
 129TEST(no_new_privs_support)
 130{
 131        long ret;
 132
 133        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 134        EXPECT_EQ(0, ret) {
 135                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 136        }
 137}
 138
 139/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
 140TEST(mode_filter_support)
 141{
 142        long ret;
 143
 144        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 145        ASSERT_EQ(0, ret) {
 146                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 147        }
 148        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 149        EXPECT_EQ(-1, ret);
 150        EXPECT_EQ(EFAULT, errno) {
 151                TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 152        }
 153}
 154
 155TEST(mode_filter_without_nnp)
 156{
 157        struct sock_filter filter[] = {
 158                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 159        };
 160        struct sock_fprog prog = {
 161                .len = (unsigned short)ARRAY_SIZE(filter),
 162                .filter = filter,
 163        };
 164        long ret;
 165
 166        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 167        ASSERT_LE(0, ret) {
 168                TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 169        }
 170        errno = 0;
 171        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 172        /* Succeeds with CAP_SYS_ADMIN, fails without */
 173        /* TODO(wad) check caps not euid */
 174        if (geteuid()) {
 175                EXPECT_EQ(-1, ret);
 176                EXPECT_EQ(EACCES, errno);
 177        } else {
 178                EXPECT_EQ(0, ret);
 179        }
 180}
 181
 182#define MAX_INSNS_PER_PATH 32768
 183
 184TEST(filter_size_limits)
 185{
 186        int i;
 187        int count = BPF_MAXINSNS + 1;
 188        struct sock_filter allow[] = {
 189                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 190        };
 191        struct sock_filter *filter;
 192        struct sock_fprog prog = { };
 193        long ret;
 194
 195        filter = calloc(count, sizeof(*filter));
 196        ASSERT_NE(NULL, filter);
 197
 198        for (i = 0; i < count; i++)
 199                filter[i] = allow[0];
 200
 201        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 202        ASSERT_EQ(0, ret);
 203
 204        prog.filter = filter;
 205        prog.len = count;
 206
 207        /* Too many filter instructions in a single filter. */
 208        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 209        ASSERT_NE(0, ret) {
 210                TH_LOG("Installing %d insn filter was allowed", prog.len);
 211        }
 212
 213        /* One less is okay, though. */
 214        prog.len -= 1;
 215        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 216        ASSERT_EQ(0, ret) {
 217                TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 218        }
 219}
 220
 221TEST(filter_chain_limits)
 222{
 223        int i;
 224        int count = BPF_MAXINSNS;
 225        struct sock_filter allow[] = {
 226                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 227        };
 228        struct sock_filter *filter;
 229        struct sock_fprog prog = { };
 230        long ret;
 231
 232        filter = calloc(count, sizeof(*filter));
 233        ASSERT_NE(NULL, filter);
 234
 235        for (i = 0; i < count; i++)
 236                filter[i] = allow[0];
 237
 238        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 239        ASSERT_EQ(0, ret);
 240
 241        prog.filter = filter;
 242        prog.len = 1;
 243
 244        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 245        ASSERT_EQ(0, ret);
 246
 247        prog.len = count;
 248
 249        /* Too many total filter instructions. */
 250        for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 251                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 252                if (ret != 0)
 253                        break;
 254        }
 255        ASSERT_NE(0, ret) {
 256                TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 257                       i, count, i * (count + 4));
 258        }
 259}
 260
 261TEST(mode_filter_cannot_move_to_strict)
 262{
 263        struct sock_filter filter[] = {
 264                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 265        };
 266        struct sock_fprog prog = {
 267                .len = (unsigned short)ARRAY_SIZE(filter),
 268                .filter = filter,
 269        };
 270        long ret;
 271
 272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 273        ASSERT_EQ(0, ret);
 274
 275        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 276        ASSERT_EQ(0, ret);
 277
 278        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 279        EXPECT_EQ(-1, ret);
 280        EXPECT_EQ(EINVAL, errno);
 281}
 282
 283
 284TEST(mode_filter_get_seccomp)
 285{
 286        struct sock_filter filter[] = {
 287                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 288        };
 289        struct sock_fprog prog = {
 290                .len = (unsigned short)ARRAY_SIZE(filter),
 291                .filter = filter,
 292        };
 293        long ret;
 294
 295        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 296        ASSERT_EQ(0, ret);
 297
 298        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 299        EXPECT_EQ(0, ret);
 300
 301        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 302        ASSERT_EQ(0, ret);
 303
 304        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 305        EXPECT_EQ(2, ret);
 306}
 307
 308
 309TEST(ALLOW_all)
 310{
 311        struct sock_filter filter[] = {
 312                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 313        };
 314        struct sock_fprog prog = {
 315                .len = (unsigned short)ARRAY_SIZE(filter),
 316                .filter = filter,
 317        };
 318        long ret;
 319
 320        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 321        ASSERT_EQ(0, ret);
 322
 323        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 324        ASSERT_EQ(0, ret);
 325}
 326
 327TEST(empty_prog)
 328{
 329        struct sock_filter filter[] = {
 330        };
 331        struct sock_fprog prog = {
 332                .len = (unsigned short)ARRAY_SIZE(filter),
 333                .filter = filter,
 334        };
 335        long ret;
 336
 337        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 338        ASSERT_EQ(0, ret);
 339
 340        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 341        EXPECT_EQ(-1, ret);
 342        EXPECT_EQ(EINVAL, errno);
 343}
 344
 345TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 346{
 347        struct sock_filter filter[] = {
 348                BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 349        };
 350        struct sock_fprog prog = {
 351                .len = (unsigned short)ARRAY_SIZE(filter),
 352                .filter = filter,
 353        };
 354        long ret;
 355
 356        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 357        ASSERT_EQ(0, ret);
 358
 359        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 360        ASSERT_EQ(0, ret);
 361        EXPECT_EQ(0, syscall(__NR_getpid)) {
 362                TH_LOG("getpid() shouldn't ever return");
 363        }
 364}
 365
 366/* return code >= 0x80000000 is unused. */
 367TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 368{
 369        struct sock_filter filter[] = {
 370                BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 371        };
 372        struct sock_fprog prog = {
 373                .len = (unsigned short)ARRAY_SIZE(filter),
 374                .filter = filter,
 375        };
 376        long ret;
 377
 378        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 379        ASSERT_EQ(0, ret);
 380
 381        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 382        ASSERT_EQ(0, ret);
 383        EXPECT_EQ(0, syscall(__NR_getpid)) {
 384                TH_LOG("getpid() shouldn't ever return");
 385        }
 386}
 387
 388TEST_SIGNAL(KILL_all, SIGSYS)
 389{
 390        struct sock_filter filter[] = {
 391                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 392        };
 393        struct sock_fprog prog = {
 394                .len = (unsigned short)ARRAY_SIZE(filter),
 395                .filter = filter,
 396        };
 397        long ret;
 398
 399        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 400        ASSERT_EQ(0, ret);
 401
 402        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 403        ASSERT_EQ(0, ret);
 404}
 405
 406TEST_SIGNAL(KILL_one, SIGSYS)
 407{
 408        struct sock_filter filter[] = {
 409                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 410                        offsetof(struct seccomp_data, nr)),
 411                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 412                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 413                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 414        };
 415        struct sock_fprog prog = {
 416                .len = (unsigned short)ARRAY_SIZE(filter),
 417                .filter = filter,
 418        };
 419        long ret;
 420        pid_t parent = getppid();
 421
 422        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 423        ASSERT_EQ(0, ret);
 424
 425        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 426        ASSERT_EQ(0, ret);
 427
 428        EXPECT_EQ(parent, syscall(__NR_getppid));
 429        /* getpid() should never return. */
 430        EXPECT_EQ(0, syscall(__NR_getpid));
 431}
 432
 433TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 434{
 435        void *fatal_address;
 436        struct sock_filter filter[] = {
 437                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 438                        offsetof(struct seccomp_data, nr)),
 439                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 440                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 441                /* Only both with lower 32-bit for now. */
 442                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 443                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 444                        (unsigned long)&fatal_address, 0, 1),
 445                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 446                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 447        };
 448        struct sock_fprog prog = {
 449                .len = (unsigned short)ARRAY_SIZE(filter),
 450                .filter = filter,
 451        };
 452        long ret;
 453        pid_t parent = getppid();
 454        struct tms timebuf;
 455        clock_t clock = times(&timebuf);
 456
 457        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 458        ASSERT_EQ(0, ret);
 459
 460        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 461        ASSERT_EQ(0, ret);
 462
 463        EXPECT_EQ(parent, syscall(__NR_getppid));
 464        EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 465        /* times() should never return. */
 466        EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 467}
 468
 469TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 470{
 471#ifndef __NR_mmap2
 472        int sysno = __NR_mmap;
 473#else
 474        int sysno = __NR_mmap2;
 475#endif
 476        struct sock_filter filter[] = {
 477                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 478                        offsetof(struct seccomp_data, nr)),
 479                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 480                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 481                /* Only both with lower 32-bit for now. */
 482                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 483                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 484                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 485                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 486        };
 487        struct sock_fprog prog = {
 488                .len = (unsigned short)ARRAY_SIZE(filter),
 489                .filter = filter,
 490        };
 491        long ret;
 492        pid_t parent = getppid();
 493        int fd;
 494        void *map1, *map2;
 495        int page_size = sysconf(_SC_PAGESIZE);
 496
 497        ASSERT_LT(0, page_size);
 498
 499        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 500        ASSERT_EQ(0, ret);
 501
 502        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 503        ASSERT_EQ(0, ret);
 504
 505        fd = open("/dev/zero", O_RDONLY);
 506        ASSERT_NE(-1, fd);
 507
 508        EXPECT_EQ(parent, syscall(__NR_getppid));
 509        map1 = (void *)syscall(sysno,
 510                NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 511        EXPECT_NE(MAP_FAILED, map1);
 512        /* mmap2() should never return. */
 513        map2 = (void *)syscall(sysno,
 514                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 515        EXPECT_EQ(MAP_FAILED, map2);
 516
 517        /* The test failed, so clean up the resources. */
 518        munmap(map1, page_size);
 519        munmap(map2, page_size);
 520        close(fd);
 521}
 522
 523/* TODO(wad) add 64-bit versus 32-bit arg tests. */
 524TEST(arg_out_of_range)
 525{
 526        struct sock_filter filter[] = {
 527                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 528                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 529        };
 530        struct sock_fprog prog = {
 531                .len = (unsigned short)ARRAY_SIZE(filter),
 532                .filter = filter,
 533        };
 534        long ret;
 535
 536        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 537        ASSERT_EQ(0, ret);
 538
 539        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 540        EXPECT_EQ(-1, ret);
 541        EXPECT_EQ(EINVAL, errno);
 542}
 543
 544TEST(ERRNO_valid)
 545{
 546        struct sock_filter filter[] = {
 547                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 548                        offsetof(struct seccomp_data, nr)),
 549                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 550                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
 551                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 552        };
 553        struct sock_fprog prog = {
 554                .len = (unsigned short)ARRAY_SIZE(filter),
 555                .filter = filter,
 556        };
 557        long ret;
 558        pid_t parent = getppid();
 559
 560        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 561        ASSERT_EQ(0, ret);
 562
 563        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 564        ASSERT_EQ(0, ret);
 565
 566        EXPECT_EQ(parent, syscall(__NR_getppid));
 567        EXPECT_EQ(-1, read(0, NULL, 0));
 568        EXPECT_EQ(E2BIG, errno);
 569}
 570
 571TEST(ERRNO_zero)
 572{
 573        struct sock_filter filter[] = {
 574                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 575                        offsetof(struct seccomp_data, nr)),
 576                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 577                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
 578                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 579        };
 580        struct sock_fprog prog = {
 581                .len = (unsigned short)ARRAY_SIZE(filter),
 582                .filter = filter,
 583        };
 584        long ret;
 585        pid_t parent = getppid();
 586
 587        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 588        ASSERT_EQ(0, ret);
 589
 590        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 591        ASSERT_EQ(0, ret);
 592
 593        EXPECT_EQ(parent, syscall(__NR_getppid));
 594        /* "errno" of 0 is ok. */
 595        EXPECT_EQ(0, read(0, NULL, 0));
 596}
 597
 598TEST(ERRNO_capped)
 599{
 600        struct sock_filter filter[] = {
 601                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 602                        offsetof(struct seccomp_data, nr)),
 603                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 604                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
 605                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 606        };
 607        struct sock_fprog prog = {
 608                .len = (unsigned short)ARRAY_SIZE(filter),
 609                .filter = filter,
 610        };
 611        long ret;
 612        pid_t parent = getppid();
 613
 614        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 615        ASSERT_EQ(0, ret);
 616
 617        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 618        ASSERT_EQ(0, ret);
 619
 620        EXPECT_EQ(parent, syscall(__NR_getppid));
 621        EXPECT_EQ(-1, read(0, NULL, 0));
 622        EXPECT_EQ(4095, errno);
 623}
 624
 625FIXTURE_DATA(TRAP) {
 626        struct sock_fprog prog;
 627};
 628
 629FIXTURE_SETUP(TRAP)
 630{
 631        struct sock_filter filter[] = {
 632                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 633                        offsetof(struct seccomp_data, nr)),
 634                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 635                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 636                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 637        };
 638
 639        memset(&self->prog, 0, sizeof(self->prog));
 640        self->prog.filter = malloc(sizeof(filter));
 641        ASSERT_NE(NULL, self->prog.filter);
 642        memcpy(self->prog.filter, filter, sizeof(filter));
 643        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
 644}
 645
 646FIXTURE_TEARDOWN(TRAP)
 647{
 648        if (self->prog.filter)
 649                free(self->prog.filter);
 650}
 651
 652TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
 653{
 654        long ret;
 655
 656        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 657        ASSERT_EQ(0, ret);
 658
 659        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 660        ASSERT_EQ(0, ret);
 661        syscall(__NR_getpid);
 662}
 663
 664/* Ensure that SIGSYS overrides SIG_IGN */
 665TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 666{
 667        long ret;
 668
 669        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 670        ASSERT_EQ(0, ret);
 671
 672        signal(SIGSYS, SIG_IGN);
 673
 674        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 675        ASSERT_EQ(0, ret);
 676        syscall(__NR_getpid);
 677}
 678
 679static struct siginfo TRAP_info;
 680static volatile int TRAP_nr;
 681static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 682{
 683        memcpy(&TRAP_info, info, sizeof(TRAP_info));
 684        TRAP_nr = nr;
 685}
 686
 687TEST_F(TRAP, handler)
 688{
 689        int ret, test;
 690        struct sigaction act;
 691        sigset_t mask;
 692
 693        memset(&act, 0, sizeof(act));
 694        sigemptyset(&mask);
 695        sigaddset(&mask, SIGSYS);
 696
 697        act.sa_sigaction = &TRAP_action;
 698        act.sa_flags = SA_SIGINFO;
 699        ret = sigaction(SIGSYS, &act, NULL);
 700        ASSERT_EQ(0, ret) {
 701                TH_LOG("sigaction failed");
 702        }
 703        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
 704        ASSERT_EQ(0, ret) {
 705                TH_LOG("sigprocmask failed");
 706        }
 707
 708        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 709        ASSERT_EQ(0, ret);
 710        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 711        ASSERT_EQ(0, ret);
 712        TRAP_nr = 0;
 713        memset(&TRAP_info, 0, sizeof(TRAP_info));
 714        /* Expect the registers to be rolled back. (nr = error) may vary
 715         * based on arch. */
 716        ret = syscall(__NR_getpid);
 717        /* Silence gcc warning about volatile. */
 718        test = TRAP_nr;
 719        EXPECT_EQ(SIGSYS, test);
 720        struct local_sigsys {
 721                void *_call_addr;       /* calling user insn */
 722                int _syscall;           /* triggering system call number */
 723                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
 724        } *sigsys = (struct local_sigsys *)
 725#ifdef si_syscall
 726                &(TRAP_info.si_call_addr);
 727#else
 728                &TRAP_info.si_pid;
 729#endif
 730        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
 731        /* Make sure arch is non-zero. */
 732        EXPECT_NE(0, sigsys->_arch);
 733        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
 734}
 735
 736FIXTURE_DATA(precedence) {
 737        struct sock_fprog allow;
 738        struct sock_fprog trace;
 739        struct sock_fprog error;
 740        struct sock_fprog trap;
 741        struct sock_fprog kill;
 742};
 743
 744FIXTURE_SETUP(precedence)
 745{
 746        struct sock_filter allow_insns[] = {
 747                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 748        };
 749        struct sock_filter trace_insns[] = {
 750                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 751                        offsetof(struct seccomp_data, nr)),
 752                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 753                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 754                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
 755        };
 756        struct sock_filter error_insns[] = {
 757                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 758                        offsetof(struct seccomp_data, nr)),
 759                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 760                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 761                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
 762        };
 763        struct sock_filter trap_insns[] = {
 764                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 765                        offsetof(struct seccomp_data, nr)),
 766                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 767                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 768                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 769        };
 770        struct sock_filter kill_insns[] = {
 771                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 772                        offsetof(struct seccomp_data, nr)),
 773                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 774                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 775                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 776        };
 777
 778        memset(self, 0, sizeof(*self));
 779#define FILTER_ALLOC(_x) \
 780        self->_x.filter = malloc(sizeof(_x##_insns)); \
 781        ASSERT_NE(NULL, self->_x.filter); \
 782        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 783        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 784        FILTER_ALLOC(allow);
 785        FILTER_ALLOC(trace);
 786        FILTER_ALLOC(error);
 787        FILTER_ALLOC(trap);
 788        FILTER_ALLOC(kill);
 789}
 790
 791FIXTURE_TEARDOWN(precedence)
 792{
 793#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 794        FILTER_FREE(allow);
 795        FILTER_FREE(trace);
 796        FILTER_FREE(error);
 797        FILTER_FREE(trap);
 798        FILTER_FREE(kill);
 799}
 800
 801TEST_F(precedence, allow_ok)
 802{
 803        pid_t parent, res = 0;
 804        long ret;
 805
 806        parent = getppid();
 807        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 808        ASSERT_EQ(0, ret);
 809
 810        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 811        ASSERT_EQ(0, ret);
 812        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 813        ASSERT_EQ(0, ret);
 814        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 815        ASSERT_EQ(0, ret);
 816        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 817        ASSERT_EQ(0, ret);
 818        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 819        ASSERT_EQ(0, ret);
 820        /* Should work just fine. */
 821        res = syscall(__NR_getppid);
 822        EXPECT_EQ(parent, res);
 823}
 824
 825TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 826{
 827        pid_t parent, res = 0;
 828        long ret;
 829
 830        parent = getppid();
 831        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 832        ASSERT_EQ(0, ret);
 833
 834        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 835        ASSERT_EQ(0, ret);
 836        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 837        ASSERT_EQ(0, ret);
 838        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 839        ASSERT_EQ(0, ret);
 840        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 841        ASSERT_EQ(0, ret);
 842        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 843        ASSERT_EQ(0, ret);
 844        /* Should work just fine. */
 845        res = syscall(__NR_getppid);
 846        EXPECT_EQ(parent, res);
 847        /* getpid() should never return. */
 848        res = syscall(__NR_getpid);
 849        EXPECT_EQ(0, res);
 850}
 851
 852TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 853{
 854        pid_t parent;
 855        long ret;
 856
 857        parent = getppid();
 858        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 859        ASSERT_EQ(0, ret);
 860
 861        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 862        ASSERT_EQ(0, ret);
 863        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 864        ASSERT_EQ(0, ret);
 865        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 866        ASSERT_EQ(0, ret);
 867        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 868        ASSERT_EQ(0, ret);
 869        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 870        ASSERT_EQ(0, ret);
 871        /* Should work just fine. */
 872        EXPECT_EQ(parent, syscall(__NR_getppid));
 873        /* getpid() should never return. */
 874        EXPECT_EQ(0, syscall(__NR_getpid));
 875}
 876
 877TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 878{
 879        pid_t parent;
 880        long ret;
 881
 882        parent = getppid();
 883        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 884        ASSERT_EQ(0, ret);
 885
 886        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 887        ASSERT_EQ(0, ret);
 888        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 889        ASSERT_EQ(0, ret);
 890        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 891        ASSERT_EQ(0, ret);
 892        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 893        ASSERT_EQ(0, ret);
 894        /* Should work just fine. */
 895        EXPECT_EQ(parent, syscall(__NR_getppid));
 896        /* getpid() should never return. */
 897        EXPECT_EQ(0, syscall(__NR_getpid));
 898}
 899
 900TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 901{
 902        pid_t parent;
 903        long ret;
 904
 905        parent = getppid();
 906        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 907        ASSERT_EQ(0, ret);
 908
 909        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 910        ASSERT_EQ(0, ret);
 911        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 912        ASSERT_EQ(0, ret);
 913        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 914        ASSERT_EQ(0, ret);
 915        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 916        ASSERT_EQ(0, ret);
 917        /* Should work just fine. */
 918        EXPECT_EQ(parent, syscall(__NR_getppid));
 919        /* getpid() should never return. */
 920        EXPECT_EQ(0, syscall(__NR_getpid));
 921}
 922
 923TEST_F(precedence, errno_is_third)
 924{
 925        pid_t parent;
 926        long ret;
 927
 928        parent = getppid();
 929        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 930        ASSERT_EQ(0, ret);
 931
 932        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 933        ASSERT_EQ(0, ret);
 934        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 935        ASSERT_EQ(0, ret);
 936        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 937        ASSERT_EQ(0, ret);
 938        /* Should work just fine. */
 939        EXPECT_EQ(parent, syscall(__NR_getppid));
 940        EXPECT_EQ(0, syscall(__NR_getpid));
 941}
 942
 943TEST_F(precedence, errno_is_third_in_any_order)
 944{
 945        pid_t parent;
 946        long ret;
 947
 948        parent = getppid();
 949        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 950        ASSERT_EQ(0, ret);
 951
 952        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 953        ASSERT_EQ(0, ret);
 954        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 955        ASSERT_EQ(0, ret);
 956        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 957        ASSERT_EQ(0, ret);
 958        /* Should work just fine. */
 959        EXPECT_EQ(parent, syscall(__NR_getppid));
 960        EXPECT_EQ(0, syscall(__NR_getpid));
 961}
 962
 963TEST_F(precedence, trace_is_fourth)
 964{
 965        pid_t parent;
 966        long ret;
 967
 968        parent = getppid();
 969        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 970        ASSERT_EQ(0, ret);
 971
 972        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 973        ASSERT_EQ(0, ret);
 974        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 975        ASSERT_EQ(0, ret);
 976        /* Should work just fine. */
 977        EXPECT_EQ(parent, syscall(__NR_getppid));
 978        /* No ptracer */
 979        EXPECT_EQ(-1, syscall(__NR_getpid));
 980}
 981
 982TEST_F(precedence, trace_is_fourth_in_any_order)
 983{
 984        pid_t parent;
 985        long ret;
 986
 987        parent = getppid();
 988        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 989        ASSERT_EQ(0, ret);
 990
 991        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 992        ASSERT_EQ(0, ret);
 993        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 994        ASSERT_EQ(0, ret);
 995        /* Should work just fine. */
 996        EXPECT_EQ(parent, syscall(__NR_getppid));
 997        /* No ptracer */
 998        EXPECT_EQ(-1, syscall(__NR_getpid));
 999}
1000
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when a wait status carries PTRACE_EVENT_SECCOMP in the bits above
 * bit 15.  The argument is parenthesized so that expressions built from
 * lower-precedence operators (e.g. "a | b") are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Keep-running flag for the tracer's event loop.  It is cleared from a
 * signal handler, so it is declared volatile sig_atomic_t, the type that
 * may safely be written from asynchronous signal context.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: request that the tracer loop stop. */
void tracer_stop(int sig)
{
	(void)sig;	/* the signal number itself is not needed */
	tracer_running = false;
}
1020
/*
 * Callback signature used by tracer(): receives the harness metadata, the
 * traced pid, the raw wait status of the stop, and an opaque argument.
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1023
1024void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1025            tracer_func_t tracer_func, void *args)
1026{
1027        int ret = -1;
1028        struct sigaction action = {
1029                .sa_handler = tracer_stop,
1030        };
1031
1032        /* Allow external shutdown. */
1033        tracer_running = true;
1034        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1035
1036        errno = 0;
1037        while (ret == -1 && errno != EINVAL)
1038                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1039        ASSERT_EQ(0, ret) {
1040                kill(tracee, SIGKILL);
1041        }
1042        /* Wait for attach stop */
1043        wait(NULL);
1044
1045        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1046        ASSERT_EQ(0, ret) {
1047                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1048                kill(tracee, SIGKILL);
1049        }
1050        ptrace(PTRACE_CONT, tracee, NULL, 0);
1051
1052        /* Unblock the tracee */
1053        ASSERT_EQ(1, write(fd, "A", 1));
1054        ASSERT_EQ(0, close(fd));
1055
1056        /* Run until we're shut down. Must assert to stop execution. */
1057        while (tracer_running) {
1058                int status;
1059
1060                if (wait(&status) != tracee)
1061                        continue;
1062                if (WIFSIGNALED(status) || WIFEXITED(status))
1063                        /* Child is dead. Time to go. */
1064                        return;
1065
1066                /* Make sure this is a seccomp event. */
1067                ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1068
1069                tracer_func(_metadata, tracee, status, args);
1070
1071                ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1072                ASSERT_EQ(0, ret);
1073        }
1074        /* Directly report the status of our test harness results. */
1075        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1076}
1077
1078/* Common tracer setup/teardown functions. */
/* Intentionally empty signal handler; it only has to exist. */
void cont_handler(int num)
{
	(void)num;
}
1081pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1082                          tracer_func_t func, void *args)
1083{
1084        char sync;
1085        int pipefd[2];
1086        pid_t tracer_pid;
1087        pid_t tracee = getpid();
1088
1089        /* Setup a pipe for clean synchronization. */
1090        ASSERT_EQ(0, pipe(pipefd));
1091
1092        /* Fork a child which we'll promote to tracer */
1093        tracer_pid = fork();
1094        ASSERT_LE(0, tracer_pid);
1095        signal(SIGALRM, cont_handler);
1096        if (tracer_pid == 0) {
1097                close(pipefd[0]);
1098                tracer(_metadata, pipefd[1], tracee, func, args);
1099                syscall(__NR_exit, 0);
1100        }
1101        close(pipefd[1]);
1102        prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1103        read(pipefd[0], &sync, 1);
1104        close(pipefd[0]);
1105
1106        return tracer_pid;
1107}
1108void teardown_trace_fixture(struct __test_metadata *_metadata,
1109                            pid_t tracer)
1110{
1111        if (tracer) {
1112                int status;
1113                /*
1114                 * Extract the exit code from the other process and
1115                 * adopt it for ourselves in case its asserts failed.
1116                 */
1117                ASSERT_EQ(0, kill(tracer, SIGUSR1));
1118                ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1119                if (WEXITSTATUS(status))
1120                        _metadata->passed = 0;
1121        }
1122}
1123
/*
 * "poke" tracer arguments and function.
 *
 * Argument block handed to tracer_poke() through the tracer's opaque
 * pointer.
 */
struct tracer_args_poke_t {
	/* Address inside the tracee that tracer_poke() writes to. */
	unsigned long poke_addr;
};
1128
1129void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1130                 void *args)
1131{
1132        int ret;
1133        unsigned long msg;
1134        struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1135
1136        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1137        EXPECT_EQ(0, ret);
1138        /* If this fails, don't try to recover. */
1139        ASSERT_EQ(0x1001, msg) {
1140                kill(tracee, SIGKILL);
1141        }
1142        /*
1143         * Poke in the message.
1144         * Registers are not touched to try to keep this relatively arch
1145         * agnostic.
1146         */
1147        ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1148        EXPECT_EQ(0, ret);
1149}
1150
1151FIXTURE_DATA(TRACE_poke) {
1152        struct sock_fprog prog;
1153        pid_t tracer;
1154        long poked;
1155        struct tracer_args_poke_t tracer_args;
1156};
1157
1158FIXTURE_SETUP(TRACE_poke)
1159{
1160        struct sock_filter filter[] = {
1161                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1162                        offsetof(struct seccomp_data, nr)),
1163                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1164                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1165                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1166        };
1167
1168        self->poked = 0;
1169        memset(&self->prog, 0, sizeof(self->prog));
1170        self->prog.filter = malloc(sizeof(filter));
1171        ASSERT_NE(NULL, self->prog.filter);
1172        memcpy(self->prog.filter, filter, sizeof(filter));
1173        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1174
1175        /* Set up tracer args. */
1176        self->tracer_args.poke_addr = (unsigned long)&self->poked;
1177
1178        /* Launch tracer. */
1179        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1180                                           &self->tracer_args);
1181}
1182
1183FIXTURE_TEARDOWN(TRACE_poke)
1184{
1185        teardown_trace_fixture(_metadata, self->tracer);
1186        if (self->prog.filter)
1187                free(self->prog.filter);
1188}
1189
1190TEST_F(TRACE_poke, read_has_side_effects)
1191{
1192        ssize_t ret;
1193
1194        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1195        ASSERT_EQ(0, ret);
1196
1197        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1198        ASSERT_EQ(0, ret);
1199
1200        EXPECT_EQ(0, self->poked);
1201        ret = read(-1, NULL, 0);
1202        EXPECT_EQ(-1, ret);
1203        EXPECT_EQ(0x1001, self->poked);
1204}
1205
1206TEST_F(TRACE_poke, getpid_runs_normally)
1207{
1208        long ret;
1209
1210        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1211        ASSERT_EQ(0, ret);
1212
1213        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1214        ASSERT_EQ(0, ret);
1215
1216        EXPECT_EQ(0, self->poked);
1217        EXPECT_NE(0, syscall(__NR_getpid));
1218        EXPECT_EQ(0, self->poked);
1219}
1220
/*
 * Per-architecture register access:
 *   ARCH_REGS    - register structure exchanged with ptrace
 *   SYSCALL_NUM  - member holding the syscall number
 *   SYSCALL_RET  - member holding the syscall return value
 *
 * HAVE_GETREGS selects PTRACE_GETREGS and PTRACE_SETREGS when available.
 * This is useful for architectures without HAVE_ARCH_TRACEHOOK (e.g.
 * User-mode Linux).  It is defined for x86_64 and i386 only.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
# define HAVE_GETREGS
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
# define HAVE_GETREGS
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1255
1256/* Architecture-specific syscall fetching routine. */
1257int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1258{
1259        ARCH_REGS regs;
1260#ifdef HAVE_GETREGS
1261        EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1262                TH_LOG("PTRACE_GETREGS failed");
1263                return -1;
1264        }
1265#else
1266        struct iovec iov;
1267
1268        iov.iov_base = &regs;
1269        iov.iov_len = sizeof(regs);
1270        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1271                TH_LOG("PTRACE_GETREGSET failed");
1272                return -1;
1273        }
1274#endif
1275
1276        return regs.SYSCALL_NUM;
1277}
1278
1279/* Architecture-specific syscall changing routine. */
1280void change_syscall(struct __test_metadata *_metadata,
1281                    pid_t tracee, int syscall)
1282{
1283        int ret;
1284        ARCH_REGS regs;
1285#ifdef HAVE_GETREGS
1286        ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1287#else
1288        struct iovec iov;
1289        iov.iov_base = &regs;
1290        iov.iov_len = sizeof(regs);
1291        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1292#endif
1293        EXPECT_EQ(0, ret);
1294
1295#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1296    defined(__s390__)
1297        {
1298                regs.SYSCALL_NUM = syscall;
1299        }
1300
1301#elif defined(__arm__)
1302# ifndef PTRACE_SET_SYSCALL
1303#  define PTRACE_SET_SYSCALL   23
1304# endif
1305        {
1306                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1307                EXPECT_EQ(0, ret);
1308        }
1309
1310#elif defined(__aarch64__)
1311# ifndef NT_ARM_SYSTEM_CALL
1312#  define NT_ARM_SYSTEM_CALL 0x404
1313# endif
1314        {
1315                iov.iov_base = &syscall;
1316                iov.iov_len = sizeof(syscall);
1317                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1318                             &iov);
1319                EXPECT_EQ(0, ret);
1320        }
1321
1322#else
1323        ASSERT_EQ(1, 0) {
1324                TH_LOG("How is the syscall changed on this architecture?");
1325        }
1326#endif
1327
1328        /* If syscall is skipped, change return value. */
1329        if (syscall == -1)
1330                regs.SYSCALL_RET = 1;
1331
1332#ifdef HAVE_GETREGS
1333        ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1334#else
1335        iov.iov_base = &regs;
1336        iov.iov_len = sizeof(regs);
1337        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1338#endif
1339        EXPECT_EQ(0, ret);
1340}
1341
1342void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1343                    int status, void *args)
1344{
1345        int ret;
1346        unsigned long msg;
1347
1348        /* Make sure we got the right message. */
1349        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1350        EXPECT_EQ(0, ret);
1351
1352        /* Validate and take action on expected syscalls. */
1353        switch (msg) {
1354        case 0x1002:
1355                /* change getpid to getppid. */
1356                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1357                change_syscall(_metadata, tracee, __NR_getppid);
1358                break;
1359        case 0x1003:
1360                /* skip gettid. */
1361                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1362                change_syscall(_metadata, tracee, -1);
1363                break;
1364        case 0x1004:
1365                /* do nothing (allow getppid) */
1366                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1367                break;
1368        default:
1369                EXPECT_EQ(0, msg) {
1370                        TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1371                        kill(tracee, SIGKILL);
1372                }
1373        }
1374
1375}
1376
1377FIXTURE_DATA(TRACE_syscall) {
1378        struct sock_fprog prog;
1379        pid_t tracer, mytid, mypid, parent;
1380};
1381
1382FIXTURE_SETUP(TRACE_syscall)
1383{
1384        struct sock_filter filter[] = {
1385                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1386                        offsetof(struct seccomp_data, nr)),
1387                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1388                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1389                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1390                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1391                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1392                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1393                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1394        };
1395
1396        memset(&self->prog, 0, sizeof(self->prog));
1397        self->prog.filter = malloc(sizeof(filter));
1398        ASSERT_NE(NULL, self->prog.filter);
1399        memcpy(self->prog.filter, filter, sizeof(filter));
1400        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1401
1402        /* Prepare some testable syscall results. */
1403        self->mytid = syscall(__NR_gettid);
1404        ASSERT_GT(self->mytid, 0);
1405        ASSERT_NE(self->mytid, 1) {
1406                TH_LOG("Running this test as init is not supported. :)");
1407        }
1408
1409        self->mypid = getpid();
1410        ASSERT_GT(self->mypid, 0);
1411        ASSERT_EQ(self->mytid, self->mypid);
1412
1413        self->parent = getppid();
1414        ASSERT_GT(self->parent, 0);
1415        ASSERT_NE(self->parent, self->mypid);
1416
1417        /* Launch tracer. */
1418        self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1419}
1420
1421FIXTURE_TEARDOWN(TRACE_syscall)
1422{
1423        teardown_trace_fixture(_metadata, self->tracer);
1424        if (self->prog.filter)
1425                free(self->prog.filter);
1426}
1427
1428TEST_F(TRACE_syscall, syscall_allowed)
1429{
1430        long ret;
1431
1432        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1433        ASSERT_EQ(0, ret);
1434
1435        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1436        ASSERT_EQ(0, ret);
1437
1438        /* getppid works as expected (no changes). */
1439        EXPECT_EQ(self->parent, syscall(__NR_getppid));
1440        EXPECT_NE(self->mypid, syscall(__NR_getppid));
1441}
1442
1443TEST_F(TRACE_syscall, syscall_redirected)
1444{
1445        long ret;
1446
1447        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1448        ASSERT_EQ(0, ret);
1449
1450        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1451        ASSERT_EQ(0, ret);
1452
1453        /* getpid has been redirected to getppid as expected. */
1454        EXPECT_EQ(self->parent, syscall(__NR_getpid));
1455        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1456}
1457
1458TEST_F(TRACE_syscall, syscall_dropped)
1459{
1460        long ret;
1461
1462        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1463        ASSERT_EQ(0, ret);
1464
1465        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1466        ASSERT_EQ(0, ret);
1467
1468        /* gettid has been skipped and an altered return value stored. */
1469        EXPECT_EQ(1, syscall(__NR_gettid));
1470        EXPECT_NE(self->mytid, syscall(__NR_gettid));
1471}
1472
/*
 * Fallback definitions for build environments whose headers do not know
 * about the seccomp() syscall yet.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Operation codes passed as the first argument of seccomp(). */
#if !defined(SECCOMP_SET_MODE_STRICT)
#define SECCOMP_SET_MODE_STRICT 0
#endif

#if !defined(SECCOMP_SET_MODE_FILTER)
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Flag passed as the second argument of seccomp(). */
#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
#define SECCOMP_FILTER_FLAG_TSYNC 1
#endif
1503
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.  errno is cleared before
 * the call so callers can tell from errno alone whether the call failed.
 * Returns the syscall's result.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
1511
1512TEST(seccomp_syscall)
1513{
1514        struct sock_filter filter[] = {
1515                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1516        };
1517        struct sock_fprog prog = {
1518                .len = (unsigned short)ARRAY_SIZE(filter),
1519                .filter = filter,
1520        };
1521        long ret;
1522
1523        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1524        ASSERT_EQ(0, ret) {
1525                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1526        }
1527
1528        /* Reject insane operation. */
1529        ret = seccomp(-1, 0, &prog);
1530        ASSERT_NE(ENOSYS, errno) {
1531                TH_LOG("Kernel does not support seccomp syscall!");
1532        }
1533        EXPECT_EQ(EINVAL, errno) {
1534                TH_LOG("Did not reject crazy op value!");
1535        }
1536
1537        /* Reject strict with flags or pointer. */
1538        ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1539        EXPECT_EQ(EINVAL, errno) {
1540                TH_LOG("Did not reject mode strict with flags!");
1541        }
1542        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1543        EXPECT_EQ(EINVAL, errno) {
1544                TH_LOG("Did not reject mode strict with uargs!");
1545        }
1546
1547        /* Reject insane args for filter. */
1548        ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1549        EXPECT_EQ(EINVAL, errno) {
1550                TH_LOG("Did not reject crazy filter flags!");
1551        }
1552        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1553        EXPECT_EQ(EFAULT, errno) {
1554                TH_LOG("Did not reject NULL filter!");
1555        }
1556
1557        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1558        EXPECT_EQ(0, errno) {
1559                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1560                        strerror(errno));
1561        }
1562}
1563
1564TEST(seccomp_syscall_mode_lock)
1565{
1566        struct sock_filter filter[] = {
1567                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1568        };
1569        struct sock_fprog prog = {
1570                .len = (unsigned short)ARRAY_SIZE(filter),
1571                .filter = filter,
1572        };
1573        long ret;
1574
1575        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1576        ASSERT_EQ(0, ret) {
1577                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1578        }
1579
1580        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1581        ASSERT_NE(ENOSYS, errno) {
1582                TH_LOG("Kernel does not support seccomp syscall!");
1583        }
1584        EXPECT_EQ(0, ret) {
1585                TH_LOG("Could not install filter!");
1586        }
1587
1588        /* Make sure neither entry point will switch to strict. */
1589        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1590        EXPECT_EQ(EINVAL, errno) {
1591                TH_LOG("Switched to mode strict!");
1592        }
1593
1594        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1595        EXPECT_EQ(EINVAL, errno) {
1596                TH_LOG("Switched to mode strict!");
1597        }
1598}
1599
1600TEST(TSYNC_first)
1601{
1602        struct sock_filter filter[] = {
1603                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1604        };
1605        struct sock_fprog prog = {
1606                .len = (unsigned short)ARRAY_SIZE(filter),
1607                .filter = filter,
1608        };
1609        long ret;
1610
1611        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1612        ASSERT_EQ(0, ret) {
1613                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1614        }
1615
1616        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1617                      &prog);
1618        ASSERT_NE(ENOSYS, errno) {
1619                TH_LOG("Kernel does not support seccomp syscall!");
1620        }
1621        EXPECT_EQ(0, ret) {
1622                TH_LOG("Could not install initial filter with TSYNC!");
1623        }
1624}
1625
/* Number of sibling threads used by the TSYNC tests. */
#define TSYNC_SIBLINGS 2

/* Bookkeeping for one sibling thread of the TSYNC tests. */
struct tsync_sibling {
	/* pthread handle; 0 until the thread has been created. */
	pthread_t tid;
	/* gettid() value, stored by the thread itself. */
	pid_t system_tid;
	/* Posted by the thread once it has started. */
	sem_t *started;
	/* Condition the thread waits on before testing the policy. */
	pthread_cond_t *cond;
	/* Mutex paired with cond. */
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs prog itself before waiting. */
	int diverge;
	/* How many condition wake-ups the thread consumes. */
	int num_waits;
	/* Program installed when diverge is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the owning test. */
	struct __test_metadata *metadata;
};
1638
1639FIXTURE_DATA(TSYNC) {
1640        struct sock_fprog root_prog, apply_prog;
1641        struct tsync_sibling sibling[TSYNC_SIBLINGS];
1642        sem_t started;
1643        pthread_cond_t cond;
1644        pthread_mutex_t mutex;
1645        int sibling_count;
1646};
1647
1648FIXTURE_SETUP(TSYNC)
1649{
1650        struct sock_filter root_filter[] = {
1651                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1652        };
1653        struct sock_filter apply_filter[] = {
1654                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1655                        offsetof(struct seccomp_data, nr)),
1656                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1657                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1658                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1659        };
1660
1661        memset(&self->root_prog, 0, sizeof(self->root_prog));
1662        memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1663        memset(&self->sibling, 0, sizeof(self->sibling));
1664        self->root_prog.filter = malloc(sizeof(root_filter));
1665        ASSERT_NE(NULL, self->root_prog.filter);
1666        memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1667        self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1668
1669        self->apply_prog.filter = malloc(sizeof(apply_filter));
1670        ASSERT_NE(NULL, self->apply_prog.filter);
1671        memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1672        self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1673
1674        self->sibling_count = 0;
1675        pthread_mutex_init(&self->mutex, NULL);
1676        pthread_cond_init(&self->cond, NULL);
1677        sem_init(&self->started, 0, 0);
1678        self->sibling[0].tid = 0;
1679        self->sibling[0].cond = &self->cond;
1680        self->sibling[0].started = &self->started;
1681        self->sibling[0].mutex = &self->mutex;
1682        self->sibling[0].diverge = 0;
1683        self->sibling[0].num_waits = 1;
1684        self->sibling[0].prog = &self->root_prog;
1685        self->sibling[0].metadata = _metadata;
1686        self->sibling[1].tid = 0;
1687        self->sibling[1].cond = &self->cond;
1688        self->sibling[1].started = &self->started;
1689        self->sibling[1].mutex = &self->mutex;
1690        self->sibling[1].diverge = 0;
1691        self->sibling[1].prog = &self->root_prog;
1692        self->sibling[1].num_waits = 1;
1693        self->sibling[1].metadata = _metadata;
1694}
1695
1696FIXTURE_TEARDOWN(TSYNC)
1697{
1698        int sib = 0;
1699
1700        if (self->root_prog.filter)
1701                free(self->root_prog.filter);
1702        if (self->apply_prog.filter)
1703                free(self->apply_prog.filter);
1704
1705        for ( ; sib < self->sibling_count; ++sib) {
1706                struct tsync_sibling *s = &self->sibling[sib];
1707                void *status;
1708
1709                if (!s->tid)
1710                        continue;
1711                if (pthread_kill(s->tid, 0)) {
1712                        pthread_cancel(s->tid);
1713                        pthread_join(s->tid, &status);
1714                }
1715        }
1716        pthread_mutex_destroy(&self->mutex);
1717        pthread_cond_destroy(&self->cond);
1718        sem_destroy(&self->started);
1719}
1720
1721void *tsync_sibling(void *data)
1722{
1723        long ret = 0;
1724        struct tsync_sibling *me = data;
1725
1726        me->system_tid = syscall(__NR_gettid);
1727
1728        pthread_mutex_lock(me->mutex);
1729        if (me->diverge) {
1730                /* Just re-apply the root prog to fork the tree */
1731                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1732                                me->prog, 0, 0);
1733        }
1734        sem_post(me->started);
1735        /* Return outside of started so parent notices failures. */
1736        if (ret) {
1737                pthread_mutex_unlock(me->mutex);
1738                return (void *)SIBLING_EXIT_FAILURE;
1739        }
1740        do {
1741                pthread_cond_wait(me->cond, me->mutex);
1742                me->num_waits = me->num_waits - 1;
1743        } while (me->num_waits);
1744        pthread_mutex_unlock(me->mutex);
1745
1746        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1747        if (!ret)
1748                return (void *)SIBLING_EXIT_NEWPRIVS;
1749        read(0, NULL, 0);
1750        return (void *)SIBLING_EXIT_UNKILLED;
1751}
1752
1753void tsync_start_sibling(struct tsync_sibling *sibling)
1754{
1755        pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1756}
1757
1758TEST_F(TSYNC, siblings_fail_prctl)
1759{
1760        long ret;
1761        void *status;
1762        struct sock_filter filter[] = {
1763                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1764                        offsetof(struct seccomp_data, nr)),
1765                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1766                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1767                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1768        };
1769        struct sock_fprog prog = {
1770                .len = (unsigned short)ARRAY_SIZE(filter),
1771                .filter = filter,
1772        };
1773
1774        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1775                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1776        }
1777
1778        /* Check prctl failure detection by requesting sib 0 diverge. */
1779        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1780        ASSERT_NE(ENOSYS, errno) {
1781                TH_LOG("Kernel does not support seccomp syscall!");
1782        }
1783        ASSERT_EQ(0, ret) {
1784                TH_LOG("setting filter failed");
1785        }
1786
1787        self->sibling[0].diverge = 1;
1788        tsync_start_sibling(&self->sibling[0]);
1789        tsync_start_sibling(&self->sibling[1]);
1790
1791        while (self->sibling_count < TSYNC_SIBLINGS) {
1792                sem_wait(&self->started);
1793                self->sibling_count++;
1794        }
1795
1796        /* Signal the threads to clean up*/
1797        pthread_mutex_lock(&self->mutex);
1798        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1799                TH_LOG("cond broadcast non-zero");
1800        }
1801        pthread_mutex_unlock(&self->mutex);
1802
1803        /* Ensure diverging sibling failed to call prctl. */
1804        pthread_join(self->sibling[0].tid, &status);
1805        EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1806        pthread_join(self->sibling[1].tid, &status);
1807        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1808}
1809
1810TEST_F(TSYNC, two_siblings_with_ancestor)
1811{
1812        long ret;
1813        void *status;
1814
1815        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1816                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1817        }
1818
1819        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1820        ASSERT_NE(ENOSYS, errno) {
1821                TH_LOG("Kernel does not support seccomp syscall!");
1822        }
1823        ASSERT_EQ(0, ret) {
1824                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1825        }
1826        tsync_start_sibling(&self->sibling[0]);
1827        tsync_start_sibling(&self->sibling[1]);
1828
1829        while (self->sibling_count < TSYNC_SIBLINGS) {
1830                sem_wait(&self->started);
1831                self->sibling_count++;
1832        }
1833
1834        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1835                      &self->apply_prog);
1836        ASSERT_EQ(0, ret) {
1837                TH_LOG("Could install filter on all threads!");
1838        }
1839        /* Tell the siblings to test the policy */
1840        pthread_mutex_lock(&self->mutex);
1841        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1842                TH_LOG("cond broadcast non-zero");
1843        }
1844        pthread_mutex_unlock(&self->mutex);
1845        /* Ensure they are both killed and don't exit cleanly. */
1846        pthread_join(self->sibling[0].tid, &status);
1847        EXPECT_EQ(0x0, (long)status);
1848        pthread_join(self->sibling[1].tid, &status);
1849        EXPECT_EQ(0x0, (long)status);
1850}
1851
1852TEST_F(TSYNC, two_sibling_want_nnp)
1853{
1854        void *status;
1855
1856        /* start siblings before any prctl() operations */
1857        tsync_start_sibling(&self->sibling[0]);
1858        tsync_start_sibling(&self->sibling[1]);
1859        while (self->sibling_count < TSYNC_SIBLINGS) {
1860                sem_wait(&self->started);
1861                self->sibling_count++;
1862        }
1863
1864        /* Tell the siblings to test no policy */
1865        pthread_mutex_lock(&self->mutex);
1866        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1867                TH_LOG("cond broadcast non-zero");
1868        }
1869        pthread_mutex_unlock(&self->mutex);
1870
1871        /* Ensure they are both upset about lacking nnp. */
1872        pthread_join(self->sibling[0].tid, &status);
1873        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1874        pthread_join(self->sibling[1].tid, &status);
1875        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1876}
1877
1878TEST_F(TSYNC, two_siblings_with_no_filter)
1879{
1880        long ret;
1881        void *status;
1882
1883        /* start siblings before any prctl() operations */
1884        tsync_start_sibling(&self->sibling[0]);
1885        tsync_start_sibling(&self->sibling[1]);
1886        while (self->sibling_count < TSYNC_SIBLINGS) {
1887                sem_wait(&self->started);
1888                self->sibling_count++;
1889        }
1890
1891        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1892                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1893        }
1894
1895        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1896                      &self->apply_prog);
1897        ASSERT_NE(ENOSYS, errno) {
1898                TH_LOG("Kernel does not support seccomp syscall!");
1899        }
1900        ASSERT_EQ(0, ret) {
1901                TH_LOG("Could install filter on all threads!");
1902        }
1903
1904        /* Tell the siblings to test the policy */
1905        pthread_mutex_lock(&self->mutex);
1906        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1907                TH_LOG("cond broadcast non-zero");
1908        }
1909        pthread_mutex_unlock(&self->mutex);
1910
1911        /* Ensure they are both killed and don't exit cleanly. */
1912        pthread_join(self->sibling[0].tid, &status);
1913        EXPECT_EQ(0x0, (long)status);
1914        pthread_join(self->sibling[1].tid, &status);
1915        EXPECT_EQ(0x0, (long)status);
1916}
1917
1918TEST_F(TSYNC, two_siblings_with_one_divergence)
1919{
1920        long ret;
1921        void *status;
1922
1923        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1924                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1925        }
1926
1927        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1928        ASSERT_NE(ENOSYS, errno) {
1929                TH_LOG("Kernel does not support seccomp syscall!");
1930        }
1931        ASSERT_EQ(0, ret) {
1932                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1933        }
1934        self->sibling[0].diverge = 1;
1935        tsync_start_sibling(&self->sibling[0]);
1936        tsync_start_sibling(&self->sibling[1]);
1937
1938        while (self->sibling_count < TSYNC_SIBLINGS) {
1939                sem_wait(&self->started);
1940                self->sibling_count++;
1941        }
1942
1943        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1944                      &self->apply_prog);
1945        ASSERT_EQ(self->sibling[0].system_tid, ret) {
1946                TH_LOG("Did not fail on diverged sibling.");
1947        }
1948
1949        /* Wake the threads */
1950        pthread_mutex_lock(&self->mutex);
1951        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1952                TH_LOG("cond broadcast non-zero");
1953        }
1954        pthread_mutex_unlock(&self->mutex);
1955
1956        /* Ensure they are both unkilled. */
1957        pthread_join(self->sibling[0].tid, &status);
1958        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1959        pthread_join(self->sibling[1].tid, &status);
1960        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1961}
1962
1963TEST_F(TSYNC, two_siblings_not_under_filter)
1964{
1965        long ret, sib;
1966        void *status;
1967
1968        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1969                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1970        }
1971
1972        /*
1973         * Sibling 0 will have its own seccomp policy
1974         * and Sibling 1 will not be under seccomp at
1975         * all. Sibling 1 will enter seccomp and 0
1976         * will cause failure.
1977         */
1978        self->sibling[0].diverge = 1;
1979        tsync_start_sibling(&self->sibling[0]);
1980        tsync_start_sibling(&self->sibling[1]);
1981
1982        while (self->sibling_count < TSYNC_SIBLINGS) {
1983                sem_wait(&self->started);
1984                self->sibling_count++;
1985        }
1986
1987        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1988        ASSERT_NE(ENOSYS, errno) {
1989                TH_LOG("Kernel does not support seccomp syscall!");
1990        }
1991        ASSERT_EQ(0, ret) {
1992                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1993        }
1994
1995        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1996                      &self->apply_prog);
1997        ASSERT_EQ(ret, self->sibling[0].system_tid) {
1998                TH_LOG("Did not fail on diverged sibling.");
1999        }
2000        sib = 1;
2001        if (ret == self->sibling[0].system_tid)
2002                sib = 0;
2003
2004        pthread_mutex_lock(&self->mutex);
2005
2006        /* Increment the other siblings num_waits so we can clean up
2007         * the one we just saw.
2008         */
2009        self->sibling[!sib].num_waits += 1;
2010
2011        /* Signal the thread to clean up*/
2012        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2013                TH_LOG("cond broadcast non-zero");
2014        }
2015        pthread_mutex_unlock(&self->mutex);
2016        pthread_join(self->sibling[sib].tid, &status);
2017        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2018        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2019        while (!kill(self->sibling[sib].system_tid, 0))
2020                sleep(0.1);
2021        /* Switch to the remaining sibling */
2022        sib = !sib;
2023
2024        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2025                      &self->apply_prog);
2026        ASSERT_EQ(0, ret) {
2027                TH_LOG("Expected the remaining sibling to sync");
2028        };
2029
2030        pthread_mutex_lock(&self->mutex);
2031
2032        /* If remaining sibling didn't have a chance to wake up during
2033         * the first broadcast, manually reduce the num_waits now.
2034         */
2035        if (self->sibling[sib].num_waits > 1)
2036                self->sibling[sib].num_waits = 1;
2037        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2038                TH_LOG("cond broadcast non-zero");
2039        }
2040        pthread_mutex_unlock(&self->mutex);
2041        pthread_join(self->sibling[sib].tid, &status);
2042        EXPECT_EQ(0, (long)status);
2043        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2044        while (!kill(self->sibling[sib].system_tid, 0))
2045                sleep(0.1);
2046
2047        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2048                      &self->apply_prog);
2049        ASSERT_EQ(0, ret);  /* just us chickens */
2050}
2051
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child makes the parent its tracer, installs a filter that
 * returns SECCOMP_RET_TRACE|0x100 for nanosleep and SECCOMP_RET_TRACE|0x200
 * for restart_syscall, and then calls nanosleep(). The parent stops the
 * child with SIGSTOP at the first seccomp event, resumes it with SIGCONT,
 * and checks that the next seccomp event carries the 0x200 tag. A pipe is
 * used to sequence the child's steps from the parent ('.' then '!').
 */
TEST(syscall_restart)
{
        long ret;
        unsigned long msg;
        pid_t child_pid;
        int pipefd[2];
        int status;
        siginfo_t info = { };
        struct sock_filter filter[] = {
                /* Load the syscall number for the comparisons below. */
                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
                         offsetof(struct seccomp_data, nr)),

                /*
                 * Each "true" offset below is counted from the following
                 * instruction: sigreturn, read, exit and rt_sigreturn land
                 * on RET_ALLOW; nanosleep and restart_syscall land on their
                 * own RET_TRACE entries at the end of the program.
                 */
#ifdef __NR_sigreturn
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

                /* Allow __NR_write for easy logging. */
                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
                /* Anything not matched above is killed. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
                /* The nanosleep jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
                /* The restart_syscall jump target. */
                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
        };
        struct sock_fprog prog = {
                .len = (unsigned short)ARRAY_SIZE(filter),
                .filter = filter,
        };
#if defined(__arm__)
        /* Only needed for the machine-name check near the end of the test. */
        struct utsname utsbuf;
#endif

        ASSERT_EQ(0, pipe(pipefd));

        child_pid = fork();
        ASSERT_LE(0, child_pid);
        if (child_pid == 0) {
                /* Child uses EXPECT not ASSERT to deliver status correctly. */
                char buf = ' ';
                struct timespec timeout = { };

                /* Attach parent as tracer and stop. */
                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
                EXPECT_EQ(0, raise(SIGSTOP));

                /* The child only reads from the pipe. */
                EXPECT_EQ(0, close(pipefd[1]));

                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
                }

                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
                EXPECT_EQ(0, ret) {
                        TH_LOG("Failed to install filter!");
                }

                /* Block until the parent sends the first sync byte ('.'). */
                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed to read() sync from parent");
                }
                EXPECT_EQ('.', buf) {
                        TH_LOG("Failed to get sync data from read()");
                }

                /* Start nanosleep to be interrupted. */
                timeout.tv_sec = 1;
                errno = 0;
                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
                }

                /* Read final sync from parent. */
                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
                        TH_LOG("Failed final read() from parent");
                }
                EXPECT_EQ('!', buf) {
                        TH_LOG("Failed to get final data from read()");
                }

                /* Directly report the status of our test harness results. */
                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
                                                     : EXIT_FAILURE);
        }
        /* The parent only writes to the pipe. */
        EXPECT_EQ(0, close(pipefd[0]));

        /* Attach to child, setup options, and release. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
                            PTRACE_O_TRACESECCOMP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], ".", 1));

        /* Wait for nanosleep() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        /* The ptrace event code is expected in the bits above the low 16. */
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
        /* 0x100 is the tag the filter attaches to nanosleep. */
        ASSERT_EQ(0x100, msg);
        EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

        /* Might as well check siginfo for sanity while we're here. */
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        ASSERT_EQ(SIGTRAP, info.si_signo);
        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
        EXPECT_EQ(0, info.si_errno);
        EXPECT_EQ(getuid(), info.si_uid);
        /* Verify signal delivery came from child (seccomp-triggered). */
        EXPECT_EQ(child_pid, info.si_pid);

        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
        /* Verify signal delivery came from parent now. */
        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
        EXPECT_EQ(getpid(), info.si_pid);

        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
        ASSERT_EQ(0, kill(child_pid, SIGCONT));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

        /* Wait for restart_syscall() to start. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        ASSERT_EQ(true, WIFSTOPPED(status));
        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

        /* 0x200 is the tag the filter attaches to restart_syscall. */
        ASSERT_EQ(0x200, msg);
        ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
        /*
         * FIXME:
         * - native ARM registers do NOT expose true syscall.
         * - compat ARM registers on ARM64 DO expose true syscall.
         */
        ASSERT_EQ(0, uname(&utsbuf));
        /* A machine name starting with "arm" is treated as native ARM. */
        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
                EXPECT_EQ(__NR_nanosleep, ret);
        } else
#endif
        {
                EXPECT_EQ(__NR_restart_syscall, ret);
        }

        /* Write again to end test. */
        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
        ASSERT_EQ(1, write(pipefd[1], "!", 1));
        EXPECT_EQ(0, close(pipefd[1]));

        /* Fail the test if the child was signaled or exited non-zero. */
        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
        if (WIFSIGNALED(status) || WEXITSTATUS(status))
                _metadata->passed = 0;
}
2220
2221/*
2222 * TODO:
2223 * - add microbenchmarks
2224 * - expand NNP testing
2225 * - better arch-specific TRACE and TRAP handlers.
2226 * - endianness checking when appropriate
2227 * - 64-bit arg prodding
2228 * - arch value testing (x86 modes especially)
2229 * - ...
2230 */
2231
/* NOTE(review): presumably supplies main() via test_harness.h -- confirm. */
TEST_HARNESS_MAIN
2233