linux/tools/testing/selftests/seccomp/seccomp_bpf.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   3 * Use of this source code is governed by the GPLv2 license.
   4 *
   5 * Test code for seccomp bpf.
   6 */
   7
   8#include <asm/siginfo.h>
   9#define __have_siginfo_t 1
  10#define __have_sigval_t 1
  11#define __have_sigevent_t 1
  12
  13#include <errno.h>
  14#include <linux/filter.h>
  15#include <sys/prctl.h>
  16#include <sys/ptrace.h>
  17#include <sys/types.h>
  18#include <sys/user.h>
  19#include <linux/prctl.h>
  20#include <linux/ptrace.h>
  21#include <linux/seccomp.h>
  22#include <pthread.h>
  23#include <semaphore.h>
  24#include <signal.h>
  25#include <stddef.h>
  26#include <stdbool.h>
  27#include <string.h>
  28#include <time.h>
  29#include <linux/elf.h>
  30#include <sys/uio.h>
  31#include <sys/utsname.h>
  32#include <sys/fcntl.h>
  33#include <sys/mman.h>
  34#include <sys/times.h>
  35
  36#define _GNU_SOURCE
  37#include <unistd.h>
  38#include <sys/syscall.h>
  39
  40#include "test_harness.h"
  41
  42#ifndef PR_SET_PTRACER
  43# define PR_SET_PTRACER 0x59616d61
  44#endif
  45
  46#ifndef PR_SET_NO_NEW_PRIVS
  47#define PR_SET_NO_NEW_PRIVS 38
  48#define PR_GET_NO_NEW_PRIVS 39
  49#endif
  50
  51#ifndef PR_SECCOMP_EXT
  52#define PR_SECCOMP_EXT 43
  53#endif
  54
  55#ifndef SECCOMP_EXT_ACT
  56#define SECCOMP_EXT_ACT 1
  57#endif
  58
  59#ifndef SECCOMP_EXT_ACT_TSYNC
  60#define SECCOMP_EXT_ACT_TSYNC 1
  61#endif
  62
  63#ifndef SECCOMP_MODE_STRICT
  64#define SECCOMP_MODE_STRICT 1
  65#endif
  66
  67#ifndef SECCOMP_MODE_FILTER
  68#define SECCOMP_MODE_FILTER 2
  69#endif
  70
  71#ifndef SECCOMP_RET_KILL
  72#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
  73#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
  74#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
  75#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
  76#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
  77
  78/* Masks for the return value sections. */
  79#define SECCOMP_RET_ACTION      0x7fff0000U
  80#define SECCOMP_RET_DATA        0x0000ffffU
  81
  82struct seccomp_data {
  83        int nr;
  84        __u32 arch;
  85        __u64 instruction_pointer;
  86        __u64 args[6];
  87};
  88#endif
  89
  90#if __BYTE_ORDER == __LITTLE_ENDIAN
  91#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
  92#elif __BYTE_ORDER == __BIG_ENDIAN
  93#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
  94#else
  95#error "wut? Unknown __BYTE_ORDER?!"
  96#endif
  97
  98#define SIBLING_EXIT_UNKILLED   0xbadbeef
  99#define SIBLING_EXIT_FAILURE    0xbadface
 100#define SIBLING_EXIT_NEWPRIVS   0xbadfeed
 101
 102TEST(mode_strict_support)
 103{
 104        long ret;
 105
 106        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 107        ASSERT_EQ(0, ret) {
 108                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 109        }
 110        syscall(__NR_exit, 1);
 111}
 112
 113TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
 114{
 115        long ret;
 116
 117        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
 118        ASSERT_EQ(0, ret) {
 119                TH_LOG("Kernel does not support CONFIG_SECCOMP");
 120        }
 121        syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
 122                NULL, NULL, NULL);
 123        EXPECT_FALSE(true) {
 124                TH_LOG("Unreachable!");
 125        }
 126}
 127
 128/* Note! This doesn't test no new privs behavior */
 129TEST(no_new_privs_support)
 130{
 131        long ret;
 132
 133        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 134        EXPECT_EQ(0, ret) {
 135                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 136        }
 137}
 138
 139/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
 140TEST(mode_filter_support)
 141{
 142        long ret;
 143
 144        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
 145        ASSERT_EQ(0, ret) {
 146                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 147        }
 148        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
 149        EXPECT_EQ(-1, ret);
 150        EXPECT_EQ(EFAULT, errno) {
 151                TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
 152        }
 153}
 154
 155TEST(mode_filter_without_nnp)
 156{
 157        struct sock_filter filter[] = {
 158                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 159        };
 160        struct sock_fprog prog = {
 161                .len = (unsigned short)ARRAY_SIZE(filter),
 162                .filter = filter,
 163        };
 164        long ret;
 165
 166        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
 167        ASSERT_LE(0, ret) {
 168                TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
 169        }
 170        errno = 0;
 171        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 172        /* Succeeds with CAP_SYS_ADMIN, fails without */
 173        /* TODO(wad) check caps not euid */
 174        if (geteuid()) {
 175                EXPECT_EQ(-1, ret);
 176                EXPECT_EQ(EACCES, errno);
 177        } else {
 178                EXPECT_EQ(0, ret);
 179        }
 180}
 181
 182#define MAX_INSNS_PER_PATH 32768
 183
 184TEST(filter_size_limits)
 185{
 186        int i;
 187        int count = BPF_MAXINSNS + 1;
 188        struct sock_filter allow[] = {
 189                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 190        };
 191        struct sock_filter *filter;
 192        struct sock_fprog prog = { };
 193        long ret;
 194
 195        filter = calloc(count, sizeof(*filter));
 196        ASSERT_NE(NULL, filter);
 197
 198        for (i = 0; i < count; i++)
 199                filter[i] = allow[0];
 200
 201        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 202        ASSERT_EQ(0, ret);
 203
 204        prog.filter = filter;
 205        prog.len = count;
 206
 207        /* Too many filter instructions in a single filter. */
 208        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 209        ASSERT_NE(0, ret) {
 210                TH_LOG("Installing %d insn filter was allowed", prog.len);
 211        }
 212
 213        /* One less is okay, though. */
 214        prog.len -= 1;
 215        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 216        ASSERT_EQ(0, ret) {
 217                TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
 218        }
 219}
 220
 221TEST(filter_chain_limits)
 222{
 223        int i;
 224        int count = BPF_MAXINSNS;
 225        struct sock_filter allow[] = {
 226                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 227        };
 228        struct sock_filter *filter;
 229        struct sock_fprog prog = { };
 230        long ret;
 231
 232        filter = calloc(count, sizeof(*filter));
 233        ASSERT_NE(NULL, filter);
 234
 235        for (i = 0; i < count; i++)
 236                filter[i] = allow[0];
 237
 238        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 239        ASSERT_EQ(0, ret);
 240
 241        prog.filter = filter;
 242        prog.len = 1;
 243
 244        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 245        ASSERT_EQ(0, ret);
 246
 247        prog.len = count;
 248
 249        /* Too many total filter instructions. */
 250        for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
 251                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 252                if (ret != 0)
 253                        break;
 254        }
 255        ASSERT_NE(0, ret) {
 256                TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
 257                       i, count, i * (count + 4));
 258        }
 259}
 260
 261TEST(mode_filter_cannot_move_to_strict)
 262{
 263        struct sock_filter filter[] = {
 264                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 265        };
 266        struct sock_fprog prog = {
 267                .len = (unsigned short)ARRAY_SIZE(filter),
 268                .filter = filter,
 269        };
 270        long ret;
 271
 272        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 273        ASSERT_EQ(0, ret);
 274
 275        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 276        ASSERT_EQ(0, ret);
 277
 278        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
 279        EXPECT_EQ(-1, ret);
 280        EXPECT_EQ(EINVAL, errno);
 281}
 282
 283
 284TEST(mode_filter_get_seccomp)
 285{
 286        struct sock_filter filter[] = {
 287                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 288        };
 289        struct sock_fprog prog = {
 290                .len = (unsigned short)ARRAY_SIZE(filter),
 291                .filter = filter,
 292        };
 293        long ret;
 294
 295        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 296        ASSERT_EQ(0, ret);
 297
 298        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 299        EXPECT_EQ(0, ret);
 300
 301        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
 302        ASSERT_EQ(0, ret);
 303
 304        ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
 305        EXPECT_EQ(2, ret);
 306}
 307
 308
 309TEST(ALLOW_all)
 310{
 311        struct sock_filter filter[] = {
 312                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 313        };
 314        struct sock_fprog prog = {
 315                .len = (unsigned short)ARRAY_SIZE(filter),
 316                .filter = filter,
 317        };
 318        long ret;
 319
 320        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 321        ASSERT_EQ(0, ret);
 322
 323        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 324        ASSERT_EQ(0, ret);
 325}
 326
 327TEST(empty_prog)
 328{
 329        struct sock_filter filter[] = {
 330        };
 331        struct sock_fprog prog = {
 332                .len = (unsigned short)ARRAY_SIZE(filter),
 333                .filter = filter,
 334        };
 335        long ret;
 336
 337        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 338        ASSERT_EQ(0, ret);
 339
 340        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 341        EXPECT_EQ(-1, ret);
 342        EXPECT_EQ(EINVAL, errno);
 343}
 344
 345TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 346{
 347        struct sock_filter filter[] = {
 348                BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
 349        };
 350        struct sock_fprog prog = {
 351                .len = (unsigned short)ARRAY_SIZE(filter),
 352                .filter = filter,
 353        };
 354        long ret;
 355
 356        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 357        ASSERT_EQ(0, ret);
 358
 359        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 360        ASSERT_EQ(0, ret);
 361        EXPECT_EQ(0, syscall(__NR_getpid)) {
 362                TH_LOG("getpid() shouldn't ever return");
 363        }
 364}
 365
 366/* return code >= 0x80000000 is unused. */
 367TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
 368{
 369        struct sock_filter filter[] = {
 370                BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
 371        };
 372        struct sock_fprog prog = {
 373                .len = (unsigned short)ARRAY_SIZE(filter),
 374                .filter = filter,
 375        };
 376        long ret;
 377
 378        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 379        ASSERT_EQ(0, ret);
 380
 381        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 382        ASSERT_EQ(0, ret);
 383        EXPECT_EQ(0, syscall(__NR_getpid)) {
 384                TH_LOG("getpid() shouldn't ever return");
 385        }
 386}
 387
 388TEST_SIGNAL(KILL_all, SIGSYS)
 389{
 390        struct sock_filter filter[] = {
 391                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 392        };
 393        struct sock_fprog prog = {
 394                .len = (unsigned short)ARRAY_SIZE(filter),
 395                .filter = filter,
 396        };
 397        long ret;
 398
 399        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 400        ASSERT_EQ(0, ret);
 401
 402        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 403        ASSERT_EQ(0, ret);
 404}
 405
 406TEST_SIGNAL(KILL_one, SIGSYS)
 407{
 408        struct sock_filter filter[] = {
 409                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 410                        offsetof(struct seccomp_data, nr)),
 411                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 412                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 413                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 414        };
 415        struct sock_fprog prog = {
 416                .len = (unsigned short)ARRAY_SIZE(filter),
 417                .filter = filter,
 418        };
 419        long ret;
 420        pid_t parent = getppid();
 421
 422        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 423        ASSERT_EQ(0, ret);
 424
 425        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 426        ASSERT_EQ(0, ret);
 427
 428        EXPECT_EQ(parent, syscall(__NR_getppid));
 429        /* getpid() should never return. */
 430        EXPECT_EQ(0, syscall(__NR_getpid));
 431}
 432
 433TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
 434{
 435        void *fatal_address;
 436        struct sock_filter filter[] = {
 437                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 438                        offsetof(struct seccomp_data, nr)),
 439                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
 440                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 441                /* Only both with lower 32-bit for now. */
 442                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
 443                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
 444                        (unsigned long)&fatal_address, 0, 1),
 445                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 446                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 447        };
 448        struct sock_fprog prog = {
 449                .len = (unsigned short)ARRAY_SIZE(filter),
 450                .filter = filter,
 451        };
 452        long ret;
 453        pid_t parent = getppid();
 454        struct tms timebuf;
 455        clock_t clock = times(&timebuf);
 456
 457        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 458        ASSERT_EQ(0, ret);
 459
 460        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 461        ASSERT_EQ(0, ret);
 462
 463        EXPECT_EQ(parent, syscall(__NR_getppid));
 464        EXPECT_LE(clock, syscall(__NR_times, &timebuf));
 465        /* times() should never return. */
 466        EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
 467}
 468
 469TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 470{
 471#ifndef __NR_mmap2
 472        int sysno = __NR_mmap;
 473#else
 474        int sysno = __NR_mmap2;
 475#endif
 476        struct sock_filter filter[] = {
 477                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 478                        offsetof(struct seccomp_data, nr)),
 479                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
 480                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 481                /* Only both with lower 32-bit for now. */
 482                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
 483                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
 484                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 485                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 486        };
 487        struct sock_fprog prog = {
 488                .len = (unsigned short)ARRAY_SIZE(filter),
 489                .filter = filter,
 490        };
 491        long ret;
 492        pid_t parent = getppid();
 493        int fd;
 494        void *map1, *map2;
 495        int page_size = sysconf(_SC_PAGESIZE);
 496
 497        ASSERT_LT(0, page_size);
 498
 499        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 500        ASSERT_EQ(0, ret);
 501
 502        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 503        ASSERT_EQ(0, ret);
 504
 505        fd = open("/dev/zero", O_RDONLY);
 506        ASSERT_NE(-1, fd);
 507
 508        EXPECT_EQ(parent, syscall(__NR_getppid));
 509        map1 = (void *)syscall(sysno,
 510                NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 511        EXPECT_NE(MAP_FAILED, map1);
 512        /* mmap2() should never return. */
 513        map2 = (void *)syscall(sysno,
 514                 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 515        EXPECT_EQ(MAP_FAILED, map2);
 516
 517        /* The test failed, so clean up the resources. */
 518        munmap(map1, page_size);
 519        munmap(map2, page_size);
 520        close(fd);
 521}
 522
 523/* TODO(wad) add 64-bit versus 32-bit arg tests. */
 524TEST(arg_out_of_range)
 525{
 526        struct sock_filter filter[] = {
 527                BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
 528                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 529        };
 530        struct sock_fprog prog = {
 531                .len = (unsigned short)ARRAY_SIZE(filter),
 532                .filter = filter,
 533        };
 534        long ret;
 535
 536        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 537        ASSERT_EQ(0, ret);
 538
 539        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 540        EXPECT_EQ(-1, ret);
 541        EXPECT_EQ(EINVAL, errno);
 542}
 543
 544TEST(ERRNO_valid)
 545{
 546        struct sock_filter filter[] = {
 547                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 548                        offsetof(struct seccomp_data, nr)),
 549                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 550                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
 551                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 552        };
 553        struct sock_fprog prog = {
 554                .len = (unsigned short)ARRAY_SIZE(filter),
 555                .filter = filter,
 556        };
 557        long ret;
 558        pid_t parent = getppid();
 559
 560        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 561        ASSERT_EQ(0, ret);
 562
 563        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 564        ASSERT_EQ(0, ret);
 565
 566        EXPECT_EQ(parent, syscall(__NR_getppid));
 567        EXPECT_EQ(-1, read(0, NULL, 0));
 568        EXPECT_EQ(E2BIG, errno);
 569}
 570
 571TEST(ERRNO_zero)
 572{
 573        struct sock_filter filter[] = {
 574                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 575                        offsetof(struct seccomp_data, nr)),
 576                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 577                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
 578                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 579        };
 580        struct sock_fprog prog = {
 581                .len = (unsigned short)ARRAY_SIZE(filter),
 582                .filter = filter,
 583        };
 584        long ret;
 585        pid_t parent = getppid();
 586
 587        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 588        ASSERT_EQ(0, ret);
 589
 590        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 591        ASSERT_EQ(0, ret);
 592
 593        EXPECT_EQ(parent, syscall(__NR_getppid));
 594        /* "errno" of 0 is ok. */
 595        EXPECT_EQ(0, read(0, NULL, 0));
 596}
 597
 598TEST(ERRNO_capped)
 599{
 600        struct sock_filter filter[] = {
 601                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 602                        offsetof(struct seccomp_data, nr)),
 603                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
 604                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
 605                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 606        };
 607        struct sock_fprog prog = {
 608                .len = (unsigned short)ARRAY_SIZE(filter),
 609                .filter = filter,
 610        };
 611        long ret;
 612        pid_t parent = getppid();
 613
 614        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 615        ASSERT_EQ(0, ret);
 616
 617        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 618        ASSERT_EQ(0, ret);
 619
 620        EXPECT_EQ(parent, syscall(__NR_getppid));
 621        EXPECT_EQ(-1, read(0, NULL, 0));
 622        EXPECT_EQ(4095, errno);
 623}
 624
 625FIXTURE_DATA(TRAP) {
 626        struct sock_fprog prog;
 627};
 628
 629FIXTURE_SETUP(TRAP)
 630{
 631        struct sock_filter filter[] = {
 632                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 633                        offsetof(struct seccomp_data, nr)),
 634                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
 635                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 636                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 637        };
 638
 639        memset(&self->prog, 0, sizeof(self->prog));
 640        self->prog.filter = malloc(sizeof(filter));
 641        ASSERT_NE(NULL, self->prog.filter);
 642        memcpy(self->prog.filter, filter, sizeof(filter));
 643        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
 644}
 645
 646FIXTURE_TEARDOWN(TRAP)
 647{
 648        if (self->prog.filter)
 649                free(self->prog.filter);
 650}
 651
 652TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
 653{
 654        long ret;
 655
 656        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 657        ASSERT_EQ(0, ret);
 658
 659        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 660        ASSERT_EQ(0, ret);
 661        syscall(__NR_getpid);
 662}
 663
 664/* Ensure that SIGSYS overrides SIG_IGN */
 665TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 666{
 667        long ret;
 668
 669        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 670        ASSERT_EQ(0, ret);
 671
 672        signal(SIGSYS, SIG_IGN);
 673
 674        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 675        ASSERT_EQ(0, ret);
 676        syscall(__NR_getpid);
 677}
 678
 679static struct siginfo TRAP_info;
 680static volatile int TRAP_nr;
 681static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 682{
 683        memcpy(&TRAP_info, info, sizeof(TRAP_info));
 684        TRAP_nr = nr;
 685}
 686
 687TEST_F(TRAP, handler)
 688{
 689        int ret, test;
 690        struct sigaction act;
 691        sigset_t mask;
 692
 693        memset(&act, 0, sizeof(act));
 694        sigemptyset(&mask);
 695        sigaddset(&mask, SIGSYS);
 696
 697        act.sa_sigaction = &TRAP_action;
 698        act.sa_flags = SA_SIGINFO;
 699        ret = sigaction(SIGSYS, &act, NULL);
 700        ASSERT_EQ(0, ret) {
 701                TH_LOG("sigaction failed");
 702        }
 703        ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
 704        ASSERT_EQ(0, ret) {
 705                TH_LOG("sigprocmask failed");
 706        }
 707
 708        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 709        ASSERT_EQ(0, ret);
 710        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
 711        ASSERT_EQ(0, ret);
 712        TRAP_nr = 0;
 713        memset(&TRAP_info, 0, sizeof(TRAP_info));
 714        /* Expect the registers to be rolled back. (nr = error) may vary
 715         * based on arch. */
 716        ret = syscall(__NR_getpid);
 717        /* Silence gcc warning about volatile. */
 718        test = TRAP_nr;
 719        EXPECT_EQ(SIGSYS, test);
 720        struct local_sigsys {
 721                void *_call_addr;       /* calling user insn */
 722                int _syscall;           /* triggering system call number */
 723                unsigned int _arch;     /* AUDIT_ARCH_* of syscall */
 724        } *sigsys = (struct local_sigsys *)
 725#ifdef si_syscall
 726                &(TRAP_info.si_call_addr);
 727#else
 728                &TRAP_info.si_pid;
 729#endif
 730        EXPECT_EQ(__NR_getpid, sigsys->_syscall);
 731        /* Make sure arch is non-zero. */
 732        EXPECT_NE(0, sigsys->_arch);
 733        EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
 734}
 735
 736FIXTURE_DATA(precedence) {
 737        struct sock_fprog allow;
 738        struct sock_fprog trace;
 739        struct sock_fprog error;
 740        struct sock_fprog trap;
 741        struct sock_fprog kill;
 742};
 743
 744FIXTURE_SETUP(precedence)
 745{
 746        struct sock_filter allow_insns[] = {
 747                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 748        };
 749        struct sock_filter trace_insns[] = {
 750                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 751                        offsetof(struct seccomp_data, nr)),
 752                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 753                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 754                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
 755        };
 756        struct sock_filter error_insns[] = {
 757                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 758                        offsetof(struct seccomp_data, nr)),
 759                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 760                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 761                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
 762        };
 763        struct sock_filter trap_insns[] = {
 764                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 765                        offsetof(struct seccomp_data, nr)),
 766                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 767                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 768                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
 769        };
 770        struct sock_filter kill_insns[] = {
 771                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 772                        offsetof(struct seccomp_data, nr)),
 773                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
 774                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 775                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
 776        };
 777
 778        memset(self, 0, sizeof(*self));
 779#define FILTER_ALLOC(_x) \
 780        self->_x.filter = malloc(sizeof(_x##_insns)); \
 781        ASSERT_NE(NULL, self->_x.filter); \
 782        memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 783        self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 784        FILTER_ALLOC(allow);
 785        FILTER_ALLOC(trace);
 786        FILTER_ALLOC(error);
 787        FILTER_ALLOC(trap);
 788        FILTER_ALLOC(kill);
 789}
 790
 791FIXTURE_TEARDOWN(precedence)
 792{
 793#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 794        FILTER_FREE(allow);
 795        FILTER_FREE(trace);
 796        FILTER_FREE(error);
 797        FILTER_FREE(trap);
 798        FILTER_FREE(kill);
 799}
 800
 801TEST_F(precedence, allow_ok)
 802{
 803        pid_t parent, res = 0;
 804        long ret;
 805
 806        parent = getppid();
 807        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 808        ASSERT_EQ(0, ret);
 809
 810        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 811        ASSERT_EQ(0, ret);
 812        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 813        ASSERT_EQ(0, ret);
 814        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 815        ASSERT_EQ(0, ret);
 816        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 817        ASSERT_EQ(0, ret);
 818        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 819        ASSERT_EQ(0, ret);
 820        /* Should work just fine. */
 821        res = syscall(__NR_getppid);
 822        EXPECT_EQ(parent, res);
 823}
 824
 825TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 826{
 827        pid_t parent, res = 0;
 828        long ret;
 829
 830        parent = getppid();
 831        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 832        ASSERT_EQ(0, ret);
 833
 834        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 835        ASSERT_EQ(0, ret);
 836        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 837        ASSERT_EQ(0, ret);
 838        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 839        ASSERT_EQ(0, ret);
 840        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 841        ASSERT_EQ(0, ret);
 842        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 843        ASSERT_EQ(0, ret);
 844        /* Should work just fine. */
 845        res = syscall(__NR_getppid);
 846        EXPECT_EQ(parent, res);
 847        /* getpid() should never return. */
 848        res = syscall(__NR_getpid);
 849        EXPECT_EQ(0, res);
 850}
 851
 852TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 853{
 854        pid_t parent;
 855        long ret;
 856
 857        parent = getppid();
 858        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 859        ASSERT_EQ(0, ret);
 860
 861        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 862        ASSERT_EQ(0, ret);
 863        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
 864        ASSERT_EQ(0, ret);
 865        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 866        ASSERT_EQ(0, ret);
 867        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 868        ASSERT_EQ(0, ret);
 869        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 870        ASSERT_EQ(0, ret);
 871        /* Should work just fine. */
 872        EXPECT_EQ(parent, syscall(__NR_getppid));
 873        /* getpid() should never return. */
 874        EXPECT_EQ(0, syscall(__NR_getpid));
 875}
 876
 877TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 878{
 879        pid_t parent;
 880        long ret;
 881
 882        parent = getppid();
 883        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 884        ASSERT_EQ(0, ret);
 885
 886        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 887        ASSERT_EQ(0, ret);
 888        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 889        ASSERT_EQ(0, ret);
 890        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 891        ASSERT_EQ(0, ret);
 892        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 893        ASSERT_EQ(0, ret);
 894        /* Should work just fine. */
 895        EXPECT_EQ(parent, syscall(__NR_getppid));
 896        /* getpid() should never return. */
 897        EXPECT_EQ(0, syscall(__NR_getpid));
 898}
 899
 900TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 901{
 902        pid_t parent;
 903        long ret;
 904
 905        parent = getppid();
 906        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 907        ASSERT_EQ(0, ret);
 908
 909        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 910        ASSERT_EQ(0, ret);
 911        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 912        ASSERT_EQ(0, ret);
 913        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 914        ASSERT_EQ(0, ret);
 915        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 916        ASSERT_EQ(0, ret);
 917        /* Should work just fine. */
 918        EXPECT_EQ(parent, syscall(__NR_getppid));
 919        /* getpid() should never return. */
 920        EXPECT_EQ(0, syscall(__NR_getpid));
 921}
 922
 923TEST_F(precedence, errno_is_third)
 924{
 925        pid_t parent;
 926        long ret;
 927
 928        parent = getppid();
 929        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 930        ASSERT_EQ(0, ret);
 931
 932        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 933        ASSERT_EQ(0, ret);
 934        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 935        ASSERT_EQ(0, ret);
 936        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 937        ASSERT_EQ(0, ret);
 938        /* Should work just fine. */
 939        EXPECT_EQ(parent, syscall(__NR_getppid));
 940        EXPECT_EQ(0, syscall(__NR_getpid));
 941}
 942
 943TEST_F(precedence, errno_is_third_in_any_order)
 944{
 945        pid_t parent;
 946        long ret;
 947
 948        parent = getppid();
 949        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 950        ASSERT_EQ(0, ret);
 951
 952        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 953        ASSERT_EQ(0, ret);
 954        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 955        ASSERT_EQ(0, ret);
 956        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 957        ASSERT_EQ(0, ret);
 958        /* Should work just fine. */
 959        EXPECT_EQ(parent, syscall(__NR_getppid));
 960        EXPECT_EQ(0, syscall(__NR_getpid));
 961}
 962
 963TEST_F(precedence, trace_is_fourth)
 964{
 965        pid_t parent;
 966        long ret;
 967
 968        parent = getppid();
 969        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 970        ASSERT_EQ(0, ret);
 971
 972        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 973        ASSERT_EQ(0, ret);
 974        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 975        ASSERT_EQ(0, ret);
 976        /* Should work just fine. */
 977        EXPECT_EQ(parent, syscall(__NR_getppid));
 978        /* No ptracer */
 979        EXPECT_EQ(-1, syscall(__NR_getpid));
 980}
 981
 982TEST_F(precedence, trace_is_fourth_in_any_order)
 983{
 984        pid_t parent;
 985        long ret;
 986
 987        parent = getppid();
 988        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 989        ASSERT_EQ(0, ret);
 990
 991        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 992        ASSERT_EQ(0, ret);
 993        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 994        ASSERT_EQ(0, ret);
 995        /* Should work just fine. */
 996        EXPECT_EQ(parent, syscall(__NR_getppid));
 997        /* No ptracer */
 998        EXPECT_EQ(-1, syscall(__NR_getpid));
 999}
1000
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP   0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits above the low 16 of a wait() status equal
 * PTRACE_EVENT_SECCOMP.  The argument is parenthesized so that a compound
 * expression (for example "a | b") is shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop.  It is cleared from the SIGUSR1 handler
 * below, so it is a volatile sig_atomic_t: that is the object type C
 * guarantees can be safely written from a signal handler.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: request that the tracer loop stop. */
void tracer_stop(int sig)
{
        tracer_running = 0;
}
1020
/*
 * Callback type invoked by tracer() for every seccomp stop of the tracee.
 * @status is the wait() status of that stop and @args is the opaque
 * pointer that was handed to tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
                           pid_t tracee, int status, void *args);
1023
1024void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
1025            tracer_func_t tracer_func, void *args)
1026{
1027        int ret = -1;
1028        struct sigaction action = {
1029                .sa_handler = tracer_stop,
1030        };
1031
1032        /* Allow external shutdown. */
1033        tracer_running = true;
1034        ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
1035
1036        errno = 0;
1037        while (ret == -1 && errno != EINVAL)
1038                ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
1039        ASSERT_EQ(0, ret) {
1040                kill(tracee, SIGKILL);
1041        }
1042        /* Wait for attach stop */
1043        wait(NULL);
1044
1045        ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
1046        ASSERT_EQ(0, ret) {
1047                TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
1048                kill(tracee, SIGKILL);
1049        }
1050        ptrace(PTRACE_CONT, tracee, NULL, 0);
1051
1052        /* Unblock the tracee */
1053        ASSERT_EQ(1, write(fd, "A", 1));
1054        ASSERT_EQ(0, close(fd));
1055
1056        /* Run until we're shut down. Must assert to stop execution. */
1057        while (tracer_running) {
1058                int status;
1059
1060                if (wait(&status) != tracee)
1061                        continue;
1062                if (WIFSIGNALED(status) || WIFEXITED(status))
1063                        /* Child is dead. Time to go. */
1064                        return;
1065
1066                /* Make sure this is a seccomp event. */
1067                ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
1068
1069                tracer_func(_metadata, tracee, status, args);
1070
1071                ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
1072                ASSERT_EQ(0, ret);
1073        }
1074        /* Directly report the status of our test harness results. */
1075        syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
1076}
1077
1078/* Common tracer setup/teardown functions. */
/* Signal handler that intentionally does nothing. */
void cont_handler(int num)
{
}
1081pid_t setup_trace_fixture(struct __test_metadata *_metadata,
1082                          tracer_func_t func, void *args)
1083{
1084        char sync;
1085        int pipefd[2];
1086        pid_t tracer_pid;
1087        pid_t tracee = getpid();
1088
1089        /* Setup a pipe for clean synchronization. */
1090        ASSERT_EQ(0, pipe(pipefd));
1091
1092        /* Fork a child which we'll promote to tracer */
1093        tracer_pid = fork();
1094        ASSERT_LE(0, tracer_pid);
1095        signal(SIGALRM, cont_handler);
1096        if (tracer_pid == 0) {
1097                close(pipefd[0]);
1098                tracer(_metadata, pipefd[1], tracee, func, args);
1099                syscall(__NR_exit, 0);
1100        }
1101        close(pipefd[1]);
1102        prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
1103        read(pipefd[0], &sync, 1);
1104        close(pipefd[0]);
1105
1106        return tracer_pid;
1107}
1108void teardown_trace_fixture(struct __test_metadata *_metadata,
1109                            pid_t tracer)
1110{
1111        if (tracer) {
1112                int status;
1113                /*
1114                 * Extract the exit code from the other process and
1115                 * adopt it for ourselves in case its asserts failed.
1116                 */
1117                ASSERT_EQ(0, kill(tracer, SIGUSR1));
1118                ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1119                if (WEXITSTATUS(status))
1120                        _metadata->passed = 0;
1121        }
1122}
1123
1124/* "poke" tracer arguments and function. */
/* Argument block handed to tracer_poke() through the opaque args pointer. */
struct tracer_args_poke_t {
        /* Address in the tracee that tracer_poke() writes with PTRACE_POKEDATA. */
        unsigned long poke_addr;
};
1128
1129void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1130                 void *args)
1131{
1132        int ret;
1133        unsigned long msg;
1134        struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1135
1136        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1137        EXPECT_EQ(0, ret);
1138        /* If this fails, don't try to recover. */
1139        ASSERT_EQ(0x1001, msg) {
1140                kill(tracee, SIGKILL);
1141        }
1142        /*
1143         * Poke in the message.
1144         * Registers are not touched to try to keep this relatively arch
1145         * agnostic.
1146         */
1147        ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1148        EXPECT_EQ(0, ret);
1149}
1150
/* Per-test state for the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
        /* Filter program; its instruction array is heap-allocated in setup. */
        struct sock_fprog prog;
        /* Pid returned by setup_trace_fixture(). */
        pid_t tracer;
        /* Word whose address is given to the tracer as the poke target. */
        long poked;
        /* Arguments passed to tracer_poke(). */
        struct tracer_args_poke_t tracer_args;
};
1157
1158FIXTURE_SETUP(TRACE_poke)
1159{
1160        struct sock_filter filter[] = {
1161                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1162                        offsetof(struct seccomp_data, nr)),
1163                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1164                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1165                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1166        };
1167
1168        self->poked = 0;
1169        memset(&self->prog, 0, sizeof(self->prog));
1170        self->prog.filter = malloc(sizeof(filter));
1171        ASSERT_NE(NULL, self->prog.filter);
1172        memcpy(self->prog.filter, filter, sizeof(filter));
1173        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1174
1175        /* Set up tracer args. */
1176        self->tracer_args.poke_addr = (unsigned long)&self->poked;
1177
1178        /* Launch tracer. */
1179        self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1180                                           &self->tracer_args);
1181}
1182
1183FIXTURE_TEARDOWN(TRACE_poke)
1184{
1185        teardown_trace_fixture(_metadata, self->tracer);
1186        if (self->prog.filter)
1187                free(self->prog.filter);
1188}
1189
1190TEST_F(TRACE_poke, read_has_side_effects)
1191{
1192        ssize_t ret;
1193
1194        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1195        ASSERT_EQ(0, ret);
1196
1197        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1198        ASSERT_EQ(0, ret);
1199
1200        EXPECT_EQ(0, self->poked);
1201        ret = read(-1, NULL, 0);
1202        EXPECT_EQ(-1, ret);
1203        EXPECT_EQ(0x1001, self->poked);
1204}
1205
1206TEST_F(TRACE_poke, getpid_runs_normally)
1207{
1208        long ret;
1209
1210        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1211        ASSERT_EQ(0, ret);
1212
1213        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1214        ASSERT_EQ(0, ret);
1215
1216        EXPECT_EQ(0, self->poked);
1217        EXPECT_NE(0, syscall(__NR_getpid));
1218        EXPECT_EQ(0, self->poked);
1219}
1220
/*
 * Per-architecture register access used by get_syscall() and
 * change_syscall():
 *   ARCH_REGS    - register-set type exchanged via PTRACE_GETREGSET /
 *                  PTRACE_SETREGSET with NT_PRSTATUS
 *   SYSCALL_NUM  - member read as the syscall number
 *   SYSCALL_RET  - member written as the syscall return value
 * Note that the s390 entries name the same member (gprs[2]) for both.
 * Unknown architectures stop the build with #error.
 */
#if defined(__x86_64__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_rax
# define SYSCALL_RET    rax
#elif defined(__i386__)
# define ARCH_REGS      struct user_regs_struct
# define SYSCALL_NUM    orig_eax
# define SYSCALL_RET    eax
#elif defined(__arm__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    ARM_r7
# define SYSCALL_RET    ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS      struct user_pt_regs
# define SYSCALL_NUM    regs[8]
# define SYSCALL_RET    regs[0]
#elif defined(__powerpc__)
# define ARCH_REGS      struct pt_regs
# define SYSCALL_NUM    gpr[0]
# define SYSCALL_RET    gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1248
1249/* Architecture-specific syscall fetching routine. */
1250int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1251{
1252        struct iovec iov;
1253        ARCH_REGS regs;
1254
1255        iov.iov_base = &regs;
1256        iov.iov_len = sizeof(regs);
1257        EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1258                TH_LOG("PTRACE_GETREGSET failed");
1259                return -1;
1260        }
1261
1262        return regs.SYSCALL_NUM;
1263}
1264
1265/* Architecture-specific syscall changing routine. */
1266void change_syscall(struct __test_metadata *_metadata,
1267                    pid_t tracee, int syscall)
1268{
1269        struct iovec iov;
1270        int ret;
1271        ARCH_REGS regs;
1272
1273        iov.iov_base = &regs;
1274        iov.iov_len = sizeof(regs);
1275        ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1276        EXPECT_EQ(0, ret);
1277
1278#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1279    defined(__s390__)
1280        {
1281                regs.SYSCALL_NUM = syscall;
1282        }
1283
1284#elif defined(__arm__)
1285# ifndef PTRACE_SET_SYSCALL
1286#  define PTRACE_SET_SYSCALL   23
1287# endif
1288        {
1289                ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1290                EXPECT_EQ(0, ret);
1291        }
1292
1293#elif defined(__aarch64__)
1294# ifndef NT_ARM_SYSTEM_CALL
1295#  define NT_ARM_SYSTEM_CALL 0x404
1296# endif
1297        {
1298                iov.iov_base = &syscall;
1299                iov.iov_len = sizeof(syscall);
1300                ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1301                             &iov);
1302                EXPECT_EQ(0, ret);
1303        }
1304
1305#else
1306        ASSERT_EQ(1, 0) {
1307                TH_LOG("How is the syscall changed on this architecture?");
1308        }
1309#endif
1310
1311        /* If syscall is skipped, change return value. */
1312        if (syscall == -1)
1313                regs.SYSCALL_RET = 1;
1314
1315        iov.iov_base = &regs;
1316        iov.iov_len = sizeof(regs);
1317        ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1318        EXPECT_EQ(0, ret);
1319}
1320
1321void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1322                    int status, void *args)
1323{
1324        int ret;
1325        unsigned long msg;
1326
1327        /* Make sure we got the right message. */
1328        ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1329        EXPECT_EQ(0, ret);
1330
1331        /* Validate and take action on expected syscalls. */
1332        switch (msg) {
1333        case 0x1002:
1334                /* change getpid to getppid. */
1335                EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1336                change_syscall(_metadata, tracee, __NR_getppid);
1337                break;
1338        case 0x1003:
1339                /* skip gettid. */
1340                EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1341                change_syscall(_metadata, tracee, -1);
1342                break;
1343        case 0x1004:
1344                /* do nothing (allow getppid) */
1345                EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1346                break;
1347        default:
1348                EXPECT_EQ(0, msg) {
1349                        TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1350                        kill(tracee, SIGKILL);
1351                }
1352        }
1353
1354}
1355
/* Per-test state for the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
        /* Filter program; its instruction array is heap-allocated in setup. */
        struct sock_fprog prog;
        /* Tracer pid plus the tid, pid and parent pid recorded during setup. */
        pid_t tracer, mytid, mypid, parent;
};
1360
1361FIXTURE_SETUP(TRACE_syscall)
1362{
1363        struct sock_filter filter[] = {
1364                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1365                        offsetof(struct seccomp_data, nr)),
1366                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1367                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1368                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1369                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1370                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1371                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1372                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1373        };
1374
1375        memset(&self->prog, 0, sizeof(self->prog));
1376        self->prog.filter = malloc(sizeof(filter));
1377        ASSERT_NE(NULL, self->prog.filter);
1378        memcpy(self->prog.filter, filter, sizeof(filter));
1379        self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1380
1381        /* Prepare some testable syscall results. */
1382        self->mytid = syscall(__NR_gettid);
1383        ASSERT_GT(self->mytid, 0);
1384        ASSERT_NE(self->mytid, 1) {
1385                TH_LOG("Running this test as init is not supported. :)");
1386        }
1387
1388        self->mypid = getpid();
1389        ASSERT_GT(self->mypid, 0);
1390        ASSERT_EQ(self->mytid, self->mypid);
1391
1392        self->parent = getppid();
1393        ASSERT_GT(self->parent, 0);
1394        ASSERT_NE(self->parent, self->mypid);
1395
1396        /* Launch tracer. */
1397        self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
1398}
1399
1400FIXTURE_TEARDOWN(TRACE_syscall)
1401{
1402        teardown_trace_fixture(_metadata, self->tracer);
1403        if (self->prog.filter)
1404                free(self->prog.filter);
1405}
1406
1407TEST_F(TRACE_syscall, syscall_allowed)
1408{
1409        long ret;
1410
1411        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1412        ASSERT_EQ(0, ret);
1413
1414        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1415        ASSERT_EQ(0, ret);
1416
1417        /* getppid works as expected (no changes). */
1418        EXPECT_EQ(self->parent, syscall(__NR_getppid));
1419        EXPECT_NE(self->mypid, syscall(__NR_getppid));
1420}
1421
1422TEST_F(TRACE_syscall, syscall_redirected)
1423{
1424        long ret;
1425
1426        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1427        ASSERT_EQ(0, ret);
1428
1429        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1430        ASSERT_EQ(0, ret);
1431
1432        /* getpid has been redirected to getppid as expected. */
1433        EXPECT_EQ(self->parent, syscall(__NR_getpid));
1434        EXPECT_NE(self->mypid, syscall(__NR_getpid));
1435}
1436
1437TEST_F(TRACE_syscall, syscall_dropped)
1438{
1439        long ret;
1440
1441        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1442        ASSERT_EQ(0, ret);
1443
1444        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1445        ASSERT_EQ(0, ret);
1446
1447        /* gettid has been skipped and an altered return value stored. */
1448        EXPECT_EQ(1, syscall(__NR_gettid));
1449        EXPECT_NE(self->mytid, syscall(__NR_gettid));
1450}
1451
/*
 * Fallback syscall numbers for seccomp when the included headers do not
 * define __NR_seccomp.  Architectures not listed here get a build warning
 * and the placeholder value 0xffff.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif
1470
/*
 * Fallback values for the seccomp() operations and the TSYNC filter flag,
 * used only when the included headers do not define them.
 */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

#ifndef SECCOMP_FLAG_FILTER_TSYNC
#define SECCOMP_FLAG_FILTER_TSYNC 1
#endif
1482
#ifndef seccomp
/*
 * Raw wrapper around the seccomp syscall.  errno is reset to 0 before the
 * call so that callers can inspect errno afterwards; the syscall's return
 * value is passed back unchanged.
 */
int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
{
        long rc;

        errno = 0;
        rc = syscall(__NR_seccomp, op, flags, filter);

        return rc;
}
#endif
1490
1491TEST(seccomp_syscall)
1492{
1493        struct sock_filter filter[] = {
1494                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1495        };
1496        struct sock_fprog prog = {
1497                .len = (unsigned short)ARRAY_SIZE(filter),
1498                .filter = filter,
1499        };
1500        long ret;
1501
1502        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1503        ASSERT_EQ(0, ret) {
1504                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1505        }
1506
1507        /* Reject insane operation. */
1508        ret = seccomp(-1, 0, &prog);
1509        ASSERT_NE(ENOSYS, errno) {
1510                TH_LOG("Kernel does not support seccomp syscall!");
1511        }
1512        EXPECT_EQ(EINVAL, errno) {
1513                TH_LOG("Did not reject crazy op value!");
1514        }
1515
1516        /* Reject strict with flags or pointer. */
1517        ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1518        EXPECT_EQ(EINVAL, errno) {
1519                TH_LOG("Did not reject mode strict with flags!");
1520        }
1521        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1522        EXPECT_EQ(EINVAL, errno) {
1523                TH_LOG("Did not reject mode strict with uargs!");
1524        }
1525
1526        /* Reject insane args for filter. */
1527        ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1528        EXPECT_EQ(EINVAL, errno) {
1529                TH_LOG("Did not reject crazy filter flags!");
1530        }
1531        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1532        EXPECT_EQ(EFAULT, errno) {
1533                TH_LOG("Did not reject NULL filter!");
1534        }
1535
1536        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1537        EXPECT_EQ(0, errno) {
1538                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1539                        strerror(errno));
1540        }
1541}
1542
1543TEST(seccomp_syscall_mode_lock)
1544{
1545        struct sock_filter filter[] = {
1546                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1547        };
1548        struct sock_fprog prog = {
1549                .len = (unsigned short)ARRAY_SIZE(filter),
1550                .filter = filter,
1551        };
1552        long ret;
1553
1554        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1555        ASSERT_EQ(0, ret) {
1556                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1557        }
1558
1559        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1560        ASSERT_NE(ENOSYS, errno) {
1561                TH_LOG("Kernel does not support seccomp syscall!");
1562        }
1563        EXPECT_EQ(0, ret) {
1564                TH_LOG("Could not install filter!");
1565        }
1566
1567        /* Make sure neither entry point will switch to strict. */
1568        ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1569        EXPECT_EQ(EINVAL, errno) {
1570                TH_LOG("Switched to mode strict!");
1571        }
1572
1573        ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1574        EXPECT_EQ(EINVAL, errno) {
1575                TH_LOG("Switched to mode strict!");
1576        }
1577}
1578
1579TEST(TSYNC_first)
1580{
1581        struct sock_filter filter[] = {
1582                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1583        };
1584        struct sock_fprog prog = {
1585                .len = (unsigned short)ARRAY_SIZE(filter),
1586                .filter = filter,
1587        };
1588        long ret;
1589
1590        ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1591        ASSERT_EQ(0, ret) {
1592                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1593        }
1594
1595        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1596                      &prog);
1597        ASSERT_NE(ENOSYS, errno) {
1598                TH_LOG("Kernel does not support seccomp syscall!");
1599        }
1600        EXPECT_EQ(0, ret) {
1601                TH_LOG("Could not install initial filter with TSYNC!");
1602        }
1603}
1604
/* Number of sibling threads started by the TSYNC tests. */
#define TSYNC_SIBLINGS 2
/* State shared between a TSYNC test and one of its sibling threads. */
struct tsync_sibling {
        /* Thread handle filled in by pthread_create(). */
        pthread_t tid;
        /* Kernel tid, stored by the thread itself via gettid. */
        pid_t system_tid;
        /* Semaphore the thread posts once it has started. */
        sem_t *started;
        /* Condition variable and mutex the thread waits on. */
        pthread_cond_t *cond;
        pthread_mutex_t *mutex;
        /* Non-zero: the thread installs @prog itself before waiting. */
        int diverge;
        /* How many condition wake-ups the thread consumes before going on. */
        int num_waits;
        /* Filter applied by the thread when @diverge is set. */
        struct sock_fprog *prog;
        /* Harness metadata of the owning test. */
        struct __test_metadata *metadata;
};
1617
/* Per-test state for the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
        /* Allow-all filter and the filter that kills on read(). */
        struct sock_fprog root_prog, apply_prog;
        /* One entry per sibling thread. */
        struct tsync_sibling sibling[TSYNC_SIBLINGS];
        /* Synchronization objects shared with every sibling. */
        sem_t started;
        pthread_cond_t cond;
        pthread_mutex_t mutex;
        /* Number of siblings that have reported in through @started. */
        int sibling_count;
};
1626
1627FIXTURE_SETUP(TSYNC)
1628{
1629        struct sock_filter root_filter[] = {
1630                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1631        };
1632        struct sock_filter apply_filter[] = {
1633                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1634                        offsetof(struct seccomp_data, nr)),
1635                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1636                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1637                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1638        };
1639
1640        memset(&self->root_prog, 0, sizeof(self->root_prog));
1641        memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1642        memset(&self->sibling, 0, sizeof(self->sibling));
1643        self->root_prog.filter = malloc(sizeof(root_filter));
1644        ASSERT_NE(NULL, self->root_prog.filter);
1645        memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1646        self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1647
1648        self->apply_prog.filter = malloc(sizeof(apply_filter));
1649        ASSERT_NE(NULL, self->apply_prog.filter);
1650        memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1651        self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1652
1653        self->sibling_count = 0;
1654        pthread_mutex_init(&self->mutex, NULL);
1655        pthread_cond_init(&self->cond, NULL);
1656        sem_init(&self->started, 0, 0);
1657        self->sibling[0].tid = 0;
1658        self->sibling[0].cond = &self->cond;
1659        self->sibling[0].started = &self->started;
1660        self->sibling[0].mutex = &self->mutex;
1661        self->sibling[0].diverge = 0;
1662        self->sibling[0].num_waits = 1;
1663        self->sibling[0].prog = &self->root_prog;
1664        self->sibling[0].metadata = _metadata;
1665        self->sibling[1].tid = 0;
1666        self->sibling[1].cond = &self->cond;
1667        self->sibling[1].started = &self->started;
1668        self->sibling[1].mutex = &self->mutex;
1669        self->sibling[1].diverge = 0;
1670        self->sibling[1].prog = &self->root_prog;
1671        self->sibling[1].num_waits = 1;
1672        self->sibling[1].metadata = _metadata;
1673}
1674
/*
 * Release the filter copies, clean up the siblings that reported in, and
 * destroy the shared synchronization objects.
 */
FIXTURE_TEARDOWN(TSYNC)
{
        int sib = 0;

        if (self->root_prog.filter)
                free(self->root_prog.filter);
        if (self->apply_prog.filter)
                free(self->apply_prog.filter);

        /* Only slots counted in sibling_count are visited. */
        for ( ; sib < self->sibling_count; ++sib) {
                struct tsync_sibling *s = &self->sibling[sib];
                void *status;

                if (!s->tid)
                        continue;
                /*
                 * NOTE(review): pthread_kill() returns 0 on success, so
                 * this cancels and joins only when the signal-0 probe
                 * FAILS.  That looks inverted if the intent is to clean up
                 * threads that are still running - confirm.  The tests
                 * also join their siblings themselves, so the handle may
                 * already be stale here.
                 */
                if (pthread_kill(s->tid, 0)) {
                        pthread_cancel(s->tid);
                        pthread_join(s->tid, &status);
                }
        }
        pthread_mutex_destroy(&self->mutex);
        pthread_cond_destroy(&self->cond);
        sem_destroy(&self->started);
}
1699
1700void *tsync_sibling(void *data)
1701{
1702        long ret = 0;
1703        struct tsync_sibling *me = data;
1704
1705        me->system_tid = syscall(__NR_gettid);
1706
1707        pthread_mutex_lock(me->mutex);
1708        if (me->diverge) {
1709                /* Just re-apply the root prog to fork the tree */
1710                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1711                                me->prog, 0, 0);
1712        }
1713        sem_post(me->started);
1714        /* Return outside of started so parent notices failures. */
1715        if (ret) {
1716                pthread_mutex_unlock(me->mutex);
1717                return (void *)SIBLING_EXIT_FAILURE;
1718        }
1719        do {
1720                pthread_cond_wait(me->cond, me->mutex);
1721                me->num_waits = me->num_waits - 1;
1722        } while (me->num_waits);
1723        pthread_mutex_unlock(me->mutex);
1724
1725        ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1726        if (!ret)
1727                return (void *)SIBLING_EXIT_NEWPRIVS;
1728        read(0, NULL, 0);
1729        return (void *)SIBLING_EXIT_UNKILLED;
1730}
1731
1732void tsync_start_sibling(struct tsync_sibling *sibling)
1733{
1734        pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1735}
1736
1737TEST_F(TSYNC, siblings_fail_prctl)
1738{
1739        long ret;
1740        void *status;
1741        struct sock_filter filter[] = {
1742                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1743                        offsetof(struct seccomp_data, nr)),
1744                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1745                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1746                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1747        };
1748        struct sock_fprog prog = {
1749                .len = (unsigned short)ARRAY_SIZE(filter),
1750                .filter = filter,
1751        };
1752
1753        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1754                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1755        }
1756
1757        /* Check prctl failure detection by requesting sib 0 diverge. */
1758        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1759        ASSERT_NE(ENOSYS, errno) {
1760                TH_LOG("Kernel does not support seccomp syscall!");
1761        }
1762        ASSERT_EQ(0, ret) {
1763                TH_LOG("setting filter failed");
1764        }
1765
1766        self->sibling[0].diverge = 1;
1767        tsync_start_sibling(&self->sibling[0]);
1768        tsync_start_sibling(&self->sibling[1]);
1769
1770        while (self->sibling_count < TSYNC_SIBLINGS) {
1771                sem_wait(&self->started);
1772                self->sibling_count++;
1773        }
1774
1775        /* Signal the threads to clean up*/
1776        pthread_mutex_lock(&self->mutex);
1777        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1778                TH_LOG("cond broadcast non-zero");
1779        }
1780        pthread_mutex_unlock(&self->mutex);
1781
1782        /* Ensure diverging sibling failed to call prctl. */
1783        pthread_join(self->sibling[0].tid, &status);
1784        EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
1785        pthread_join(self->sibling[1].tid, &status);
1786        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1787}
1788
1789TEST_F(TSYNC, two_siblings_with_ancestor)
1790{
1791        long ret;
1792        void *status;
1793
1794        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1795                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1796        }
1797
1798        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1799        ASSERT_NE(ENOSYS, errno) {
1800                TH_LOG("Kernel does not support seccomp syscall!");
1801        }
1802        ASSERT_EQ(0, ret) {
1803                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1804        }
1805        tsync_start_sibling(&self->sibling[0]);
1806        tsync_start_sibling(&self->sibling[1]);
1807
1808        while (self->sibling_count < TSYNC_SIBLINGS) {
1809                sem_wait(&self->started);
1810                self->sibling_count++;
1811        }
1812
1813        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1814                      &self->apply_prog);
1815        ASSERT_EQ(0, ret) {
1816                TH_LOG("Could install filter on all threads!");
1817        }
1818        /* Tell the siblings to test the policy */
1819        pthread_mutex_lock(&self->mutex);
1820        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1821                TH_LOG("cond broadcast non-zero");
1822        }
1823        pthread_mutex_unlock(&self->mutex);
1824        /* Ensure they are both killed and don't exit cleanly. */
1825        pthread_join(self->sibling[0].tid, &status);
1826        EXPECT_EQ(0x0, (long)status);
1827        pthread_join(self->sibling[1].tid, &status);
1828        EXPECT_EQ(0x0, (long)status);
1829}
1830
1831TEST_F(TSYNC, two_sibling_want_nnp)
1832{
1833        void *status;
1834
1835        /* start siblings before any prctl() operations */
1836        tsync_start_sibling(&self->sibling[0]);
1837        tsync_start_sibling(&self->sibling[1]);
1838        while (self->sibling_count < TSYNC_SIBLINGS) {
1839                sem_wait(&self->started);
1840                self->sibling_count++;
1841        }
1842
1843        /* Tell the siblings to test no policy */
1844        pthread_mutex_lock(&self->mutex);
1845        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1846                TH_LOG("cond broadcast non-zero");
1847        }
1848        pthread_mutex_unlock(&self->mutex);
1849
1850        /* Ensure they are both upset about lacking nnp. */
1851        pthread_join(self->sibling[0].tid, &status);
1852        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1853        pthread_join(self->sibling[1].tid, &status);
1854        EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
1855}
1856
1857TEST_F(TSYNC, two_siblings_with_no_filter)
1858{
1859        long ret;
1860        void *status;
1861
1862        /* start siblings before any prctl() operations */
1863        tsync_start_sibling(&self->sibling[0]);
1864        tsync_start_sibling(&self->sibling[1]);
1865        while (self->sibling_count < TSYNC_SIBLINGS) {
1866                sem_wait(&self->started);
1867                self->sibling_count++;
1868        }
1869
1870        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1871                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1872        }
1873
1874        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1875                      &self->apply_prog);
1876        ASSERT_NE(ENOSYS, errno) {
1877                TH_LOG("Kernel does not support seccomp syscall!");
1878        }
1879        ASSERT_EQ(0, ret) {
1880                TH_LOG("Could install filter on all threads!");
1881        }
1882
1883        /* Tell the siblings to test the policy */
1884        pthread_mutex_lock(&self->mutex);
1885        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1886                TH_LOG("cond broadcast non-zero");
1887        }
1888        pthread_mutex_unlock(&self->mutex);
1889
1890        /* Ensure they are both killed and don't exit cleanly. */
1891        pthread_join(self->sibling[0].tid, &status);
1892        EXPECT_EQ(0x0, (long)status);
1893        pthread_join(self->sibling[1].tid, &status);
1894        EXPECT_EQ(0x0, (long)status);
1895}
1896
1897TEST_F(TSYNC, two_siblings_with_one_divergence)
1898{
1899        long ret;
1900        void *status;
1901
1902        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1903                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1904        }
1905
1906        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1907        ASSERT_NE(ENOSYS, errno) {
1908                TH_LOG("Kernel does not support seccomp syscall!");
1909        }
1910        ASSERT_EQ(0, ret) {
1911                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1912        }
1913        self->sibling[0].diverge = 1;
1914        tsync_start_sibling(&self->sibling[0]);
1915        tsync_start_sibling(&self->sibling[1]);
1916
1917        while (self->sibling_count < TSYNC_SIBLINGS) {
1918                sem_wait(&self->started);
1919                self->sibling_count++;
1920        }
1921
1922        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1923                      &self->apply_prog);
1924        ASSERT_EQ(self->sibling[0].system_tid, ret) {
1925                TH_LOG("Did not fail on diverged sibling.");
1926        }
1927
1928        /* Wake the threads */
1929        pthread_mutex_lock(&self->mutex);
1930        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1931                TH_LOG("cond broadcast non-zero");
1932        }
1933        pthread_mutex_unlock(&self->mutex);
1934
1935        /* Ensure they are both unkilled. */
1936        pthread_join(self->sibling[0].tid, &status);
1937        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1938        pthread_join(self->sibling[1].tid, &status);
1939        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1940}
1941
1942TEST_F(TSYNC, two_siblings_not_under_filter)
1943{
1944        long ret, sib;
1945        void *status;
1946
1947        ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1948                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1949        }
1950
1951        /*
1952         * Sibling 0 will have its own seccomp policy
1953         * and Sibling 1 will not be under seccomp at
1954         * all. Sibling 1 will enter seccomp and 0
1955         * will cause failure.
1956         */
1957        self->sibling[0].diverge = 1;
1958        tsync_start_sibling(&self->sibling[0]);
1959        tsync_start_sibling(&self->sibling[1]);
1960
1961        while (self->sibling_count < TSYNC_SIBLINGS) {
1962                sem_wait(&self->started);
1963                self->sibling_count++;
1964        }
1965
1966        ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
1967        ASSERT_NE(ENOSYS, errno) {
1968                TH_LOG("Kernel does not support seccomp syscall!");
1969        }
1970        ASSERT_EQ(0, ret) {
1971                TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
1972        }
1973
1974        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
1975                      &self->apply_prog);
1976        ASSERT_EQ(ret, self->sibling[0].system_tid) {
1977                TH_LOG("Did not fail on diverged sibling.");
1978        }
1979        sib = 1;
1980        if (ret == self->sibling[0].system_tid)
1981                sib = 0;
1982
1983        pthread_mutex_lock(&self->mutex);
1984
1985        /* Increment the other siblings num_waits so we can clean up
1986         * the one we just saw.
1987         */
1988        self->sibling[!sib].num_waits += 1;
1989
1990        /* Signal the thread to clean up*/
1991        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1992                TH_LOG("cond broadcast non-zero");
1993        }
1994        pthread_mutex_unlock(&self->mutex);
1995        pthread_join(self->sibling[sib].tid, &status);
1996        EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
1997        /* Poll for actual task death. pthread_join doesn't guarantee it. */
1998        while (!kill(self->sibling[sib].system_tid, 0))
1999                sleep(0.1);
2000        /* Switch to the remaining sibling */
2001        sib = !sib;
2002
2003        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
2004                      &self->apply_prog);
2005        ASSERT_EQ(0, ret) {
2006                TH_LOG("Expected the remaining sibling to sync");
2007        };
2008
2009        pthread_mutex_lock(&self->mutex);
2010
2011        /* If remaining sibling didn't have a chance to wake up during
2012         * the first broadcast, manually reduce the num_waits now.
2013         */
2014        if (self->sibling[sib].num_waits > 1)
2015                self->sibling[sib].num_waits = 1;
2016        ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2017                TH_LOG("cond broadcast non-zero");
2018        }
2019        pthread_mutex_unlock(&self->mutex);
2020        pthread_join(self->sibling[sib].tid, &status);
2021        EXPECT_EQ(0, (long)status);
2022        /* Poll for actual task death. pthread_join doesn't guarantee it. */
2023        while (!kill(self->sibling[sib].system_tid, 0))
2024                sleep(0.1);
2025
2026        ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
2027                      &self->apply_prog);
2028        ASSERT_EQ(0, ret);  /* just us chickens */
2029}
2030
2031/* Make sure restarted syscalls are seen directly as "restart_syscall". */
2032TEST(syscall_restart)
2033{
2034        long ret;
2035        unsigned long msg;
2036        pid_t child_pid;
2037        int pipefd[2];
2038        int status;
2039        siginfo_t info = { };
2040        struct sock_filter filter[] = {
2041                BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2042                         offsetof(struct seccomp_data, nr)),
2043
2044#ifdef __NR_sigreturn
2045                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2046#endif
2047                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2048                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2049                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2050                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2051                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2052
2053                /* Allow __NR_write for easy logging. */
2054                BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2055                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2056                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2057                /* The nanosleep jump target. */
2058                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2059                /* The restart_syscall jump target. */
2060                BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2061        };
2062        struct sock_fprog prog = {
2063                .len = (unsigned short)ARRAY_SIZE(filter),
2064                .filter = filter,
2065        };
2066#if defined(__arm__)
2067        struct utsname utsbuf;
2068#endif
2069
2070        ASSERT_EQ(0, pipe(pipefd));
2071
2072        child_pid = fork();
2073        ASSERT_LE(0, child_pid);
2074        if (child_pid == 0) {
2075                /* Child uses EXPECT not ASSERT to deliver status correctly. */
2076                char buf = ' ';
2077                struct timespec timeout = { };
2078
2079                /* Attach parent as tracer and stop. */
2080                EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2081                EXPECT_EQ(0, raise(SIGSTOP));
2082
2083                EXPECT_EQ(0, close(pipefd[1]));
2084
2085                EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2086                        TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2087                }
2088
2089                ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2090                EXPECT_EQ(0, ret) {
2091                        TH_LOG("Failed to install filter!");
2092                }
2093
2094                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2095                        TH_LOG("Failed to read() sync from parent");
2096                }
2097                EXPECT_EQ('.', buf) {
2098                        TH_LOG("Failed to get sync data from read()");
2099                }
2100
2101                /* Start nanosleep to be interrupted. */
2102                timeout.tv_sec = 1;
2103                errno = 0;
2104                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2105                        TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2106                }
2107
2108                /* Read final sync from parent. */
2109                EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2110                        TH_LOG("Failed final read() from parent");
2111                }
2112                EXPECT_EQ('!', buf) {
2113                        TH_LOG("Failed to get final data from read()");
2114                }
2115
2116                /* Directly report the status of our test harness results. */
2117                syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2118                                                     : EXIT_FAILURE);
2119        }
2120        EXPECT_EQ(0, close(pipefd[0]));
2121
2122        /* Attach to child, setup options, and release. */
2123        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2124        ASSERT_EQ(true, WIFSTOPPED(status));
2125        ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2126                            PTRACE_O_TRACESECCOMP));
2127        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2128        ASSERT_EQ(1, write(pipefd[1], ".", 1));
2129
2130        /* Wait for nanosleep() to start. */
2131        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2132        ASSERT_EQ(true, WIFSTOPPED(status));
2133        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2134        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2135        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2136        ASSERT_EQ(0x100, msg);
2137        EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2138
2139        /* Might as well check siginfo for sanity while we're here. */
2140        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2141        ASSERT_EQ(SIGTRAP, info.si_signo);
2142        ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2143        EXPECT_EQ(0, info.si_errno);
2144        EXPECT_EQ(getuid(), info.si_uid);
2145        /* Verify signal delivery came from child (seccomp-triggered). */
2146        EXPECT_EQ(child_pid, info.si_pid);
2147
2148        /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2149        ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2150        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2151        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2152        ASSERT_EQ(true, WIFSTOPPED(status));
2153        ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2154        /* Verify signal delivery came from parent now. */
2155        ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2156        EXPECT_EQ(getpid(), info.si_pid);
2157
2158        /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2159        ASSERT_EQ(0, kill(child_pid, SIGCONT));
2160        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2161        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2162        ASSERT_EQ(true, WIFSTOPPED(status));
2163        ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2164        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2165
2166        /* Wait for restart_syscall() to start. */
2167        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2168        ASSERT_EQ(true, WIFSTOPPED(status));
2169        ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2170        ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2171        ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2172
2173        ASSERT_EQ(0x200, msg);
2174        ret = get_syscall(_metadata, child_pid);
2175#if defined(__arm__)
2176        /*
2177         * FIXME:
2178         * - native ARM registers do NOT expose true syscall.
2179         * - compat ARM registers on ARM64 DO expose true syscall.
2180         */
2181        ASSERT_EQ(0, uname(&utsbuf));
2182        if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2183                EXPECT_EQ(__NR_nanosleep, ret);
2184        } else
2185#endif
2186        {
2187                EXPECT_EQ(__NR_restart_syscall, ret);
2188        }
2189
2190        /* Write again to end test. */
2191        ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2192        ASSERT_EQ(1, write(pipefd[1], "!", 1));
2193        EXPECT_EQ(0, close(pipefd[1]));
2194
2195        ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2196        if (WIFSIGNALED(status) || WEXITSTATUS(status))
2197                _metadata->passed = 0;
2198}
2199
2200/*
2201 * TODO:
2202 * - add microbenchmarks
2203 * - expand NNP testing
2204 * - better arch-specific TRACE and TRAP handlers.
2205 * - endianness checking when appropriate
2206 * - 64-bit arg prodding
2207 * - arch value testing (x86 modes especially)
2208 * - ...
2209 */
2210
/*
 * NOTE(review): presumably expands to the program entry point provided by
 * test_harness.h, running the TEST()/TEST_F() cases above -- confirm against
 * the harness header.
 */
TEST_HARNESS_MAIN
2212