linux/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2// Copyright (c) 2020 Cloudflare
   3/*
   4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
   5 * Covers:
   6 *  1. BPF map operations - bpf_map_{update,lookup,delete}_elem
   7 *  2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
   8 *  3. BPF reuseport helper - bpf_sk_select_reuseport
   9 */
  10
  11#include <linux/compiler.h>
  12#include <errno.h>
  13#include <error.h>
  14#include <limits.h>
  15#include <netinet/in.h>
  16#include <pthread.h>
  17#include <stdlib.h>
  18#include <string.h>
  19#include <sys/select.h>
  20#include <unistd.h>
  21
  22#include <bpf/bpf.h>
  23#include <bpf/libbpf.h>
  24
  25#include "bpf_util.h"
  26#include "test_progs.h"
  27#include "test_sockmap_listen.skel.h"
  28
  29#define IO_TIMEOUT_SEC 30
  30#define MAX_STRERR_LEN 256
  31#define MAX_TEST_NAME 80
  32
  33#define _FAIL(errnum, fmt...)                                                  \
  34        ({                                                                     \
  35                error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
  36                CHECK_FAIL(true);                                              \
  37        })
  38#define FAIL(fmt...) _FAIL(0, fmt)
  39#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
  40#define FAIL_LIBBPF(err, msg)                                                  \
  41        ({                                                                     \
  42                char __buf[MAX_STRERR_LEN];                                    \
  43                libbpf_strerror((err), __buf, sizeof(__buf));                  \
  44                FAIL("%s: %s", (msg), __buf);                                  \
  45        })
  46
  47/* Wrappers that fail the test on error and report it. */
  48
  49#define xaccept_nonblock(fd, addr, len)                                        \
  50        ({                                                                     \
  51                int __ret =                                                    \
  52                        accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC);   \
  53                if (__ret == -1)                                               \
  54                        FAIL_ERRNO("accept");                                  \
  55                __ret;                                                         \
  56        })
  57
  58#define xbind(fd, addr, len)                                                   \
  59        ({                                                                     \
  60                int __ret = bind((fd), (addr), (len));                         \
  61                if (__ret == -1)                                               \
  62                        FAIL_ERRNO("bind");                                    \
  63                __ret;                                                         \
  64        })
  65
  66#define xclose(fd)                                                             \
  67        ({                                                                     \
  68                int __ret = close((fd));                                       \
  69                if (__ret == -1)                                               \
  70                        FAIL_ERRNO("close");                                   \
  71                __ret;                                                         \
  72        })
  73
  74#define xconnect(fd, addr, len)                                                \
  75        ({                                                                     \
  76                int __ret = connect((fd), (addr), (len));                      \
  77                if (__ret == -1)                                               \
  78                        FAIL_ERRNO("connect");                                 \
  79                __ret;                                                         \
  80        })
  81
  82#define xgetsockname(fd, addr, len)                                            \
  83        ({                                                                     \
  84                int __ret = getsockname((fd), (addr), (len));                  \
  85                if (__ret == -1)                                               \
  86                        FAIL_ERRNO("getsockname");                             \
  87                __ret;                                                         \
  88        })
  89
  90#define xgetsockopt(fd, level, name, val, len)                                 \
  91        ({                                                                     \
  92                int __ret = getsockopt((fd), (level), (name), (val), (len));   \
  93                if (__ret == -1)                                               \
  94                        FAIL_ERRNO("getsockopt(" #name ")");                   \
  95                __ret;                                                         \
  96        })
  97
  98#define xlisten(fd, backlog)                                                   \
  99        ({                                                                     \
 100                int __ret = listen((fd), (backlog));                           \
 101                if (__ret == -1)                                               \
 102                        FAIL_ERRNO("listen");                                  \
 103                __ret;                                                         \
 104        })
 105
 106#define xsetsockopt(fd, level, name, val, len)                                 \
 107        ({                                                                     \
 108                int __ret = setsockopt((fd), (level), (name), (val), (len));   \
 109                if (__ret == -1)                                               \
 110                        FAIL_ERRNO("setsockopt(" #name ")");                   \
 111                __ret;                                                         \
 112        })
 113
 114#define xsend(fd, buf, len, flags)                                             \
 115        ({                                                                     \
 116                ssize_t __ret = send((fd), (buf), (len), (flags));             \
 117                if (__ret == -1)                                               \
 118                        FAIL_ERRNO("send");                                    \
 119                __ret;                                                         \
 120        })
 121
 122#define xrecv_nonblock(fd, buf, len, flags)                                    \
 123        ({                                                                     \
 124                ssize_t __ret = recv_timeout((fd), (buf), (len), (flags),      \
 125                                             IO_TIMEOUT_SEC);                  \
 126                if (__ret == -1)                                               \
 127                        FAIL_ERRNO("recv");                                    \
 128                __ret;                                                         \
 129        })
 130
 131#define xsocket(family, sotype, flags)                                         \
 132        ({                                                                     \
 133                int __ret = socket(family, sotype, flags);                     \
 134                if (__ret == -1)                                               \
 135                        FAIL_ERRNO("socket");                                  \
 136                __ret;                                                         \
 137        })
 138
 139#define xbpf_map_delete_elem(fd, key)                                          \
 140        ({                                                                     \
 141                int __ret = bpf_map_delete_elem((fd), (key));                  \
 142                if (__ret < 0)                                               \
 143                        FAIL_ERRNO("map_delete");                              \
 144                __ret;                                                         \
 145        })
 146
 147#define xbpf_map_lookup_elem(fd, key, val)                                     \
 148        ({                                                                     \
 149                int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
 150                if (__ret < 0)                                               \
 151                        FAIL_ERRNO("map_lookup");                              \
 152                __ret;                                                         \
 153        })
 154
 155#define xbpf_map_update_elem(fd, key, val, flags)                              \
 156        ({                                                                     \
 157                int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
 158                if (__ret < 0)                                               \
 159                        FAIL_ERRNO("map_update");                              \
 160                __ret;                                                         \
 161        })
 162
 163#define xbpf_prog_attach(prog, target, type, flags)                            \
 164        ({                                                                     \
 165                int __ret =                                                    \
 166                        bpf_prog_attach((prog), (target), (type), (flags));    \
 167                if (__ret < 0)                                               \
 168                        FAIL_ERRNO("prog_attach(" #type ")");                  \
 169                __ret;                                                         \
 170        })
 171
 172#define xbpf_prog_detach2(prog, target, type)                                  \
 173        ({                                                                     \
 174                int __ret = bpf_prog_detach2((prog), (target), (type));        \
 175                if (__ret < 0)                                               \
 176                        FAIL_ERRNO("prog_detach2(" #type ")");                 \
 177                __ret;                                                         \
 178        })
 179
 180#define xpthread_create(thread, attr, func, arg)                               \
 181        ({                                                                     \
 182                int __ret = pthread_create((thread), (attr), (func), (arg));   \
 183                errno = __ret;                                                 \
 184                if (__ret)                                                     \
 185                        FAIL_ERRNO("pthread_create");                          \
 186                __ret;                                                         \
 187        })
 188
 189#define xpthread_join(thread, retval)                                          \
 190        ({                                                                     \
 191                int __ret = pthread_join((thread), (retval));                  \
 192                errno = __ret;                                                 \
 193                if (__ret)                                                     \
 194                        FAIL_ERRNO("pthread_join");                            \
 195                __ret;                                                         \
 196        })
 197
 198static int poll_read(int fd, unsigned int timeout_sec)
 199{
 200        struct timeval timeout = { .tv_sec = timeout_sec };
 201        fd_set rfds;
 202        int r;
 203
 204        FD_ZERO(&rfds);
 205        FD_SET(fd, &rfds);
 206
 207        r = select(fd + 1, &rfds, NULL, NULL, &timeout);
 208        if (r == 0)
 209                errno = ETIME;
 210
 211        return r == 1 ? 0 : -1;
 212}
 213
 214static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
 215                          unsigned int timeout_sec)
 216{
 217        if (poll_read(fd, timeout_sec))
 218                return -1;
 219
 220        return accept(fd, addr, len);
 221}
 222
 223static int recv_timeout(int fd, void *buf, size_t len, int flags,
 224                        unsigned int timeout_sec)
 225{
 226        if (poll_read(fd, timeout_sec))
 227                return -1;
 228
 229        return recv(fd, buf, len, flags);
 230}
 231
 232static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
 233{
 234        struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
 235
 236        addr4->sin_family = AF_INET;
 237        addr4->sin_port = 0;
 238        addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 239        *len = sizeof(*addr4);
 240}
 241
 242static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
 243{
 244        struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
 245
 246        addr6->sin6_family = AF_INET6;
 247        addr6->sin6_port = 0;
 248        addr6->sin6_addr = in6addr_loopback;
 249        *len = sizeof(*addr6);
 250}
 251
 252static void init_addr_loopback(int family, struct sockaddr_storage *ss,
 253                               socklen_t *len)
 254{
 255        switch (family) {
 256        case AF_INET:
 257                init_addr_loopback4(ss, len);
 258                return;
 259        case AF_INET6:
 260                init_addr_loopback6(ss, len);
 261                return;
 262        default:
 263                FAIL("unsupported address family %d", family);
 264        }
 265}
 266
 267static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
 268{
 269        return (struct sockaddr *)ss;
 270}
 271
 272static int enable_reuseport(int s, int progfd)
 273{
 274        int err, one = 1;
 275
 276        err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 277        if (err)
 278                return -1;
 279        err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
 280                          sizeof(progfd));
 281        if (err)
 282                return -1;
 283
 284        return 0;
 285}
 286
 287static int socket_loopback_reuseport(int family, int sotype, int progfd)
 288{
 289        struct sockaddr_storage addr;
 290        socklen_t len;
 291        int err, s;
 292
 293        init_addr_loopback(family, &addr, &len);
 294
 295        s = xsocket(family, sotype, 0);
 296        if (s == -1)
 297                return -1;
 298
 299        if (progfd >= 0)
 300                enable_reuseport(s, progfd);
 301
 302        err = xbind(s, sockaddr(&addr), len);
 303        if (err)
 304                goto close;
 305
 306        if (sotype & SOCK_DGRAM)
 307                return s;
 308
 309        err = xlisten(s, SOMAXCONN);
 310        if (err)
 311                goto close;
 312
 313        return s;
 314close:
 315        xclose(s);
 316        return -1;
 317}
 318
 319static int socket_loopback(int family, int sotype)
 320{
 321        return socket_loopback_reuseport(family, sotype, -1);
 322}
 323
 324static void test_insert_invalid(int family, int sotype, int mapfd)
 325{
 326        u32 key = 0;
 327        u64 value;
 328        int err;
 329
 330        value = -1;
 331        err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 332        if (!err || errno != EINVAL)
 333                FAIL_ERRNO("map_update: expected EINVAL");
 334
 335        value = INT_MAX;
 336        err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 337        if (!err || errno != EBADF)
 338                FAIL_ERRNO("map_update: expected EBADF");
 339}
 340
 341static void test_insert_opened(int family, int sotype, int mapfd)
 342{
 343        u32 key = 0;
 344        u64 value;
 345        int err, s;
 346
 347        s = xsocket(family, sotype, 0);
 348        if (s == -1)
 349                return;
 350
 351        errno = 0;
 352        value = s;
 353        err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 354        if (sotype == SOCK_STREAM) {
 355                if (!err || errno != EOPNOTSUPP)
 356                        FAIL_ERRNO("map_update: expected EOPNOTSUPP");
 357        } else if (err)
 358                FAIL_ERRNO("map_update: expected success");
 359        xclose(s);
 360}
 361
 362static void test_insert_bound(int family, int sotype, int mapfd)
 363{
 364        struct sockaddr_storage addr;
 365        socklen_t len;
 366        u32 key = 0;
 367        u64 value;
 368        int err, s;
 369
 370        init_addr_loopback(family, &addr, &len);
 371
 372        s = xsocket(family, sotype, 0);
 373        if (s == -1)
 374                return;
 375
 376        err = xbind(s, sockaddr(&addr), len);
 377        if (err)
 378                goto close;
 379
 380        errno = 0;
 381        value = s;
 382        err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 383        if (!err || errno != EOPNOTSUPP)
 384                FAIL_ERRNO("map_update: expected EOPNOTSUPP");
 385close:
 386        xclose(s);
 387}
 388
 389static void test_insert(int family, int sotype, int mapfd)
 390{
 391        u64 value;
 392        u32 key;
 393        int s;
 394
 395        s = socket_loopback(family, sotype);
 396        if (s < 0)
 397                return;
 398
 399        key = 0;
 400        value = s;
 401        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 402        xclose(s);
 403}
 404
 405static void test_delete_after_insert(int family, int sotype, int mapfd)
 406{
 407        u64 value;
 408        u32 key;
 409        int s;
 410
 411        s = socket_loopback(family, sotype);
 412        if (s < 0)
 413                return;
 414
 415        key = 0;
 416        value = s;
 417        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 418        xbpf_map_delete_elem(mapfd, &key);
 419        xclose(s);
 420}
 421
 422static void test_delete_after_close(int family, int sotype, int mapfd)
 423{
 424        int err, s;
 425        u64 value;
 426        u32 key;
 427
 428        s = socket_loopback(family, sotype);
 429        if (s < 0)
 430                return;
 431
 432        key = 0;
 433        value = s;
 434        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 435
 436        xclose(s);
 437
 438        errno = 0;
 439        err = bpf_map_delete_elem(mapfd, &key);
 440        if (!err || (errno != EINVAL && errno != ENOENT))
 441                /* SOCKMAP and SOCKHASH return different error codes */
 442                FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
 443}
 444
 445static void test_lookup_after_insert(int family, int sotype, int mapfd)
 446{
 447        u64 cookie, value;
 448        socklen_t len;
 449        u32 key;
 450        int s;
 451
 452        s = socket_loopback(family, sotype);
 453        if (s < 0)
 454                return;
 455
 456        key = 0;
 457        value = s;
 458        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 459
 460        len = sizeof(cookie);
 461        xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
 462
 463        xbpf_map_lookup_elem(mapfd, &key, &value);
 464
 465        if (value != cookie) {
 466                FAIL("map_lookup: have %#llx, want %#llx",
 467                     (unsigned long long)value, (unsigned long long)cookie);
 468        }
 469
 470        xclose(s);
 471}
 472
 473static void test_lookup_after_delete(int family, int sotype, int mapfd)
 474{
 475        int err, s;
 476        u64 value;
 477        u32 key;
 478
 479        s = socket_loopback(family, sotype);
 480        if (s < 0)
 481                return;
 482
 483        key = 0;
 484        value = s;
 485        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 486        xbpf_map_delete_elem(mapfd, &key);
 487
 488        errno = 0;
 489        err = bpf_map_lookup_elem(mapfd, &key, &value);
 490        if (!err || errno != ENOENT)
 491                FAIL_ERRNO("map_lookup: expected ENOENT");
 492
 493        xclose(s);
 494}
 495
 496static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
 497{
 498        u32 key, value32;
 499        int err, s;
 500
 501        s = socket_loopback(family, sotype);
 502        if (s < 0)
 503                return;
 504
 505        mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
 506                               sizeof(value32), 1, 0);
 507        if (mapfd < 0) {
 508                FAIL_ERRNO("map_create");
 509                goto close;
 510        }
 511
 512        key = 0;
 513        value32 = s;
 514        xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
 515
 516        errno = 0;
 517        err = bpf_map_lookup_elem(mapfd, &key, &value32);
 518        if (!err || errno != ENOSPC)
 519                FAIL_ERRNO("map_lookup: expected ENOSPC");
 520
 521        xclose(mapfd);
 522close:
 523        xclose(s);
 524}
 525
 526static void test_update_existing(int family, int sotype, int mapfd)
 527{
 528        int s1, s2;
 529        u64 value;
 530        u32 key;
 531
 532        s1 = socket_loopback(family, sotype);
 533        if (s1 < 0)
 534                return;
 535
 536        s2 = socket_loopback(family, sotype);
 537        if (s2 < 0)
 538                goto close_s1;
 539
 540        key = 0;
 541        value = s1;
 542        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 543
 544        value = s2;
 545        xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
 546        xclose(s2);
 547close_s1:
 548        xclose(s1);
 549}
 550
 551/* Exercise the code path where we destroy child sockets that never
 552 * got accept()'ed, aka orphans, when parent socket gets closed.
 553 */
 554static void test_destroy_orphan_child(int family, int sotype, int mapfd)
 555{
 556        struct sockaddr_storage addr;
 557        socklen_t len;
 558        int err, s, c;
 559        u64 value;
 560        u32 key;
 561
 562        s = socket_loopback(family, sotype);
 563        if (s < 0)
 564                return;
 565
 566        len = sizeof(addr);
 567        err = xgetsockname(s, sockaddr(&addr), &len);
 568        if (err)
 569                goto close_srv;
 570
 571        key = 0;
 572        value = s;
 573        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 574
 575        c = xsocket(family, sotype, 0);
 576        if (c == -1)
 577                goto close_srv;
 578
 579        xconnect(c, sockaddr(&addr), len);
 580        xclose(c);
 581close_srv:
 582        xclose(s);
 583}
 584
 585/* Perform a passive open after removing listening socket from SOCKMAP
 586 * to ensure that callbacks get restored properly.
 587 */
 588static void test_clone_after_delete(int family, int sotype, int mapfd)
 589{
 590        struct sockaddr_storage addr;
 591        socklen_t len;
 592        int err, s, c;
 593        u64 value;
 594        u32 key;
 595
 596        s = socket_loopback(family, sotype);
 597        if (s < 0)
 598                return;
 599
 600        len = sizeof(addr);
 601        err = xgetsockname(s, sockaddr(&addr), &len);
 602        if (err)
 603                goto close_srv;
 604
 605        key = 0;
 606        value = s;
 607        xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
 608        xbpf_map_delete_elem(mapfd, &key);
 609
 610        c = xsocket(family, sotype, 0);
 611        if (c < 0)
 612                goto close_srv;
 613
 614        xconnect(c, sockaddr(&addr), len);
 615        xclose(c);
 616close_srv:
 617        xclose(s);
 618}
 619
 620/* Check that child socket that got created while parent was in a
 621 * SOCKMAP, but got accept()'ed only after the parent has been removed
 622 * from SOCKMAP, gets cloned without parent psock state or callbacks.
 623 */
 624static void test_accept_after_delete(int family, int sotype, int mapfd)
 625{
 626        struct sockaddr_storage addr;
 627        const u32 zero = 0;
 628        int err, s, c, p;
 629        socklen_t len;
 630        u64 value;
 631
 632        s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 633        if (s == -1)
 634                return;
 635
 636        len = sizeof(addr);
 637        err = xgetsockname(s, sockaddr(&addr), &len);
 638        if (err)
 639                goto close_srv;
 640
 641        value = s;
 642        err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
 643        if (err)
 644                goto close_srv;
 645
 646        c = xsocket(family, sotype, 0);
 647        if (c == -1)
 648                goto close_srv;
 649
 650        /* Create child while parent is in sockmap */
 651        err = xconnect(c, sockaddr(&addr), len);
 652        if (err)
 653                goto close_cli;
 654
 655        /* Remove parent from sockmap */
 656        err = xbpf_map_delete_elem(mapfd, &zero);
 657        if (err)
 658                goto close_cli;
 659
 660        p = xaccept_nonblock(s, NULL, NULL);
 661        if (p == -1)
 662                goto close_cli;
 663
 664        /* Check that child sk_user_data is not set */
 665        value = p;
 666        xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
 667
 668        xclose(p);
 669close_cli:
 670        xclose(c);
 671close_srv:
 672        xclose(s);
 673}
 674
 675/* Check that child socket that got created and accepted while parent
 676 * was in a SOCKMAP is cloned without parent psock state or callbacks.
 677 */
 678static void test_accept_before_delete(int family, int sotype, int mapfd)
 679{
 680        struct sockaddr_storage addr;
 681        const u32 zero = 0, one = 1;
 682        int err, s, c, p;
 683        socklen_t len;
 684        u64 value;
 685
 686        s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 687        if (s == -1)
 688                return;
 689
 690        len = sizeof(addr);
 691        err = xgetsockname(s, sockaddr(&addr), &len);
 692        if (err)
 693                goto close_srv;
 694
 695        value = s;
 696        err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
 697        if (err)
 698                goto close_srv;
 699
 700        c = xsocket(family, sotype, 0);
 701        if (c == -1)
 702                goto close_srv;
 703
 704        /* Create & accept child while parent is in sockmap */
 705        err = xconnect(c, sockaddr(&addr), len);
 706        if (err)
 707                goto close_cli;
 708
 709        p = xaccept_nonblock(s, NULL, NULL);
 710        if (p == -1)
 711                goto close_cli;
 712
 713        /* Check that child sk_user_data is not set */
 714        value = p;
 715        xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
 716
 717        xclose(p);
 718close_cli:
 719        xclose(c);
 720close_srv:
 721        xclose(s);
 722}
 723
 724struct connect_accept_ctx {
 725        int sockfd;
 726        unsigned int done;
 727        unsigned int nr_iter;
 728};
 729
 730static bool is_thread_done(struct connect_accept_ctx *ctx)
 731{
 732        return READ_ONCE(ctx->done);
 733}
 734
 735static void *connect_accept_thread(void *arg)
 736{
 737        struct connect_accept_ctx *ctx = arg;
 738        struct sockaddr_storage addr;
 739        int family, socktype;
 740        socklen_t len;
 741        int err, i, s;
 742
 743        s = ctx->sockfd;
 744
 745        len = sizeof(addr);
 746        err = xgetsockname(s, sockaddr(&addr), &len);
 747        if (err)
 748                goto done;
 749
 750        len = sizeof(family);
 751        err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
 752        if (err)
 753                goto done;
 754
 755        len = sizeof(socktype);
 756        err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
 757        if (err)
 758                goto done;
 759
 760        for (i = 0; i < ctx->nr_iter; i++) {
 761                int c, p;
 762
 763                c = xsocket(family, socktype, 0);
 764                if (c < 0)
 765                        break;
 766
 767                err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
 768                if (err) {
 769                        xclose(c);
 770                        break;
 771                }
 772
 773                p = xaccept_nonblock(s, NULL, NULL);
 774                if (p < 0) {
 775                        xclose(c);
 776                        break;
 777                }
 778
 779                xclose(p);
 780                xclose(c);
 781        }
 782done:
 783        WRITE_ONCE(ctx->done, 1);
 784        return NULL;
 785}
 786
 787static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
 788{
 789        struct connect_accept_ctx ctx = { 0 };
 790        struct sockaddr_storage addr;
 791        socklen_t len;
 792        u32 zero = 0;
 793        pthread_t t;
 794        int err, s;
 795        u64 value;
 796
 797        s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 798        if (s < 0)
 799                return;
 800
 801        len = sizeof(addr);
 802        err = xgetsockname(s, sockaddr(&addr), &len);
 803        if (err)
 804                goto close;
 805
 806        ctx.sockfd = s;
 807        ctx.nr_iter = 1000;
 808
 809        err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
 810        if (err)
 811                goto close;
 812
 813        value = s;
 814        while (!is_thread_done(&ctx)) {
 815                err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
 816                if (err)
 817                        break;
 818
 819                err = xbpf_map_delete_elem(mapfd, &zero);
 820                if (err)
 821                        break;
 822        }
 823
 824        xpthread_join(t, NULL);
 825close:
 826        xclose(s);
 827}
 828
 829static void *listen_thread(void *arg)
 830{
 831        struct sockaddr unspec = { AF_UNSPEC };
 832        struct connect_accept_ctx *ctx = arg;
 833        int err, i, s;
 834
 835        s = ctx->sockfd;
 836
 837        for (i = 0; i < ctx->nr_iter; i++) {
 838                err = xlisten(s, 1);
 839                if (err)
 840                        break;
 841                err = xconnect(s, &unspec, sizeof(unspec));
 842                if (err)
 843                        break;
 844        }
 845
 846        WRITE_ONCE(ctx->done, 1);
 847        return NULL;
 848}
 849
 850static void test_race_insert_listen(int family, int socktype, int mapfd)
 851{
 852        struct connect_accept_ctx ctx = { 0 };
 853        const u32 zero = 0;
 854        const int one = 1;
 855        pthread_t t;
 856        int err, s;
 857        u64 value;
 858
 859        s = xsocket(family, socktype, 0);
 860        if (s < 0)
 861                return;
 862
 863        err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
 864        if (err)
 865                goto close;
 866
 867        ctx.sockfd = s;
 868        ctx.nr_iter = 10000;
 869
 870        err = pthread_create(&t, NULL, listen_thread, &ctx);
 871        if (err)
 872                goto close;
 873
 874        value = s;
 875        while (!is_thread_done(&ctx)) {
 876                err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
 877                /* Expecting EOPNOTSUPP before listen() */
 878                if (err && errno != EOPNOTSUPP) {
 879                        FAIL_ERRNO("map_update");
 880                        break;
 881                }
 882
 883                err = bpf_map_delete_elem(mapfd, &zero);
 884                /* Expecting no entry after unhash on connect(AF_UNSPEC) */
 885                if (err && errno != EINVAL && errno != ENOENT) {
 886                        FAIL_ERRNO("map_delete");
 887                        break;
 888                }
 889        }
 890
 891        xpthread_join(t, NULL);
 892close:
 893        xclose(s);
 894}
 895
 896static void zero_verdict_count(int mapfd)
 897{
 898        unsigned int zero = 0;
 899        int key;
 900
 901        key = SK_DROP;
 902        xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
 903        key = SK_PASS;
 904        xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
 905}
 906
 907enum redir_mode {
 908        REDIR_INGRESS,
 909        REDIR_EGRESS,
 910};
 911
 912static const char *redir_mode_str(enum redir_mode mode)
 913{
 914        switch (mode) {
 915        case REDIR_INGRESS:
 916                return "ingress";
 917        case REDIR_EGRESS:
 918                return "egress";
 919        default:
 920                return "unknown";
 921        }
 922}
 923
 924static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
 925{
 926        u64 value;
 927        u32 key;
 928        int err;
 929
 930        key = 0;
 931        value = fd1;
 932        err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
 933        if (err)
 934                return err;
 935
 936        key = 1;
 937        value = fd2;
 938        return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
 939}
 940
 941static void redir_to_connected(int family, int sotype, int sock_mapfd,
 942                               int verd_mapfd, enum redir_mode mode)
 943{
 944        const char *log_prefix = redir_mode_str(mode);
 945        struct sockaddr_storage addr;
 946        int s, c0, c1, p0, p1;
 947        unsigned int pass;
 948        socklen_t len;
 949        int err, n;
 950        u32 key;
 951        char b;
 952
 953        zero_verdict_count(verd_mapfd);
 954
 955        s = socket_loopback(family, sotype | SOCK_NONBLOCK);
 956        if (s < 0)
 957                return;
 958
 959        len = sizeof(addr);
 960        err = xgetsockname(s, sockaddr(&addr), &len);
 961        if (err)
 962                goto close_srv;
 963
 964        c0 = xsocket(family, sotype, 0);
 965        if (c0 < 0)
 966                goto close_srv;
 967        err = xconnect(c0, sockaddr(&addr), len);
 968        if (err)
 969                goto close_cli0;
 970
 971        p0 = xaccept_nonblock(s, NULL, NULL);
 972        if (p0 < 0)
 973                goto close_cli0;
 974
 975        c1 = xsocket(family, sotype, 0);
 976        if (c1 < 0)
 977                goto close_peer0;
 978        err = xconnect(c1, sockaddr(&addr), len);
 979        if (err)
 980                goto close_cli1;
 981
 982        p1 = xaccept_nonblock(s, NULL, NULL);
 983        if (p1 < 0)
 984                goto close_cli1;
 985
 986        err = add_to_sockmap(sock_mapfd, p0, p1);
 987        if (err)
 988                goto close_peer1;
 989
 990        n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
 991        if (n < 0)
 992                FAIL_ERRNO("%s: write", log_prefix);
 993        if (n == 0)
 994                FAIL("%s: incomplete write", log_prefix);
 995        if (n < 1)
 996                goto close_peer1;
 997
 998        key = SK_PASS;
 999        err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1000        if (err)
1001                goto close_peer1;
1002        if (pass != 1)
1003                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1004        n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
1005        if (n < 0)
1006                FAIL_ERRNO("%s: recv_timeout", log_prefix);
1007        if (n == 0)
1008                FAIL("%s: incomplete recv", log_prefix);
1009
1010close_peer1:
1011        xclose(p1);
1012close_cli1:
1013        xclose(c1);
1014close_peer0:
1015        xclose(p0);
1016close_cli0:
1017        xclose(c0);
1018close_srv:
1019        xclose(s);
1020}
1021
1022static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
1023                                        struct bpf_map *inner_map, int family,
1024                                        int sotype)
1025{
1026        int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1027        int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1028        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1029        int sock_map = bpf_map__fd(inner_map);
1030        int err;
1031
1032        err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1033        if (err)
1034                return;
1035        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1036        if (err)
1037                goto detach;
1038
1039        redir_to_connected(family, sotype, sock_map, verdict_map,
1040                           REDIR_INGRESS);
1041
1042        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1043detach:
1044        xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1045}
1046
1047static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
1048                                        struct bpf_map *inner_map, int family,
1049                                        int sotype)
1050{
1051        int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1052        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1053        int sock_map = bpf_map__fd(inner_map);
1054        int err;
1055
1056        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1057        if (err)
1058                return;
1059
1060        redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1061
1062        xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1063}
1064
1065static void redir_to_listening(int family, int sotype, int sock_mapfd,
1066                               int verd_mapfd, enum redir_mode mode)
1067{
1068        const char *log_prefix = redir_mode_str(mode);
1069        struct sockaddr_storage addr;
1070        int s, c, p, err, n;
1071        unsigned int drop;
1072        socklen_t len;
1073        u32 key;
1074
1075        zero_verdict_count(verd_mapfd);
1076
1077        s = socket_loopback(family, sotype | SOCK_NONBLOCK);
1078        if (s < 0)
1079                return;
1080
1081        len = sizeof(addr);
1082        err = xgetsockname(s, sockaddr(&addr), &len);
1083        if (err)
1084                goto close_srv;
1085
1086        c = xsocket(family, sotype, 0);
1087        if (c < 0)
1088                goto close_srv;
1089        err = xconnect(c, sockaddr(&addr), len);
1090        if (err)
1091                goto close_cli;
1092
1093        p = xaccept_nonblock(s, NULL, NULL);
1094        if (p < 0)
1095                goto close_cli;
1096
1097        err = add_to_sockmap(sock_mapfd, s, p);
1098        if (err)
1099                goto close_peer;
1100
1101        n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
1102        if (n < 0 && errno != EACCES)
1103                FAIL_ERRNO("%s: write", log_prefix);
1104        if (n == 0)
1105                FAIL("%s: incomplete write", log_prefix);
1106        if (n < 1)
1107                goto close_peer;
1108
1109        key = SK_DROP;
1110        err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
1111        if (err)
1112                goto close_peer;
1113        if (drop != 1)
1114                FAIL("%s: want drop count 1, have %d", log_prefix, drop);
1115
1116close_peer:
1117        xclose(p);
1118close_cli:
1119        xclose(c);
1120close_srv:
1121        xclose(s);
1122}
1123
1124static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
1125                                        struct bpf_map *inner_map, int family,
1126                                        int sotype)
1127{
1128        int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1129        int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1130        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1131        int sock_map = bpf_map__fd(inner_map);
1132        int err;
1133
1134        err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1135        if (err)
1136                return;
1137        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1138        if (err)
1139                goto detach;
1140
1141        redir_to_listening(family, sotype, sock_map, verdict_map,
1142                           REDIR_INGRESS);
1143
1144        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1145detach:
1146        xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1147}
1148
1149static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
1150                                        struct bpf_map *inner_map, int family,
1151                                        int sotype)
1152{
1153        int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1154        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1155        int sock_map = bpf_map__fd(inner_map);
1156        int err;
1157
1158        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1159        if (err)
1160                return;
1161
1162        redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1163
1164        xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1165}
1166
1167static void test_reuseport_select_listening(int family, int sotype,
1168                                            int sock_map, int verd_map,
1169                                            int reuseport_prog)
1170{
1171        struct sockaddr_storage addr;
1172        unsigned int pass;
1173        int s, c, err;
1174        socklen_t len;
1175        u64 value;
1176        u32 key;
1177
1178        zero_verdict_count(verd_map);
1179
1180        s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
1181                                      reuseport_prog);
1182        if (s < 0)
1183                return;
1184
1185        len = sizeof(addr);
1186        err = xgetsockname(s, sockaddr(&addr), &len);
1187        if (err)
1188                goto close_srv;
1189
1190        key = 0;
1191        value = s;
1192        err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1193        if (err)
1194                goto close_srv;
1195
1196        c = xsocket(family, sotype, 0);
1197        if (c < 0)
1198                goto close_srv;
1199        err = xconnect(c, sockaddr(&addr), len);
1200        if (err)
1201                goto close_cli;
1202
1203        if (sotype == SOCK_STREAM) {
1204                int p;
1205
1206                p = xaccept_nonblock(s, NULL, NULL);
1207                if (p < 0)
1208                        goto close_cli;
1209                xclose(p);
1210        } else {
1211                char b = 'a';
1212                ssize_t n;
1213
1214                n = xsend(c, &b, sizeof(b), 0);
1215                if (n == -1)
1216                        goto close_cli;
1217
1218                n = xrecv_nonblock(s, &b, sizeof(b), 0);
1219                if (n == -1)
1220                        goto close_cli;
1221        }
1222
1223        key = SK_PASS;
1224        err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1225        if (err)
1226                goto close_cli;
1227        if (pass != 1)
1228                FAIL("want pass count 1, have %d", pass);
1229
1230close_cli:
1231        xclose(c);
1232close_srv:
1233        xclose(s);
1234}
1235
/*
 * Reuseport selection of a connected socket must be refused.
 *
 * sock_map[0] first holds the server socket so that a first connection (c0)
 * can be established. It is then replaced with a connected socket p0: the
 * accepted peer for SOCK_STREAM, or a separate socket connected back to c0
 * otherwise. A second client (c1) then targets the server address. The test
 * expects ECONNREFUSED, from connect() itself or, for SOCK_DGRAM, from the
 * receive that follows a send. It also expects the SK_DROP counter in the
 * verdict map to be exactly 1.
 *
 * Every socket opened here is closed on every exit path.
 *
 * @family:         address family for all sockets.
 * @sotype:         socket type for all sockets.
 * @sock_map:       socket map consulted through key 0.
 * @verd_map:       verdict counter map; zeroed on entry.
 * @reuseport_prog: program fd handed to socket_loopback_reuseport().
 */
static void test_reuseport_select_connected(int family, int sotype,
                                            int sock_map, int verd_map,
                                            int reuseport_prog)
{
        struct sockaddr_storage addr;
        int s, c0, c1, p0, err;
        unsigned int drop;
        socklen_t len;
        u64 value;
        u32 key;

        zero_verdict_count(verd_map);

        s = socket_loopback_reuseport(family, sotype, reuseport_prog);
        if (s < 0)
                return;

        /* Populate sock_map[0] to avoid ENOENT on first connection */
        key = 0;
        value = s;
        err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
        if (err)
                goto close_srv;

        len = sizeof(addr);
        err = xgetsockname(s, sockaddr(&addr), &len);
        if (err)
                goto close_srv;

        c0 = xsocket(family, sotype, 0);
        if (c0 < 0)
                goto close_srv;

        err = xconnect(c0, sockaddr(&addr), len);
        if (err)
                goto close_cli0;

        if (sotype == SOCK_STREAM) {
                p0 = xaccept_nonblock(s, NULL, NULL);
                if (p0 < 0)
                        goto close_cli0;
        } else {
                p0 = xsocket(family, sotype, 0);
                if (p0 < 0)
                        goto close_cli0;

                /* p0 is open from here on, so it has to be closed as well */
                len = sizeof(addr);
                err = xgetsockname(c0, sockaddr(&addr), &len);
                if (err)
                        goto close_peer0;

                err = xconnect(p0, sockaddr(&addr), len);
                if (err)
                        goto close_peer0;
        }

        /* Update sock_map[0] to redirect to a connected socket */
        key = 0;
        value = p0;
        err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
        if (err)
                goto close_peer0;

        c1 = xsocket(family, sotype, 0);
        if (c1 < 0)
                goto close_peer0;

        /* c1 is open from here on; unwind through close_cli1 on failure */
        len = sizeof(addr);
        err = xgetsockname(s, sockaddr(&addr), &len);
        if (err)
                goto close_cli1;

        /* Start from a clean errno so a stale value is never reported */
        errno = 0;
        err = connect(c1, sockaddr(&addr), len);
        if (sotype == SOCK_DGRAM) {
                char b = 'a';
                ssize_t n;

                n = xsend(c1, &b, sizeof(b), 0);
                if (n == -1)
                        goto close_cli1;

                /* For datagrams the refusal shows up on the receive */
                n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
                err = n == -1;
        }
        if (!err || errno != ECONNREFUSED)
                FAIL_ERRNO("connect: expected ECONNREFUSED");

        key = SK_DROP;
        err = xbpf_map_lookup_elem(verd_map, &key, &drop);
        if (err)
                goto close_cli1;
        if (drop != 1)
                FAIL("want drop count 1, have %u", drop);

close_cli1:
        xclose(c1);
close_peer0:
        xclose(p0);
close_cli0:
        xclose(c0);
close_srv:
        xclose(s);
}
1340
/*
 * Check that redirecting across reuseport groups is not allowed.
 *
 * Two server sockets are created, each in its own reuseport group, and put
 * in the socket map (s1 at key 0, s2 at key 1). A client then targets s2.
 * The test expects ECONNREFUSED, from connect() itself or, for SOCK_DGRAM,
 * from the receive that follows a send. It also expects the SK_DROP counter
 * in the verdict map to be exactly 1.
 *
 * @family:         address family for all sockets.
 * @sotype:         socket type for all sockets.
 * @sock_map:       socket map receiving both server sockets.
 * @verd_map:       verdict counter map; zeroed on entry.
 * @reuseport_prog: program fd handed to socket_loopback_reuseport().
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
                                        int verd_map, int reuseport_prog)
{
        struct sockaddr_storage addr;
        int s1, s2, c, err;
        unsigned int drop;
        socklen_t len;
        u32 key;

        zero_verdict_count(verd_map);

        /* Create two listeners, each in its own reuseport group */
        s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
        if (s1 < 0)
                return;

        s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
        if (s2 < 0)
                goto close_srv1;

        err = add_to_sockmap(sock_map, s1, s2);
        if (err)
                goto close_srv2;

        /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
        len = sizeof(addr);
        err = xgetsockname(s2, sockaddr(&addr), &len);
        if (err)
                goto close_srv2;

        c = xsocket(family, sotype, 0);
        if (c < 0)
                goto close_srv2;

        /*
         * Start from a clean errno: if the connection unexpectedly succeeds,
         * FAIL_ERRNO() below must not print an unrelated stale error.
         */
        errno = 0;
        err = connect(c, sockaddr(&addr), len);
        if (sotype == SOCK_DGRAM) {
                char b = 'a';
                ssize_t n;

                n = xsend(c, &b, sizeof(b), 0);
                if (n == -1)
                        goto close_cli;

                /* For datagrams the refusal shows up on the receive */
                n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
                err = n == -1;
        }
        if (!err || errno != ECONNREFUSED) {
                FAIL_ERRNO("connect: expected ECONNREFUSED");
                goto close_cli;
        }

        /* Expect drop, can't redirect outside of reuseport group */
        key = SK_DROP;
        err = xbpf_map_lookup_elem(verd_map, &key, &drop);
        if (err)
                goto close_cli;
        if (drop != 1)
                FAIL("want drop count 1, have %u", drop);

close_cli:
        xclose(c);
close_srv2:
        xclose(s2);
close_srv1:
        xclose(s1);
}
1408
/*
 * Build a brace-enclosed initializer for a test table entry: the function,
 * its name as a string literal, then any extra initializers passed in.
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1413
1414static void test_ops_cleanup(const struct bpf_map *map)
1415{
1416        const struct bpf_map_def *def;
1417        int err, mapfd;
1418        u32 key;
1419
1420        def = bpf_map__def(map);
1421        mapfd = bpf_map__fd(map);
1422
1423        for (key = 0; key < def->max_entries; key++) {
1424                err = bpf_map_delete_elem(mapfd, &key);
1425                if (err && errno != EINVAL && errno != ENOENT)
1426                        FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1427        }
1428}
1429
/*
 * Map an address family to a short name used in subtest names.
 *
 * Return: "IPv4", "IPv6" or "Unix"; "unknown" for any other family.
 */
static const char *family_str(sa_family_t family)
{
        if (family == AF_INET)
                return "IPv4";
        if (family == AF_INET6)
                return "IPv6";
        if (family == AF_UNIX)
                return "Unix";
        return "unknown";
}
1443
1444static const char *map_type_str(const struct bpf_map *map)
1445{
1446        const struct bpf_map_def *def;
1447
1448        def = bpf_map__def(map);
1449        if (IS_ERR(def))
1450                return "invalid";
1451
1452        switch (def->type) {
1453        case BPF_MAP_TYPE_SOCKMAP:
1454                return "sockmap";
1455        case BPF_MAP_TYPE_SOCKHASH:
1456                return "sockhash";
1457        default:
1458                return "unknown";
1459        }
1460}
1461
/*
 * Map a socket type to a short protocol name used in subtest names.
 *
 * Return: "UDP" for SOCK_DGRAM, "TCP" for SOCK_STREAM, otherwise "unknown".
 */
static const char *sotype_str(int sotype)
{
        if (sotype == SOCK_DGRAM)
                return "UDP";
        if (sotype == SOCK_STREAM)
                return "TCP";
        return "unknown";
}
1473
1474static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1475                     int family, int sotype)
1476{
1477        const struct op_test {
1478                void (*fn)(int family, int sotype, int mapfd);
1479                const char *name;
1480                int sotype;
1481        } tests[] = {
1482                /* insert */
1483                TEST(test_insert_invalid),
1484                TEST(test_insert_opened),
1485                TEST(test_insert_bound, SOCK_STREAM),
1486                TEST(test_insert),
1487                /* delete */
1488                TEST(test_delete_after_insert),
1489                TEST(test_delete_after_close),
1490                /* lookup */
1491                TEST(test_lookup_after_insert),
1492                TEST(test_lookup_after_delete),
1493                TEST(test_lookup_32_bit_value),
1494                /* update */
1495                TEST(test_update_existing),
1496                /* races with insert/delete */
1497                TEST(test_destroy_orphan_child, SOCK_STREAM),
1498                TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1499                TEST(test_race_insert_listen, SOCK_STREAM),
1500                /* child clone */
1501                TEST(test_clone_after_delete, SOCK_STREAM),
1502                TEST(test_accept_after_delete, SOCK_STREAM),
1503                TEST(test_accept_before_delete, SOCK_STREAM),
1504        };
1505        const char *family_name, *map_name, *sotype_name;
1506        const struct op_test *t;
1507        char s[MAX_TEST_NAME];
1508        int map_fd;
1509
1510        family_name = family_str(family);
1511        map_name = map_type_str(map);
1512        sotype_name = sotype_str(sotype);
1513        map_fd = bpf_map__fd(map);
1514
1515        for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1516                snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1517                         sotype_name, t->name);
1518
1519                if (t->sotype != 0 && t->sotype != sotype)
1520                        continue;
1521
1522                if (!test__start_subtest(s))
1523                        continue;
1524
1525                t->fn(family, sotype, map_fd);
1526                test_ops_cleanup(map);
1527        }
1528}
1529
1530static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1531                       int family, int sotype)
1532{
1533        const struct redir_test {
1534                void (*fn)(struct test_sockmap_listen *skel,
1535                           struct bpf_map *map, int family, int sotype);
1536                const char *name;
1537        } tests[] = {
1538                TEST(test_skb_redir_to_connected),
1539                TEST(test_skb_redir_to_listening),
1540                TEST(test_msg_redir_to_connected),
1541                TEST(test_msg_redir_to_listening),
1542        };
1543        const char *family_name, *map_name;
1544        const struct redir_test *t;
1545        char s[MAX_TEST_NAME];
1546
1547        family_name = family_str(family);
1548        map_name = map_type_str(map);
1549
1550        for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1551                snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1552                         t->name);
1553
1554                if (!test__start_subtest(s))
1555                        continue;
1556
1557                t->fn(skel, map, family, sotype);
1558        }
1559}
1560
1561static void unix_redir_to_connected(int sotype, int sock_mapfd,
1562                               int verd_mapfd, enum redir_mode mode)
1563{
1564        const char *log_prefix = redir_mode_str(mode);
1565        int c0, c1, p0, p1;
1566        unsigned int pass;
1567        int err, n;
1568        int sfd[2];
1569        u32 key;
1570        char b;
1571
1572        zero_verdict_count(verd_mapfd);
1573
1574        if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1575                return;
1576        c0 = sfd[0], p0 = sfd[1];
1577
1578        if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1579                goto close0;
1580        c1 = sfd[0], p1 = sfd[1];
1581
1582        err = add_to_sockmap(sock_mapfd, p0, p1);
1583        if (err)
1584                goto close;
1585
1586        n = write(c1, "a", 1);
1587        if (n < 0)
1588                FAIL_ERRNO("%s: write", log_prefix);
1589        if (n == 0)
1590                FAIL("%s: incomplete write", log_prefix);
1591        if (n < 1)
1592                goto close;
1593
1594        key = SK_PASS;
1595        err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1596        if (err)
1597                goto close;
1598        if (pass != 1)
1599                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1600
1601        n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1602        if (n < 0)
1603                FAIL_ERRNO("%s: recv_timeout", log_prefix);
1604        if (n == 0)
1605                FAIL("%s: incomplete recv", log_prefix);
1606
1607close:
1608        xclose(c1);
1609        xclose(p1);
1610close0:
1611        xclose(c0);
1612        xclose(p0);
1613}
1614
1615static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1616                                        struct bpf_map *inner_map, int sotype)
1617{
1618        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1619        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1620        int sock_map = bpf_map__fd(inner_map);
1621        int err;
1622
1623        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1624        if (err)
1625                return;
1626
1627        skel->bss->test_ingress = false;
1628        unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1629        skel->bss->test_ingress = true;
1630        unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1631
1632        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1633}
1634
1635static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1636                            int sotype)
1637{
1638        const char *family_name, *map_name;
1639        char s[MAX_TEST_NAME];
1640
1641        family_name = family_str(AF_UNIX);
1642        map_name = map_type_str(map);
1643        snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1644        if (!test__start_subtest(s))
1645                return;
1646        unix_skb_redir_to_connected(skel, map, sotype);
1647}
1648
1649static void test_reuseport(struct test_sockmap_listen *skel,
1650                           struct bpf_map *map, int family, int sotype)
1651{
1652        const struct reuseport_test {
1653                void (*fn)(int family, int sotype, int socket_map,
1654                           int verdict_map, int reuseport_prog);
1655                const char *name;
1656                int sotype;
1657        } tests[] = {
1658                TEST(test_reuseport_select_listening),
1659                TEST(test_reuseport_select_connected),
1660                TEST(test_reuseport_mixed_groups),
1661        };
1662        int socket_map, verdict_map, reuseport_prog;
1663        const char *family_name, *map_name, *sotype_name;
1664        const struct reuseport_test *t;
1665        char s[MAX_TEST_NAME];
1666
1667        family_name = family_str(family);
1668        map_name = map_type_str(map);
1669        sotype_name = sotype_str(sotype);
1670
1671        socket_map = bpf_map__fd(map);
1672        verdict_map = bpf_map__fd(skel->maps.verdict_map);
1673        reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1674
1675        for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1676                snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1677                         sotype_name, t->name);
1678
1679                if (t->sotype != 0 && t->sotype != sotype)
1680                        continue;
1681
1682                if (!test__start_subtest(s))
1683                        continue;
1684
1685                t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1686        }
1687}
1688
/*
 * Create a pair of non-blocking sockets connected to each other.
 *
 * The first socket comes from socket_loopback(); the second is connected to
 * the first one's address, and the first is then connected back to the
 * second one's address.
 *
 * @family: address family for both sockets.
 * @type:   socket type for both sockets (SOCK_NONBLOCK is added here).
 * @s:      receives the first socket on success.
 * @c:      receives the second socket on success.
 *
 * Return: 0 on success. On failure both sockets are closed, *s and *c are
 * left untouched and the failing call's non-zero result is returned.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
        struct sockaddr_storage addr;
        socklen_t len = sizeof(addr);
        int srv, cli, err;

        srv = socket_loopback(family, type | SOCK_NONBLOCK);
        if (srv < 0)
                return srv;

        err = xgetsockname(srv, sockaddr(&addr), &len);
        if (err)
                goto out_srv;

        cli = xsocket(family, type | SOCK_NONBLOCK, 0);
        if (cli < 0) {
                err = cli;
                goto out_srv;
        }

        /* Connect cli -> srv, then srv -> cli; stop at the first failure */
        err = xconnect(cli, sockaddr(&addr), len);
        if (!err)
                err = xgetsockname(cli, sockaddr(&addr), &len);
        if (!err)
                err = xconnect(srv, sockaddr(&addr), len);
        if (err)
                goto out_cli;

        *s = srv;
        *c = cli;
        return 0;

out_cli:
        xclose(cli);
out_srv:
        xclose(srv);
        return err;
}
1730
1731static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1732                                   enum redir_mode mode)
1733{
1734        const char *log_prefix = redir_mode_str(mode);
1735        int c0, c1, p0, p1;
1736        unsigned int pass;
1737        int err, n;
1738        u32 key;
1739        char b;
1740
1741        zero_verdict_count(verd_mapfd);
1742
1743        err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1744        if (err)
1745                return;
1746        err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1747        if (err)
1748                goto close_cli0;
1749
1750        err = add_to_sockmap(sock_mapfd, p0, p1);
1751        if (err)
1752                goto close_cli1;
1753
1754        n = write(c1, "a", 1);
1755        if (n < 0)
1756                FAIL_ERRNO("%s: write", log_prefix);
1757        if (n == 0)
1758                FAIL("%s: incomplete write", log_prefix);
1759        if (n < 1)
1760                goto close_cli1;
1761
1762        key = SK_PASS;
1763        err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1764        if (err)
1765                goto close_cli1;
1766        if (pass != 1)
1767                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1768
1769        n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1770        if (n < 0)
1771                FAIL_ERRNO("%s: recv_timeout", log_prefix);
1772        if (n == 0)
1773                FAIL("%s: incomplete recv", log_prefix);
1774
1775close_cli1:
1776        xclose(c1);
1777        xclose(p1);
1778close_cli0:
1779        xclose(c0);
1780        xclose(p0);
1781}
1782
1783static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1784                                       struct bpf_map *inner_map, int family)
1785{
1786        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1787        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1788        int sock_map = bpf_map__fd(inner_map);
1789        int err;
1790
1791        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1792        if (err)
1793                return;
1794
1795        skel->bss->test_ingress = false;
1796        udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1797        skel->bss->test_ingress = true;
1798        udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1799
1800        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1801}
1802
1803static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1804                           int family)
1805{
1806        const char *family_name, *map_name;
1807        char s[MAX_TEST_NAME];
1808
1809        family_name = family_str(family);
1810        map_name = map_type_str(map);
1811        snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1812        if (!test__start_subtest(s))
1813                return;
1814        udp_skb_redir_to_connected(skel, map, family);
1815}
1816
1817static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1818                                        int verd_mapfd, enum redir_mode mode)
1819{
1820        const char *log_prefix = redir_mode_str(mode);
1821        int c0, c1, p0, p1;
1822        unsigned int pass;
1823        int err, n;
1824        int sfd[2];
1825        u32 key;
1826        char b;
1827
1828        zero_verdict_count(verd_mapfd);
1829
1830        if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1831                return;
1832        c0 = sfd[0], p0 = sfd[1];
1833
1834        err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1835        if (err)
1836                goto close;
1837
1838        err = add_to_sockmap(sock_mapfd, p0, p1);
1839        if (err)
1840                goto close_cli1;
1841
1842        n = write(c1, "a", 1);
1843        if (n < 0)
1844                FAIL_ERRNO("%s: write", log_prefix);
1845        if (n == 0)
1846                FAIL("%s: incomplete write", log_prefix);
1847        if (n < 1)
1848                goto close_cli1;
1849
1850        key = SK_PASS;
1851        err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1852        if (err)
1853                goto close_cli1;
1854        if (pass != 1)
1855                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1856
1857        n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1858        if (n < 0)
1859                FAIL_ERRNO("%s: recv_timeout", log_prefix);
1860        if (n == 0)
1861                FAIL("%s: incomplete recv", log_prefix);
1862
1863close_cli1:
1864        xclose(c1);
1865        xclose(p1);
1866close:
1867        xclose(c0);
1868        xclose(p0);
1869}
1870
1871static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1872                                            struct bpf_map *inner_map, int family)
1873{
1874        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1875        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1876        int sock_map = bpf_map__fd(inner_map);
1877        int err;
1878
1879        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1880        if (err)
1881                return;
1882
1883        skel->bss->test_ingress = false;
1884        inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1885                                    REDIR_EGRESS);
1886        inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1887                                    REDIR_EGRESS);
1888        skel->bss->test_ingress = true;
1889        inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1890                                    REDIR_INGRESS);
1891        inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1892                                    REDIR_INGRESS);
1893
1894        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1895}
1896
1897static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
1898                                        int verd_mapfd, enum redir_mode mode)
1899{
1900        const char *log_prefix = redir_mode_str(mode);
1901        int c0, c1, p0, p1;
1902        unsigned int pass;
1903        int err, n;
1904        int sfd[2];
1905        u32 key;
1906        char b;
1907
1908        zero_verdict_count(verd_mapfd);
1909
1910        err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1911        if (err)
1912                return;
1913
1914        if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1915                goto close_cli0;
1916        c1 = sfd[0], p1 = sfd[1];
1917
1918        err = add_to_sockmap(sock_mapfd, p0, p1);
1919        if (err)
1920                goto close;
1921
1922        n = write(c1, "a", 1);
1923        if (n < 0)
1924                FAIL_ERRNO("%s: write", log_prefix);
1925        if (n == 0)
1926                FAIL("%s: incomplete write", log_prefix);
1927        if (n < 1)
1928                goto close;
1929
1930        key = SK_PASS;
1931        err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1932        if (err)
1933                goto close;
1934        if (pass != 1)
1935                FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1936
1937        n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1938        if (n < 0)
1939                FAIL_ERRNO("%s: recv_timeout", log_prefix);
1940        if (n == 0)
1941                FAIL("%s: incomplete recv", log_prefix);
1942
1943close:
1944        xclose(c1);
1945        xclose(p1);
1946close_cli0:
1947        xclose(c0);
1948        xclose(p0);
1949
1950}
1951
1952static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1953                                            struct bpf_map *inner_map, int family)
1954{
1955        int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1956        int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1957        int sock_map = bpf_map__fd(inner_map);
1958        int err;
1959
1960        err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1961        if (err)
1962                return;
1963
1964        skel->bss->test_ingress = false;
1965        unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1966                                     REDIR_EGRESS);
1967        unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1968                                     REDIR_EGRESS);
1969        skel->bss->test_ingress = true;
1970        unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1971                                     REDIR_INGRESS);
1972        unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1973                                     REDIR_INGRESS);
1974
1975        xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1976}
1977
1978static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1979                                int family)
1980{
1981        const char *family_name, *map_name;
1982        char s[MAX_TEST_NAME];
1983
1984        family_name = family_str(family);
1985        map_name = map_type_str(map);
1986        snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1987        if (!test__start_subtest(s))
1988                return;
1989        inet_unix_skb_redir_to_connected(skel, map, family);
1990        unix_inet_skb_redir_to_connected(skel, map, family);
1991}
1992
/*
 * Run every per-family test group against @map, in a fixed order: map ops
 * (stream, dgram), stream redirect, reuseport (stream, dgram), UDP redirect,
 * and finally the UDP <-> AF_UNIX redirect tests.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
                      int family)
{
        static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
        const size_t nr_sotypes = sizeof(sotypes) / sizeof(sotypes[0]);
        size_t i;

        for (i = 0; i < nr_sotypes; i++)
                test_ops(skel, map, family, sotypes[i]);

        /* Redirect via listening sockets is exercised for stream only. */
        test_redir(skel, map, family, SOCK_STREAM);

        for (i = 0; i < nr_sotypes; i++)
                test_reuseport(skel, map, family, sotypes[i]);

        test_udp_redir(skel, map, family);
        test_udp_unix_redir(skel, map, family);
}
2004
2005void test_sockmap_listen(void)
2006{
2007        struct test_sockmap_listen *skel;
2008
2009        skel = test_sockmap_listen__open_and_load();
2010        if (!skel) {
2011                FAIL("skeleton open/load failed");
2012                return;
2013        }
2014
2015        skel->bss->test_sockmap = true;
2016        run_tests(skel, skel->maps.sock_map, AF_INET);
2017        run_tests(skel, skel->maps.sock_map, AF_INET6);
2018        test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
2019        test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
2020
2021        skel->bss->test_sockmap = false;
2022        run_tests(skel, skel->maps.sock_hash, AF_INET);
2023        run_tests(skel, skel->maps.sock_hash, AF_INET6);
2024        test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
2025        test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
2026
2027        test_sockmap_listen__destroy(skel);
2028}
2029