linux/tools/testing/selftests/net/toeplitz.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/* Toeplitz test
   3 *
   4 * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
   5 * 2. Compute the rx_hash in software based on the packet contents
   6 * 3. Compare the two
   7 *
   8 * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
   9 *
  10 * If '-C $rx_irq_cpu_list' is given, also
  11 *
  12 * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
  13 * 5. Compute the rxqueue that RSS would select based on this rx_hash
  14 * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
  15 * 7. Compare the cpus from 4 and 6
  16 *
  17 * Else if '-r $rps_bitmap' is given, also
  18 *
  19 * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
  20 * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
  21 * 6. Compare the cpus from 4 and 5
  22 */
  23
  24#define _GNU_SOURCE
  25
  26#include <arpa/inet.h>
  27#include <errno.h>
  28#include <error.h>
  29#include <fcntl.h>
  30#include <getopt.h>
  31#include <linux/filter.h>
  32#include <linux/if_ether.h>
  33#include <linux/if_packet.h>
  34#include <net/if.h>
  35#include <netdb.h>
  36#include <netinet/ip.h>
  37#include <netinet/ip6.h>
  38#include <netinet/tcp.h>
  39#include <netinet/udp.h>
  40#include <poll.h>
  41#include <stdbool.h>
  42#include <stddef.h>
  43#include <stdint.h>
  44#include <stdio.h>
  45#include <stdlib.h>
  46#include <string.h>
  47#include <sys/mman.h>
  48#include <sys/socket.h>
  49#include <sys/stat.h>
  50#include <sys/sysinfo.h>
  51#include <sys/time.h>
  52#include <sys/types.h>
  53#include <unistd.h>
  54
  55#define TOEPLITZ_KEY_MIN_LEN    40
  56#define TOEPLITZ_KEY_MAX_LEN    60
  57
  58#define TOEPLITZ_STR_LEN(K)     (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
  59#define TOEPLITZ_STR_MIN_LEN    TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
  60#define TOEPLITZ_STR_MAX_LEN    TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
  61
  62#define FOUR_TUPLE_MAX_LEN      ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
  63
  64#define RSS_MAX_CPUS (1 << 16)  /* real constraint is PACKET_FANOUT_MAX */
  65
  66#define RPS_MAX_CPUS 16UL       /* must be a power of 2 */
  67
  68/* configuration options (cmdline arguments) */
  69static uint16_t cfg_dport =     8000;
  70static int cfg_family =         AF_INET6;
  71static char *cfg_ifname =       "eth0";
  72static int cfg_num_queues;
  73static int cfg_num_rps_cpus;
  74static bool cfg_sink;
  75static int cfg_type =           SOCK_STREAM;
  76static int cfg_timeout_msec =   1000;
  77static bool cfg_verbose;
  78
  79/* global vars */
  80static int num_cpus;
  81static int ring_block_nr;
  82static int ring_block_sz;
  83
  84/* stats */
  85static int frames_received;
  86static int frames_nohash;
  87static int frames_error;
  88
  89#define log_verbose(args...)    do { if (cfg_verbose) fprintf(stderr, args); } while (0)
  90
  91/* tpacket ring */
  92struct ring_state {
  93        int fd;
  94        char *mmap;
  95        int idx;
  96        int cpu;
  97};
  98
  99static unsigned int rx_irq_cpus[RSS_MAX_CPUS];  /* map from rxq to cpu */
 100static int rps_silo_to_cpu[RPS_MAX_CPUS];
 101static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
 102static struct ring_state rings[RSS_MAX_CPUS];
 103
 104static inline uint32_t toeplitz(const unsigned char *four_tuple,
 105                                const unsigned char *key)
 106{
 107        int i, bit, ret = 0;
 108        uint32_t key32;
 109
 110        key32 = ntohl(*((uint32_t *)key));
 111        key += 4;
 112
 113        for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
 114                for (bit = 7; bit >= 0; bit--) {
 115                        if (four_tuple[i] & (1 << bit))
 116                                ret ^= key32;
 117
 118                        key32 <<= 1;
 119                        key32 |= !!(key[0] & (1 << bit));
 120                }
 121                key++;
 122        }
 123
 124        return ret;
 125}
 126
 127/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
 128static void verify_rss(uint32_t rx_hash, int cpu)
 129{
 130        int queue = rx_hash % cfg_num_queues;
 131
 132        log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
 133        if (rx_irq_cpus[queue] != cpu) {
 134                log_verbose(". error: rss cpu mismatch (%d)", cpu);
 135                frames_error++;
 136        }
 137}
 138
 139static void verify_rps(uint64_t rx_hash, int cpu)
 140{
 141        int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
 142
 143        log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
 144        if (rps_silo_to_cpu[silo] != cpu) {
 145                log_verbose(". error: rps cpu mismatch (%d)", cpu);
 146                frames_error++;
 147        }
 148}
 149
 150static void log_rxhash(int cpu, uint32_t rx_hash,
 151                       const char *addrs, int addr_len)
 152{
 153        char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
 154        uint16_t *ports;
 155
 156        if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
 157            !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
 158                error(1, 0, "address parse error");
 159
 160        ports = (void *)addrs + (addr_len * 2);
 161        log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
 162                    cpu, rx_hash, saddr, daddr,
 163                    ntohs(ports[0]), ntohs(ports[1]));
 164}
 165
 166/* Compare computed rxhash with rxhash received from tpacket_v3 */
 167static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
 168{
 169        unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
 170        uint32_t rx_hash_sw;
 171        const char *addrs;
 172        int addr_len;
 173
 174        if (cfg_family == AF_INET) {
 175                addr_len = sizeof(struct in_addr);
 176                addrs = pkt + offsetof(struct iphdr, saddr);
 177        } else {
 178                addr_len = sizeof(struct in6_addr);
 179                addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
 180        }
 181
 182        memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
 183        rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
 184
 185        if (cfg_verbose)
 186                log_rxhash(cpu, rx_hash, addrs, addr_len);
 187
 188        if (rx_hash != rx_hash_sw) {
 189                log_verbose(" != expected 0x%x\n", rx_hash_sw);
 190                frames_error++;
 191                return;
 192        }
 193
 194        log_verbose(" OK");
 195        if (cfg_num_queues)
 196                verify_rss(rx_hash, cpu);
 197        else if (cfg_num_rps_cpus)
 198                verify_rps(rx_hash, cpu);
 199        log_verbose("\n");
 200}
 201
 202static char *recv_frame(const struct ring_state *ring, char *frame)
 203{
 204        struct tpacket3_hdr *hdr = (void *)frame;
 205
 206        if (hdr->hv1.tp_rxhash)
 207                verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
 208                              ring->cpu);
 209        else
 210                frames_nohash++;
 211
 212        return frame + hdr->tp_next_offset;
 213}
 214
 215/* A single TPACKET_V3 block can hold multiple frames */
 216static void recv_block(struct ring_state *ring)
 217{
 218        struct tpacket_block_desc *block;
 219        char *frame;
 220        int i;
 221
 222        block = (void *)(ring->mmap + ring->idx * ring_block_sz);
 223        if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
 224                return;
 225
 226        frame = (char *)block;
 227        frame += block->hdr.bh1.offset_to_first_pkt;
 228
 229        for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
 230                frame = recv_frame(ring, frame);
 231                frames_received++;
 232        }
 233
 234        block->hdr.bh1.block_status = TP_STATUS_KERNEL;
 235        ring->idx = (ring->idx + 1) % ring_block_nr;
 236}
 237
 238/* simple test: sleep once unconditionally and then process all rings */
 239static void process_rings(void)
 240{
 241        int i;
 242
 243        usleep(1000 * cfg_timeout_msec);
 244
 245        for (i = 0; i < num_cpus; i++)
 246                recv_block(&rings[i]);
 247
 248        fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
 249                frames_received - frames_nohash - frames_error,
 250                frames_nohash, frames_error);
 251}
 252
 253static char *setup_ring(int fd)
 254{
 255        struct tpacket_req3 req3 = {0};
 256        void *ring;
 257
 258        req3.tp_retire_blk_tov = cfg_timeout_msec;
 259        req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
 260
 261        req3.tp_frame_size = 2048;
 262        req3.tp_frame_nr = 1 << 10;
 263        req3.tp_block_nr = 2;
 264
 265        req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
 266        req3.tp_block_size /= req3.tp_block_nr;
 267
 268        if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
 269                error(1, errno, "setsockopt PACKET_RX_RING");
 270
 271        ring_block_sz = req3.tp_block_size;
 272        ring_block_nr = req3.tp_block_nr;
 273
 274        ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
 275                    PROT_READ | PROT_WRITE,
 276                    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
 277        if (ring == MAP_FAILED)
 278                error(1, 0, "mmap failed");
 279
 280        return ring;
 281}
 282
 283static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
 284{
 285        struct sock_filter filter[] = {
 286                BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
 287                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
 288                BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
 289                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
 290                BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
 291                BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
 292                BPF_STMT(BPF_RET + BPF_K, 0),
 293                BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
 294        };
 295        struct sock_fprog prog = {};
 296
 297        prog.filter = filter;
 298        prog.len = sizeof(filter) / sizeof(struct sock_filter);
 299        if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
 300                error(1, errno, "setsockopt filter");
 301}
 302
 303/* filter on transport protocol and destination port */
 304static void set_filter(int fd)
 305{
 306        const int off_dport = offsetof(struct tcphdr, dest);    /* same for udp */
 307        uint8_t proto;
 308
 309        proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
 310        if (cfg_family == AF_INET)
 311                __set_filter(fd, offsetof(struct iphdr, protocol), proto,
 312                             sizeof(struct iphdr) + off_dport);
 313        else
 314                __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
 315                             sizeof(struct ip6_hdr) + off_dport);
 316}
 317
 318/* drop everything: used temporarily during setup */
 319static void set_filter_null(int fd)
 320{
 321        struct sock_filter filter[] = {
 322                BPF_STMT(BPF_RET + BPF_K, 0),
 323        };
 324        struct sock_fprog prog = {};
 325
 326        prog.filter = filter;
 327        prog.len = sizeof(filter) / sizeof(struct sock_filter);
 328        if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
 329                error(1, errno, "setsockopt filter");
 330}
 331
 332static int create_ring(char **ring)
 333{
 334        struct fanout_args args = {
 335                .id = 1,
 336                .type_flags = PACKET_FANOUT_CPU,
 337                .max_num_members = RSS_MAX_CPUS
 338        };
 339        struct sockaddr_ll ll = { 0 };
 340        int fd, val;
 341
 342        fd = socket(PF_PACKET, SOCK_DGRAM, 0);
 343        if (fd == -1)
 344                error(1, errno, "socket creation failed");
 345
 346        val = TPACKET_V3;
 347        if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
 348                error(1, errno, "setsockopt PACKET_VERSION");
 349        *ring = setup_ring(fd);
 350
 351        /* block packets until all rings are added to the fanout group:
 352         * else packets can arrive during setup and get misclassified
 353         */
 354        set_filter_null(fd);
 355
 356        ll.sll_family = AF_PACKET;
 357        ll.sll_ifindex = if_nametoindex(cfg_ifname);
 358        ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
 359                                                  htons(ETH_P_IPV6);
 360        if (bind(fd, (void *)&ll, sizeof(ll)))
 361                error(1, errno, "bind");
 362
 363        /* must come after bind: verifies all programs in group match */
 364        if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
 365                /* on failure, retry using old API if that is sufficient:
 366                 * it has a hard limit of 256 sockets, so only try if
 367                 * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
 368                 * in this API, the third argument is left implicit.
 369                 */
 370                if (cfg_num_queues || num_cpus > 256 ||
 371                    setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
 372                               &args, sizeof(uint32_t)))
 373                        error(1, errno, "setsockopt PACKET_FANOUT cpu");
 374        }
 375
 376        return fd;
 377}
 378
 379/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
 380static int setup_sink(void)
 381{
 382        int fd, val;
 383
 384        fd = socket(cfg_family, cfg_type, 0);
 385        if (fd == -1)
 386                error(1, errno, "socket %d.%d", cfg_family, cfg_type);
 387
 388        val = 1 << 20;
 389        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
 390                error(1, errno, "setsockopt rcvbuf");
 391
 392        return fd;
 393}
 394
 395static void setup_rings(void)
 396{
 397        int i;
 398
 399        for (i = 0; i < num_cpus; i++) {
 400                rings[i].cpu = i;
 401                rings[i].fd = create_ring(&rings[i].mmap);
 402        }
 403
 404        /* accept packets once all rings in the fanout group are up */
 405        for (i = 0; i < num_cpus; i++)
 406                set_filter(rings[i].fd);
 407}
 408
 409static void cleanup_rings(void)
 410{
 411        int i;
 412
 413        for (i = 0; i < num_cpus; i++) {
 414                if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
 415                        error(1, errno, "munmap");
 416                if (close(rings[i].fd))
 417                        error(1, errno, "close");
 418        }
 419}
 420
 421static void parse_cpulist(const char *arg)
 422{
 423        do {
 424                rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
 425
 426                arg = strchr(arg, ',');
 427                if (!arg)
 428                        break;
 429                arg++;                  // skip ','
 430        } while (1);
 431}
 432
 433static void show_cpulist(void)
 434{
 435        int i;
 436
 437        for (i = 0; i < cfg_num_queues; i++)
 438                fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
 439}
 440
 441static void show_silos(void)
 442{
 443        int i;
 444
 445        for (i = 0; i < cfg_num_rps_cpus; i++)
 446                fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
 447}
 448
 449static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
 450{
 451        int i, ret, off;
 452
 453        if (slen < TOEPLITZ_STR_MIN_LEN ||
 454            slen > TOEPLITZ_STR_MAX_LEN + 1)
 455                error(1, 0, "invalid toeplitz key");
 456
 457        for (i = 0, off = 0; off < slen; i++, off += 3) {
 458                ret = sscanf(str + off, "%hhx", &key[i]);
 459                if (ret != 1)
 460                        error(1, 0, "key parse error at %d off %d len %d",
 461                              i, off, slen);
 462        }
 463}
 464
 465static void parse_rps_bitmap(const char *arg)
 466{
 467        unsigned long bitmap;
 468        int i;
 469
 470        bitmap = strtoul(arg, NULL, 0);
 471
 472        if (bitmap & ~(RPS_MAX_CPUS - 1))
 473                error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
 474                      bitmap, RPS_MAX_CPUS - 1);
 475
 476        for (i = 0; i < RPS_MAX_CPUS; i++)
 477                if (bitmap & 1UL << i)
 478                        rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
 479}
 480
 481static void parse_opts(int argc, char **argv)
 482{
 483        static struct option long_options[] = {
 484            {"dport",   required_argument, 0, 'd'},
 485            {"cpus",    required_argument, 0, 'C'},
 486            {"key",     required_argument, 0, 'k'},
 487            {"iface",   required_argument, 0, 'i'},
 488            {"ipv4",    no_argument, 0, '4'},
 489            {"ipv6",    no_argument, 0, '6'},
 490            {"sink",    no_argument, 0, 's'},
 491            {"tcp",     no_argument, 0, 't'},
 492            {"timeout", required_argument, 0, 'T'},
 493            {"udp",     no_argument, 0, 'u'},
 494            {"verbose", no_argument, 0, 'v'},
 495            {"rps",     required_argument, 0, 'r'},
 496            {0, 0, 0, 0}
 497        };
 498        bool have_toeplitz = false;
 499        int index, c;
 500
 501        while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
 502                switch (c) {
 503                case '4':
 504                        cfg_family = AF_INET;
 505                        break;
 506                case '6':
 507                        cfg_family = AF_INET6;
 508                        break;
 509                case 'C':
 510                        parse_cpulist(optarg);
 511                        break;
 512                case 'd':
 513                        cfg_dport = strtol(optarg, NULL, 0);
 514                        break;
 515                case 'i':
 516                        cfg_ifname = optarg;
 517                        break;
 518                case 'k':
 519                        parse_toeplitz_key(optarg, strlen(optarg),
 520                                           toeplitz_key);
 521                        have_toeplitz = true;
 522                        break;
 523                case 'r':
 524                        parse_rps_bitmap(optarg);
 525                        break;
 526                case 's':
 527                        cfg_sink = true;
 528                        break;
 529                case 't':
 530                        cfg_type = SOCK_STREAM;
 531                        break;
 532                case 'T':
 533                        cfg_timeout_msec = strtol(optarg, NULL, 0);
 534                        break;
 535                case 'u':
 536                        cfg_type = SOCK_DGRAM;
 537                        break;
 538                case 'v':
 539                        cfg_verbose = true;
 540                        break;
 541
 542                default:
 543                        error(1, 0, "unknown option %c", optopt);
 544                        break;
 545                }
 546        }
 547
 548        if (!have_toeplitz)
 549                error(1, 0, "Must supply rss key ('-k')");
 550
 551        num_cpus = get_nprocs();
 552        if (num_cpus > RSS_MAX_CPUS)
 553                error(1, 0, "increase RSS_MAX_CPUS");
 554
 555        if (cfg_num_queues && cfg_num_rps_cpus)
 556                error(1, 0,
 557                      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
 558        if (cfg_verbose) {
 559                show_cpulist();
 560                show_silos();
 561        }
 562}
 563
 564int main(int argc, char **argv)
 565{
 566        const int min_tests = 10;
 567        int fd_sink = -1;
 568
 569        parse_opts(argc, argv);
 570
 571        if (cfg_sink)
 572                fd_sink = setup_sink();
 573
 574        setup_rings();
 575        process_rings();
 576        cleanup_rings();
 577
 578        if (cfg_sink && close(fd_sink))
 579                error(1, errno, "close sink");
 580
 581        if (frames_received - frames_nohash < min_tests)
 582                error(1, 0, "too few frames for verification");
 583
 584        return frames_error;
 585}
 586