linux/samples/bpf/xdp_redirect_cpu_user.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
   3 */
   4static const char *__doc__ =
   5        " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
   6
   7#include <errno.h>
   8#include <signal.h>
   9#include <stdio.h>
  10#include <stdlib.h>
  11#include <stdbool.h>
  12#include <string.h>
  13#include <unistd.h>
  14#include <locale.h>
  15#include <sys/resource.h>
  16#include <sys/sysinfo.h>
  17#include <getopt.h>
  18#include <net/if.h>
  19#include <time.h>
  20#include <linux/limits.h>
  21
  22#include <arpa/inet.h>
  23#include <linux/if_link.h>
  24
  25/* How many xdp_progs are defined in _kern.c */
  26#define MAX_PROG 6
  27
  28#include <bpf/bpf.h>
  29#include <bpf/libbpf.h>
  30
  31#include "bpf_util.h"
  32
/* Target net_device: -1 means "no device attached yet" (nothing to clean up) */
static int ifindex = -1;
static char ifname_buf[IF_NAMESIZE];
static char *ifname;
/* BPF prog id of the XDP program we attached; used to verify at exit that
 * the program on the device is still ours before detaching.
 */
static __u32 prog_id;

static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
/* Number of configured CPUs (set in main via get_nprocs_conf()) */
static int n_cpus;
/* File descriptors for the BPF maps, resolved by init_map_fds() */
static int cpu_map_fd;
static int rx_cnt_map_fd;
static int redirect_err_cnt_map_fd;
static int cpumap_enqueue_cnt_map_fd;
static int cpumap_kthread_cnt_map_fd;
static int cpus_available_map_fd;
static int cpus_count_map_fd;
static int cpus_iterator_map_fd;
static int exception_cnt_map_fd;

/* Number of tracepoints attached by init_tracepoints() */
#define NUM_TP 5
struct bpf_link *tp_links[NUM_TP] = { 0 };
static int tp_cnt = 0;

/* Exit return codes */
#define EXIT_OK		0
#define EXIT_FAIL		1
#define EXIT_FAIL_OPTION	2
#define EXIT_FAIL_XDP		3
#define EXIT_FAIL_BPF		4
#define EXIT_FAIL_MEM		5
  61
/* Command line options; usage() prints this table, so keep entries in the
 * order they should be listed.  Short-option letters must stay in sync with
 * the getopt_long() optstring in main().
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"dev",		required_argument,	NULL, 'd' },
	{"skb-mode",	no_argument,		NULL, 'S' },
	{"sec",		required_argument,	NULL, 's' },
	{"progname",	required_argument,	NULL, 'p' },
	{"qsize",	required_argument,	NULL, 'q' },
	{"cpu",		required_argument,	NULL, 'c' },
	{"stress-mode", no_argument,		NULL, 'x' },
	{"no-separators", no_argument,		NULL, 'z' },
	{"force",	no_argument,		NULL, 'F' },
	{"mprog-disable", no_argument,		NULL, 'n' },
	{"mprog-name",	required_argument,	NULL, 'e' },
	{"mprog-filename", required_argument,	NULL, 'f' },
	{"redirect-device", required_argument,	NULL, 'r' },
	{"redirect-map", required_argument,	NULL, 'm' },
	{0, 0, NULL,  0 }
};
  80
  81static void int_exit(int sig)
  82{
  83        __u32 curr_prog_id = 0;
  84
  85        if (ifindex > -1) {
  86                if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
  87                        printf("bpf_get_link_xdp_id failed\n");
  88                        exit(EXIT_FAIL);
  89                }
  90                if (prog_id == curr_prog_id) {
  91                        fprintf(stderr,
  92                                "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
  93                                ifindex, ifname);
  94                        bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
  95                } else if (!curr_prog_id) {
  96                        printf("couldn't find a prog id on a given iface\n");
  97                } else {
  98                        printf("program on interface changed, not removing\n");
  99                }
 100        }
 101        /* Detach tracepoints */
 102        while (tp_cnt)
 103                bpf_link__destroy(tp_links[--tp_cnt]);
 104
 105        exit(EXIT_OK);
 106}
 107
 108static void print_avail_progs(struct bpf_object *obj)
 109{
 110        struct bpf_program *pos;
 111
 112        bpf_object__for_each_program(pos, obj) {
 113                if (bpf_program__is_xdp(pos))
 114                        printf(" %s\n", bpf_program__title(pos, false));
 115        }
 116}
 117
 118static void usage(char *argv[], struct bpf_object *obj)
 119{
 120        int i;
 121
 122        printf("\nDOCUMENTATION:\n%s\n", __doc__);
 123        printf("\n");
 124        printf(" Usage: %s (options-see-below)\n", argv[0]);
 125        printf(" Listing options:\n");
 126        for (i = 0; long_options[i].name != 0; i++) {
 127                printf(" --%-12s", long_options[i].name);
 128                if (long_options[i].flag != NULL)
 129                        printf(" flag (internal value:%d)",
 130                                *long_options[i].flag);
 131                else
 132                        printf(" short-option: -%c",
 133                                long_options[i].val);
 134                printf("\n");
 135        }
 136        printf("\n Programs to be used for --progname:\n");
 137        print_avail_progs(obj);
 138        printf("\n");
 139}
 140
 141/* gettime returns the current time of day in nanoseconds.
 142 * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
 143 *       clock_gettime (ns) =>  9ns (CLOCK_MONOTONIC_COARSE)
 144 */
 145#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
 146static __u64 gettime(void)
 147{
 148        struct timespec t;
 149        int res;
 150
 151        res = clock_gettime(CLOCK_MONOTONIC, &t);
 152        if (res < 0) {
 153                fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
 154                exit(EXIT_FAIL);
 155        }
 156        return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
 157}
 158
/* Common stats data record shared with _kern.c.
 * Layout must match the kernel-side struct exactly, since these are read
 * raw out of per-cpu BPF maps.
 */
struct datarec {
	__u64 processed;	/* packets seen/handled */
	__u64 dropped;		/* packets dropped */
	__u64 issue;		/* per-record "extra" counter (errors/sched/bulk) */
	__u64 xdp_pass;		/* 2nd XDP prog on remote CPU: XDP_PASS count */
	__u64 xdp_drop;		/* 2nd XDP prog on remote CPU: XDP_DROP count */
	__u64 xdp_redirect;	/* 2nd XDP prog on remote CPU: XDP_REDIRECT count */
};
/* One map snapshot: per-cpu values plus their sum, timestamped */
struct record {
	__u64 timestamp;	/* gettime() at collection, for pps calculation */
	struct datarec total;	/* sum over all CPUs */
	struct datarec *cpu;	/* array of bpf_num_possible_cpus() entries */
};
/* Full snapshot of all maps we monitor */
struct stats_record {
	struct record rx_cnt;
	struct record redir_err;
	struct record kthread;
	struct record exception;
	struct record enq[];	/* flexible array: one per destination CPU */
};
 180
 181static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
 182{
 183        /* For percpu maps, userspace gets a value per possible CPU */
 184        unsigned int nr_cpus = bpf_num_possible_cpus();
 185        struct datarec values[nr_cpus];
 186        __u64 sum_xdp_redirect = 0;
 187        __u64 sum_xdp_pass = 0;
 188        __u64 sum_xdp_drop = 0;
 189        __u64 sum_processed = 0;
 190        __u64 sum_dropped = 0;
 191        __u64 sum_issue = 0;
 192        int i;
 193
 194        if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
 195                fprintf(stderr,
 196                        "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
 197                return false;
 198        }
 199        /* Get time as close as possible to reading map contents */
 200        rec->timestamp = gettime();
 201
 202        /* Record and sum values from each CPU */
 203        for (i = 0; i < nr_cpus; i++) {
 204                rec->cpu[i].processed = values[i].processed;
 205                sum_processed        += values[i].processed;
 206                rec->cpu[i].dropped = values[i].dropped;
 207                sum_dropped        += values[i].dropped;
 208                rec->cpu[i].issue = values[i].issue;
 209                sum_issue        += values[i].issue;
 210                rec->cpu[i].xdp_pass = values[i].xdp_pass;
 211                sum_xdp_pass += values[i].xdp_pass;
 212                rec->cpu[i].xdp_drop = values[i].xdp_drop;
 213                sum_xdp_drop += values[i].xdp_drop;
 214                rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
 215                sum_xdp_redirect += values[i].xdp_redirect;
 216        }
 217        rec->total.processed = sum_processed;
 218        rec->total.dropped   = sum_dropped;
 219        rec->total.issue     = sum_issue;
 220        rec->total.xdp_pass  = sum_xdp_pass;
 221        rec->total.xdp_drop  = sum_xdp_drop;
 222        rec->total.xdp_redirect = sum_xdp_redirect;
 223        return true;
 224}
 225
 226static struct datarec *alloc_record_per_cpu(void)
 227{
 228        unsigned int nr_cpus = bpf_num_possible_cpus();
 229        struct datarec *array;
 230
 231        array = calloc(nr_cpus, sizeof(struct datarec));
 232        if (!array) {
 233                fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
 234                exit(EXIT_FAIL_MEM);
 235        }
 236        return array;
 237}
 238
 239static struct stats_record *alloc_stats_record(void)
 240{
 241        struct stats_record *rec;
 242        int i, size;
 243
 244        size = sizeof(*rec) + n_cpus * sizeof(struct record);
 245        rec = malloc(size);
 246        if (!rec) {
 247                fprintf(stderr, "Mem alloc error\n");
 248                exit(EXIT_FAIL_MEM);
 249        }
 250        memset(rec, 0, size);
 251        rec->rx_cnt.cpu    = alloc_record_per_cpu();
 252        rec->redir_err.cpu = alloc_record_per_cpu();
 253        rec->kthread.cpu   = alloc_record_per_cpu();
 254        rec->exception.cpu = alloc_record_per_cpu();
 255        for (i = 0; i < n_cpus; i++)
 256                rec->enq[i].cpu = alloc_record_per_cpu();
 257
 258        return rec;
 259}
 260
 261static void free_stats_record(struct stats_record *r)
 262{
 263        int i;
 264
 265        for (i = 0; i < n_cpus; i++)
 266                free(r->enq[i].cpu);
 267        free(r->exception.cpu);
 268        free(r->kthread.cpu);
 269        free(r->redir_err.cpu);
 270        free(r->rx_cnt.cpu);
 271        free(r);
 272}
 273
 274static double calc_period(struct record *r, struct record *p)
 275{
 276        double period_ = 0;
 277        __u64 period = 0;
 278
 279        period = r->timestamp - p->timestamp;
 280        if (period > 0)
 281                period_ = ((double) period / NANOSEC_PER_SEC);
 282
 283        return period_;
 284}
 285
 286static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
 287{
 288        __u64 packets = 0;
 289        __u64 pps = 0;
 290
 291        if (period_ > 0) {
 292                packets = r->processed - p->processed;
 293                pps = packets / period_;
 294        }
 295        return pps;
 296}
 297
 298static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
 299{
 300        __u64 packets = 0;
 301        __u64 pps = 0;
 302
 303        if (period_ > 0) {
 304                packets = r->dropped - p->dropped;
 305                pps = packets / period_;
 306        }
 307        return pps;
 308}
 309
 310static __u64 calc_errs_pps(struct datarec *r,
 311                            struct datarec *p, double period_)
 312{
 313        __u64 packets = 0;
 314        __u64 pps = 0;
 315
 316        if (period_ > 0) {
 317                packets = r->issue - p->issue;
 318                pps = packets / period_;
 319        }
 320        return pps;
 321}
 322
 323static void calc_xdp_pps(struct datarec *r, struct datarec *p,
 324                         double *xdp_pass, double *xdp_drop,
 325                         double *xdp_redirect, double period_)
 326{
 327        *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
 328        if (period_ > 0) {
 329                *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
 330                *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
 331                *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
 332        }
 333}
 334
/* Pretty-print one interval of statistics: per-CPU rates for RX, cpumap
 * enqueue, cpumap kthread, redirect errors, XDP exceptions and (when a 2nd
 * program is loaded on the cpumap, mprog_fd > 0) the remote-CPU XDP action
 * counters.  Rates are computed from the delta between @stats_rec and
 * @stats_prev.  The %' printf flag relies on setlocale() done by the caller.
 */
static void stats_print(struct stats_record *stats_rec,
			struct stats_record *stats_prev,
			char *prog_name, char *mprog_name, int mprog_fd)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	double pps = 0, drop = 0, err = 0;
	bool mprog_enabled = false;
	struct record *rec, *prev;
	int to_cpu;
	double t;
	int i;

	if (mprog_fd > 0)
		mprog_enabled = true;

	/* Header */
	printf("Running XDP/eBPF prog_name:%s\n", prog_name);
	printf("%-15s %-7s %-14s %-11s %-9s\n",
	       "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");

	/* XDP rx_cnt: packets seen by the RX-side XDP program.
	 * Per-CPU lines are only printed when that CPU saw traffic.
	 */
	{
		char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
		char *errstr = "";

		rec  = &stats_rec->rx_cnt;
		prev = &stats_prev->rx_cnt;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				errstr = "cpu-dest/err";
			if (pps > 0)
				printf(fmt_rx, "XDP-RX",
					i, pps, drop, err, errstr);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		printf(fm2_rx, "XDP-RX", "total", pps, drop);
	}

	/* cpumap enqueue stats: one section per destination CPU.
	 * Here 'err' carries the bulk count, so pps/err is the average
	 * bulk size per enqueue.
	 */
	for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
		char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *errstr = "";

		rec  =  &stats_rec->enq[to_cpu];
		prev = &stats_prev->enq[to_cpu];
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			if (pps > 0)
				printf(fmt, "cpumap-enqueue",
				       i, to_cpu, pps, drop, err, errstr);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		if (pps > 0) {
			drop = calc_drop_pps(&rec->total, &prev->total, t);
			err  = calc_errs_pps(&rec->total, &prev->total, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			printf(fm2, "cpumap-enqueue",
			       "sum", to_cpu, pps, drop, err, errstr);
		}
	}

	/* cpumap kthread stats: packets handled by the per-CPU kthread;
	 * 'err' here counts kthread scheduling events.
	 */
	{
		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *e_str = "";

		rec  = &stats_rec->kthread;
		prev = &stats_prev->kthread;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				e_str = "sched";
			if (pps > 0)
				printf(fmt_k, "cpumap_kthread",
				       i, pps, drop, err, e_str);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		if (err > 0)
			e_str = "sched-sum";
		printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
	}

	/* XDP redirect err tracepoints (very unlikely) */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->redir_err;
		prev = &stats_prev->redir_err;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "redirect_err", i, pps, drop);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "redirect_err", "total", pps, drop);
	}

	/* XDP general exception tracepoints */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->exception;
		prev = &stats_prev->exception;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "xdp_exception", i, pps, drop);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "xdp_exception", "total", pps, drop);
	}

	/* CPUMAP attached XDP program that runs on remote/destination CPU */
	if (mprog_enabled) {
		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
		double xdp_pass, xdp_drop, xdp_redirect;

		printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
		printf("%-15s %-7s %-14s %-11s %-9s\n",
		       "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");

		rec  = &stats_rec->kthread;
		prev = &stats_prev->kthread;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
				     &xdp_redirect, t);
			if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
				printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
				       xdp_redirect);
		}
		calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
			     &xdp_redirect, t);
		printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
	}

	printf("\n");
	fflush(stdout);
}
 525
 526static void stats_collect(struct stats_record *rec)
 527{
 528        int fd, i;
 529
 530        fd = rx_cnt_map_fd;
 531        map_collect_percpu(fd, 0, &rec->rx_cnt);
 532
 533        fd = redirect_err_cnt_map_fd;
 534        map_collect_percpu(fd, 1, &rec->redir_err);
 535
 536        fd = cpumap_enqueue_cnt_map_fd;
 537        for (i = 0; i < n_cpus; i++)
 538                map_collect_percpu(fd, i, &rec->enq[i]);
 539
 540        fd = cpumap_kthread_cnt_map_fd;
 541        map_collect_percpu(fd, 0, &rec->kthread);
 542
 543        fd = exception_cnt_map_fd;
 544        map_collect_percpu(fd, 0, &rec->exception);
 545}
 546
 547
 548/* Pointer swap trick */
 549static inline void swap(struct stats_record **a, struct stats_record **b)
 550{
 551        struct stats_record *tmp;
 552
 553        tmp = *a;
 554        *a = *b;
 555        *b = tmp;
 556}
 557
/* Insert/replace one entry in the cpumap and publish it to the control maps.
 *
 * @cpu:       destination CPU id (key into cpu_map)
 * @value:     cpumap value (queue size + optional 2nd prog fd)
 * @avail_idx: slot in cpus_available where this CPU is advertised
 * @new:       true when adding a CPU (bumps cpus_count), false on replace
 *
 * Exits the program on any map operation failure; returns 0 on success.
 */
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
	__u32 curr_cpus_count = 0;
	__u32 key = 0;
	int ret;

	/* Add a CPU entry to cpumap, as this allocate a cpu entry in
	 * the kernel for the cpu.
	 */
	ret = bpf_map_update_elem(cpu_map_fd, &cpu, value, 0);
	if (ret) {
		fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
		exit(EXIT_FAIL_BPF);
	}

	/* Inform bpf_prog's that a new CPU is available to select
	 * from via some control maps.
	 */
	ret = bpf_map_update_elem(cpus_available_map_fd, &avail_idx, &cpu, 0);
	if (ret) {
		fprintf(stderr, "Add to avail CPUs failed\n");
		exit(EXIT_FAIL_BPF);
	}

	/* When not replacing/updating existing entry, bump the count */
	ret = bpf_map_lookup_elem(cpus_count_map_fd, &key, &curr_cpus_count);
	if (ret) {
		fprintf(stderr, "Failed reading curr cpus_count\n");
		exit(EXIT_FAIL_BPF);
	}
	if (new) {
		curr_cpus_count++;
		ret = bpf_map_update_elem(cpus_count_map_fd, &key,
					  &curr_cpus_count, 0);
		if (ret) {
			fprintf(stderr, "Failed write curr cpus_count\n");
			exit(EXIT_FAIL_BPF);
		}
	}
	/* Log the resulting configuration for the operator */
	printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
	       new ? "Add-new":"Replace", cpu, avail_idx,
	       value->qsize, value->bpf_prog.fd, curr_cpus_count);

	return 0;
}
 605
 606/* CPUs are zero-indexed. Thus, add a special sentinel default value
 607 * in map cpus_available to mark CPU index'es not configured
 608 */
 609static void mark_cpus_unavailable(void)
 610{
 611        __u32 invalid_cpu = n_cpus;
 612        int ret, i;
 613
 614        for (i = 0; i < n_cpus; i++) {
 615                ret = bpf_map_update_elem(cpus_available_map_fd, &i,
 616                                          &invalid_cpu, 0);
 617                if (ret) {
 618                        fprintf(stderr, "Failed marking CPU unavailable\n");
 619                        exit(EXIT_FAIL_BPF);
 620                }
 621        }
 622}
 623
 624/* Stress cpumap management code by concurrently changing underlying cpumap */
 625static void stress_cpumap(struct bpf_cpumap_val *value)
 626{
 627        /* Changing qsize will cause kernel to free and alloc a new
 628         * bpf_cpu_map_entry, with an associated/complicated tear-down
 629         * procedure.
 630         */
 631        value->qsize = 1024;
 632        create_cpu_entry(1, value, 0, false);
 633        value->qsize = 8;
 634        create_cpu_entry(1, value, 0, false);
 635        value->qsize = 16000;
 636        create_cpu_entry(1, value, 0, false);
 637}
 638
/* Main stats loop: every @interval seconds collect a fresh snapshot,
 * print rates against the previous one, and (with --stress-mode) churn
 * the cpumap entry.  Never returns; termination happens via the signal
 * handler int_exit().
 */
static void stats_poll(int interval, bool use_separators, char *prog_name,
		       char *mprog_name, struct bpf_cpumap_val *value,
		       bool stress_mode)
{
	struct stats_record *record, *prev;
	int mprog_fd;

	record = alloc_stats_record();
	prev   = alloc_stats_record();
	stats_collect(record);

	/* Trick to pretty printf with thousands separators use %' */
	if (use_separators)
		setlocale(LC_NUMERIC, "en_US");

	while (1) {
		swap(&prev, &record);
		mprog_fd = value->bpf_prog.fd;
		stats_collect(record);
		stats_print(record, prev, prog_name, mprog_name, mprog_fd);
		sleep(interval);
		if (stress_mode)
			stress_cpumap(value);
	}

	/* NOTE(review): unreachable — the while(1) loop above never exits
	 * (the process terminates via int_exit()), so these frees never run.
	 */
	free_stats_record(record);
	free_stats_record(prev);
}
 667
 668static struct bpf_link * attach_tp(struct bpf_object *obj,
 669                                   const char *tp_category,
 670                                   const char* tp_name)
 671{
 672        struct bpf_program *prog;
 673        struct bpf_link *link;
 674        char sec_name[PATH_MAX];
 675        int len;
 676
 677        len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
 678                       tp_category, tp_name);
 679        if (len < 0)
 680                exit(EXIT_FAIL);
 681
 682        prog = bpf_object__find_program_by_title(obj, sec_name);
 683        if (!prog) {
 684                fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
 685                exit(EXIT_FAIL_BPF);
 686        }
 687
 688        link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
 689        if (libbpf_get_error(link))
 690                exit(EXIT_FAIL_BPF);
 691
 692        return link;
 693}
 694
 695static void init_tracepoints(struct bpf_object *obj) {
 696        tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
 697        tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
 698        tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
 699        tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
 700        tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
 701}
 702
 703static int init_map_fds(struct bpf_object *obj)
 704{
 705        /* Maps updated by tracepoints */
 706        redirect_err_cnt_map_fd =
 707                bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
 708        exception_cnt_map_fd =
 709                bpf_object__find_map_fd_by_name(obj, "exception_cnt");
 710        cpumap_enqueue_cnt_map_fd =
 711                bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
 712        cpumap_kthread_cnt_map_fd =
 713                bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
 714
 715        /* Maps used by XDP */
 716        rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
 717        cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
 718        cpus_available_map_fd =
 719                bpf_object__find_map_fd_by_name(obj, "cpus_available");
 720        cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
 721        cpus_iterator_map_fd =
 722                bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
 723
 724        if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
 725            redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
 726            cpumap_kthread_cnt_map_fd < 0 || cpus_available_map_fd < 0 ||
 727            cpus_count_map_fd < 0 || cpus_iterator_map_fd < 0 ||
 728            exception_cnt_map_fd < 0)
 729                return -ENOENT;
 730
 731        return 0;
 732}
 733
 734static int load_cpumap_prog(char *file_name, char *prog_name,
 735                            char *redir_interface, char *redir_map)
 736{
 737        struct bpf_prog_load_attr prog_load_attr = {
 738                .prog_type              = BPF_PROG_TYPE_XDP,
 739                .expected_attach_type   = BPF_XDP_CPUMAP,
 740                .file = file_name,
 741        };
 742        struct bpf_program *prog;
 743        struct bpf_object *obj;
 744        int fd;
 745
 746        if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
 747                return -1;
 748
 749        if (fd < 0) {
 750                fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
 751                        strerror(errno));
 752                return fd;
 753        }
 754
 755        if (redir_interface && redir_map) {
 756                int err, map_fd, ifindex_out, key = 0;
 757
 758                map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
 759                if (map_fd < 0)
 760                        return map_fd;
 761
 762                ifindex_out = if_nametoindex(redir_interface);
 763                if (!ifindex_out)
 764                        return -1;
 765
 766                err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
 767                if (err < 0)
 768                        return err;
 769        }
 770
 771        prog = bpf_object__find_program_by_title(obj, prog_name);
 772        if (!prog) {
 773                fprintf(stderr, "bpf_object__find_program_by_title failed\n");
 774                return EXIT_FAIL;
 775        }
 776
 777        return bpf_program__fd(prog);
 778}
 779
 780int main(int argc, char **argv)
 781{
 782        struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
 783        char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
 784        char *mprog_filename = "xdp_redirect_kern.o";
 785        char *redir_interface = NULL, *redir_map = NULL;
 786        char *mprog_name = "xdp_redirect_dummy";
 787        bool mprog_disable = false;
 788        struct bpf_prog_load_attr prog_load_attr = {
 789                .prog_type      = BPF_PROG_TYPE_UNSPEC,
 790        };
 791        struct bpf_prog_info info = {};
 792        __u32 info_len = sizeof(info);
 793        struct bpf_cpumap_val value;
 794        bool use_separators = true;
 795        bool stress_mode = false;
 796        struct bpf_program *prog;
 797        struct bpf_object *obj;
 798        char filename[256];
 799        int added_cpus = 0;
 800        int longindex = 0;
 801        int interval = 2;
 802        int add_cpu = -1;
 803        int opt, err;
 804        int prog_fd;
 805        int *cpu, i;
 806        __u32 qsize;
 807
 808        n_cpus = get_nprocs_conf();
 809
 810        /* Notice: choosing he queue size is very important with the
 811         * ixgbe driver, because it's driver page recycling trick is
 812         * dependend on pages being returned quickly.  The number of
 813         * out-standing packets in the system must be less-than 2x
 814         * RX-ring size.
 815         */
 816        qsize = 128+64;
 817
 818        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 819        prog_load_attr.file = filename;
 820
 821        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 822                perror("setrlimit(RLIMIT_MEMLOCK)");
 823                return 1;
 824        }
 825
 826        if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
 827                return EXIT_FAIL;
 828
 829        if (prog_fd < 0) {
 830                fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
 831                        strerror(errno));
 832                return EXIT_FAIL;
 833        }
 834        init_tracepoints(obj);
 835        if (init_map_fds(obj) < 0) {
 836                fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
 837                return EXIT_FAIL;
 838        }
 839        mark_cpus_unavailable();
 840
 841        cpu = malloc(n_cpus * sizeof(int));
 842        if (!cpu) {
 843                fprintf(stderr, "failed to allocate cpu array\n");
 844                return EXIT_FAIL;
 845        }
 846        memset(cpu, 0, n_cpus * sizeof(int));
 847
 848        /* Parse commands line args */
 849        while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
 850                                  long_options, &longindex)) != -1) {
 851                switch (opt) {
 852                case 'd':
 853                        if (strlen(optarg) >= IF_NAMESIZE) {
 854                                fprintf(stderr, "ERR: --dev name too long\n");
 855                                goto error;
 856                        }
 857                        ifname = (char *)&ifname_buf;
 858                        strncpy(ifname, optarg, IF_NAMESIZE);
 859                        ifindex = if_nametoindex(ifname);
 860                        if (ifindex == 0) {
 861                                fprintf(stderr,
 862                                        "ERR: --dev name unknown err(%d):%s\n",
 863                                        errno, strerror(errno));
 864                                goto error;
 865                        }
 866                        break;
 867                case 's':
 868                        interval = atoi(optarg);
 869                        break;
 870                case 'S':
 871                        xdp_flags |= XDP_FLAGS_SKB_MODE;
 872                        break;
 873                case 'x':
 874                        stress_mode = true;
 875                        break;
 876                case 'z':
 877                        use_separators = false;
 878                        break;
 879                case 'p':
 880                        /* Selecting eBPF prog to load */
 881                        prog_name = optarg;
 882                        break;
 883                case 'n':
 884                        mprog_disable = true;
 885                        break;
 886                case 'f':
 887                        mprog_filename = optarg;
 888                        break;
 889                case 'e':
 890                        mprog_name = optarg;
 891                        break;
 892                case 'r':
 893                        redir_interface = optarg;
 894                        break;
 895                case 'm':
 896                        redir_map = optarg;
 897                        break;
 898                case 'c':
 899                        /* Add multiple CPUs */
 900                        add_cpu = strtoul(optarg, NULL, 0);
 901                        if (add_cpu >= n_cpus) {
 902                                fprintf(stderr,
 903                                "--cpu nr too large for cpumap err(%d):%s\n",
 904                                        errno, strerror(errno));
 905                                goto error;
 906                        }
 907                        cpu[added_cpus++] = add_cpu;
 908                        break;
 909                case 'q':
 910                        qsize = atoi(optarg);
 911                        break;
 912                case 'F':
 913                        xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
 914                        break;
 915                case 'h':
 916                error:
 917                default:
 918                        free(cpu);
 919                        usage(argv, obj);
 920                        return EXIT_FAIL_OPTION;
 921                }
 922        }
 923
 924        if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
 925                xdp_flags |= XDP_FLAGS_DRV_MODE;
 926
 927        /* Required option */
 928        if (ifindex == -1) {
 929                fprintf(stderr, "ERR: required option --dev missing\n");
 930                usage(argv, obj);
 931                err = EXIT_FAIL_OPTION;
 932                goto out;
 933        }
 934        /* Required option */
 935        if (add_cpu == -1) {
 936                fprintf(stderr, "ERR: required option --cpu missing\n");
 937                fprintf(stderr, " Specify multiple --cpu option to add more\n");
 938                usage(argv, obj);
 939                err = EXIT_FAIL_OPTION;
 940                goto out;
 941        }
 942
 943        value.bpf_prog.fd = 0;
 944        if (!mprog_disable)
 945                value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
 946                                                     redir_interface, redir_map);
 947        if (value.bpf_prog.fd < 0) {
 948                err = value.bpf_prog.fd;
 949                goto out;
 950        }
 951        value.qsize = qsize;
 952
 953        for (i = 0; i < added_cpus; i++)
 954                create_cpu_entry(cpu[i], &value, i, true);
 955
 956        /* Remove XDP program when program is interrupted or killed */
 957        signal(SIGINT, int_exit);
 958        signal(SIGTERM, int_exit);
 959
 960        prog = bpf_object__find_program_by_title(obj, prog_name);
 961        if (!prog) {
 962                fprintf(stderr, "bpf_object__find_program_by_title failed\n");
 963                err = EXIT_FAIL;
 964                goto out;
 965        }
 966
 967        prog_fd = bpf_program__fd(prog);
 968        if (prog_fd < 0) {
 969                fprintf(stderr, "bpf_program__fd failed\n");
 970                err = EXIT_FAIL;
 971                goto out;
 972        }
 973
 974        if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
 975                fprintf(stderr, "link set xdp fd failed\n");
 976                err = EXIT_FAIL_XDP;
 977                goto out;
 978        }
 979
 980        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 981        if (err) {
 982                printf("can't get prog info - %s\n", strerror(errno));
 983                goto out;
 984        }
 985        prog_id = info.id;
 986
 987        stats_poll(interval, use_separators, prog_name, mprog_name,
 988                   &value, stress_mode);
 989out:
 990        free(cpu);
 991        return err;
 992}
 993