linux/samples/bpf/xdp_monitor_user.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0
   2 * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
   3 */
   4static const char *__doc__=
   5 "XDP monitor tool, based on tracepoints\n"
   6;
   7
   8static const char *__doc_err_only__=
   9 " NOTICE: Only tracking XDP redirect errors\n"
  10 "         Enable TX success stats via '--stats'\n"
  11 "         (which comes with a per packet processing overhead)\n"
  12;
  13
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <unistd.h>
#include <locale.h>

#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>

#include "libbpf.h"
#include "bpf_load.h"
#include "bpf_util.h"
  32
  33static int verbose = 1;
  34static bool debug = false;
  35
  36static const struct option long_options[] = {
  37        {"help",        no_argument,            NULL, 'h' },
  38        {"debug",       no_argument,            NULL, 'D' },
  39        {"stats",       no_argument,            NULL, 'S' },
  40        {"sec",         required_argument,      NULL, 's' },
  41        {0, 0, NULL,  0 }
  42};
  43
  44/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
  45#define EXIT_FAIL_MEM   5
  46
  47static void usage(char *argv[])
  48{
  49        int i;
  50        printf("\nDOCUMENTATION:\n%s\n", __doc__);
  51        printf("\n");
  52        printf(" Usage: %s (options-see-below)\n",
  53               argv[0]);
  54        printf(" Listing options:\n");
  55        for (i = 0; long_options[i].name != 0; i++) {
  56                printf(" --%-15s", long_options[i].name);
  57                if (long_options[i].flag != NULL)
  58                        printf(" flag (internal value:%d)",
  59                               *long_options[i].flag);
  60                else
  61                        printf("(internal short-option: -%c)",
  62                               long_options[i].val);
  63                printf("\n");
  64        }
  65        printf("\n");
  66}
  67
  68#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
  69static __u64 gettime(void)
  70{
  71        struct timespec t;
  72        int res;
  73
  74        res = clock_gettime(CLOCK_MONOTONIC, &t);
  75        if (res < 0) {
  76                fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
  77                exit(EXIT_FAILURE);
  78        }
  79        return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
  80}
  81
  82enum {
  83        REDIR_SUCCESS = 0,
  84        REDIR_ERROR = 1,
  85};
  86#define REDIR_RES_MAX 2
  87static const char *redir_names[REDIR_RES_MAX] = {
  88        [REDIR_SUCCESS] = "Success",
  89        [REDIR_ERROR]   = "Error",
  90};
  91static const char *err2str(int err)
  92{
  93        if (err < REDIR_RES_MAX)
  94                return redir_names[err];
  95        return NULL;
  96}
  97/* enum xdp_action */
  98#define XDP_UNKNOWN     XDP_REDIRECT + 1
  99#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
 100static const char *xdp_action_names[XDP_ACTION_MAX] = {
 101        [XDP_ABORTED]   = "XDP_ABORTED",
 102        [XDP_DROP]      = "XDP_DROP",
 103        [XDP_PASS]      = "XDP_PASS",
 104        [XDP_TX]        = "XDP_TX",
 105        [XDP_REDIRECT]  = "XDP_REDIRECT",
 106        [XDP_UNKNOWN]   = "XDP_UNKNOWN",
 107};
 108static const char *action2str(int action)
 109{
 110        if (action < XDP_ACTION_MAX)
 111                return xdp_action_names[action];
 112        return NULL;
 113}
 114
 115/* Common stats data record shared with _kern.c */
 116struct datarec {
 117        __u64 processed;
 118        __u64 dropped;
 119        __u64 info;
 120};
 121#define MAX_CPUS 64
 122
 123/* Userspace structs for collection of stats from maps */
 124struct record {
 125        __u64 timestamp;
 126        struct datarec total;
 127        struct datarec *cpu;
 128};
 129struct u64rec {
 130        __u64 processed;
 131};
 132struct record_u64 {
 133        /* record for _kern side __u64 values */
 134        __u64 timestamp;
 135        struct u64rec total;
 136        struct u64rec *cpu;
 137};
 138
 139struct stats_record {
 140        struct record_u64 xdp_redirect[REDIR_RES_MAX];
 141        struct record_u64 xdp_exception[XDP_ACTION_MAX];
 142        struct record xdp_cpumap_kthread;
 143        struct record xdp_cpumap_enqueue[MAX_CPUS];
 144};
 145
 146static bool map_collect_record(int fd, __u32 key, struct record *rec)
 147{
 148        /* For percpu maps, userspace gets a value per possible CPU */
 149        unsigned int nr_cpus = bpf_num_possible_cpus();
 150        struct datarec values[nr_cpus];
 151        __u64 sum_processed = 0;
 152        __u64 sum_dropped = 0;
 153        __u64 sum_info = 0;
 154        int i;
 155
 156        if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
 157                fprintf(stderr,
 158                        "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
 159                return false;
 160        }
 161        /* Get time as close as possible to reading map contents */
 162        rec->timestamp = gettime();
 163
 164        /* Record and sum values from each CPU */
 165        for (i = 0; i < nr_cpus; i++) {
 166                rec->cpu[i].processed = values[i].processed;
 167                sum_processed        += values[i].processed;
 168                rec->cpu[i].dropped = values[i].dropped;
 169                sum_dropped        += values[i].dropped;
 170                rec->cpu[i].info = values[i].info;
 171                sum_info        += values[i].info;
 172        }
 173        rec->total.processed = sum_processed;
 174        rec->total.dropped   = sum_dropped;
 175        rec->total.info      = sum_info;
 176        return true;
 177}
 178
 179static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
 180{
 181        /* For percpu maps, userspace gets a value per possible CPU */
 182        unsigned int nr_cpus = bpf_num_possible_cpus();
 183        struct u64rec values[nr_cpus];
 184        __u64 sum_total = 0;
 185        int i;
 186
 187        if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
 188                fprintf(stderr,
 189                        "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
 190                return false;
 191        }
 192        /* Get time as close as possible to reading map contents */
 193        rec->timestamp = gettime();
 194
 195        /* Record and sum values from each CPU */
 196        for (i = 0; i < nr_cpus; i++) {
 197                rec->cpu[i].processed = values[i].processed;
 198                sum_total            += values[i].processed;
 199        }
 200        rec->total.processed = sum_total;
 201        return true;
 202}
 203
 204static double calc_period(struct record *r, struct record *p)
 205{
 206        double period_ = 0;
 207        __u64 period = 0;
 208
 209        period = r->timestamp - p->timestamp;
 210        if (period > 0)
 211                period_ = ((double) period / NANOSEC_PER_SEC);
 212
 213        return period_;
 214}
 215
 216static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
 217{
 218        double period_ = 0;
 219        __u64 period = 0;
 220
 221        period = r->timestamp - p->timestamp;
 222        if (period > 0)
 223                period_ = ((double) period / NANOSEC_PER_SEC);
 224
 225        return period_;
 226}
 227
 228static double calc_pps(struct datarec *r, struct datarec *p, double period)
 229{
 230        __u64 packets = 0;
 231        double pps = 0;
 232
 233        if (period > 0) {
 234                packets = r->processed - p->processed;
 235                pps = packets / period;
 236        }
 237        return pps;
 238}
 239
 240static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
 241{
 242        __u64 packets = 0;
 243        double pps = 0;
 244
 245        if (period > 0) {
 246                packets = r->processed - p->processed;
 247                pps = packets / period;
 248        }
 249        return pps;
 250}
 251
 252static double calc_drop(struct datarec *r, struct datarec *p, double period)
 253{
 254        __u64 packets = 0;
 255        double pps = 0;
 256
 257        if (period > 0) {
 258                packets = r->dropped - p->dropped;
 259                pps = packets / period;
 260        }
 261        return pps;
 262}
 263
 264static double calc_info(struct datarec *r, struct datarec *p, double period)
 265{
 266        __u64 packets = 0;
 267        double pps = 0;
 268
 269        if (period > 0) {
 270                packets = r->info - p->info;
 271                pps = packets / period;
 272        }
 273        return pps;
 274}
 275
 276static void stats_print(struct stats_record *stats_rec,
 277                        struct stats_record *stats_prev,
 278                        bool err_only)
 279{
 280        unsigned int nr_cpus = bpf_num_possible_cpus();
 281        int rec_i = 0, i, to_cpu;
 282        double t = 0, pps = 0;
 283
 284        /* Header */
 285        printf("%-15s %-7s %-12s %-12s %-9s\n",
 286               "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
 287
 288        /* tracepoint: xdp:xdp_redirect_* */
 289        if (err_only)
 290                rec_i = REDIR_ERROR;
 291
 292        for (; rec_i < REDIR_RES_MAX; rec_i++) {
 293                struct record_u64 *rec, *prev;
 294                char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
 295                char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
 296
 297                rec  =  &stats_rec->xdp_redirect[rec_i];
 298                prev = &stats_prev->xdp_redirect[rec_i];
 299                t = calc_period_u64(rec, prev);
 300
 301                for (i = 0; i < nr_cpus; i++) {
 302                        struct u64rec *r = &rec->cpu[i];
 303                        struct u64rec *p = &prev->cpu[i];
 304
 305                        pps = calc_pps_u64(r, p, t);
 306                        if (pps > 0)
 307                                printf(fmt1, "XDP_REDIRECT", i,
 308                                       rec_i ? 0.0: pps, rec_i ? pps : 0.0,
 309                                       err2str(rec_i));
 310                }
 311                pps = calc_pps_u64(&rec->total, &prev->total, t);
 312                printf(fmt2, "XDP_REDIRECT", "total",
 313                       rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
 314        }
 315
 316        /* tracepoint: xdp:xdp_exception */
 317        for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
 318                struct record_u64 *rec, *prev;
 319                char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
 320                char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
 321
 322                rec  =  &stats_rec->xdp_exception[rec_i];
 323                prev = &stats_prev->xdp_exception[rec_i];
 324                t = calc_period_u64(rec, prev);
 325
 326                for (i = 0; i < nr_cpus; i++) {
 327                        struct u64rec *r = &rec->cpu[i];
 328                        struct u64rec *p = &prev->cpu[i];
 329
 330                        pps = calc_pps_u64(r, p, t);
 331                        if (pps > 0)
 332                                printf(fmt1, "Exception", i,
 333                                       0.0, pps, err2str(rec_i));
 334                }
 335                pps = calc_pps_u64(&rec->total, &prev->total, t);
 336                if (pps > 0)
 337                        printf(fmt2, "Exception", "total",
 338                               0.0, pps, action2str(rec_i));
 339        }
 340
 341        /* cpumap enqueue stats */
 342        for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
 343                char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
 344                char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
 345                struct record *rec, *prev;
 346                char *info_str = "";
 347                double drop, info;
 348
 349                rec  =  &stats_rec->xdp_cpumap_enqueue[to_cpu];
 350                prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
 351                t = calc_period(rec, prev);
 352                for (i = 0; i < nr_cpus; i++) {
 353                        struct datarec *r = &rec->cpu[i];
 354                        struct datarec *p = &prev->cpu[i];
 355
 356                        pps  = calc_pps(r, p, t);
 357                        drop = calc_drop(r, p, t);
 358                        info = calc_info(r, p, t);
 359                        if (info > 0) {
 360                                info_str = "bulk-average";
 361                                info = pps / info; /* calc average bulk size */
 362                        }
 363                        if (pps > 0)
 364                                printf(fmt1, "cpumap-enqueue",
 365                                       i, to_cpu, pps, drop, info, info_str);
 366                }
 367                pps = calc_pps(&rec->total, &prev->total, t);
 368                if (pps > 0) {
 369                        drop = calc_drop(&rec->total, &prev->total, t);
 370                        info = calc_info(&rec->total, &prev->total, t);
 371                        if (info > 0) {
 372                                info_str = "bulk-average";
 373                                info = pps / info; /* calc average bulk size */
 374                        }
 375                        printf(fmt2, "cpumap-enqueue",
 376                               "sum", to_cpu, pps, drop, info, info_str);
 377                }
 378        }
 379
 380        /* cpumap kthread stats */
 381        {
 382                char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
 383                char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
 384                struct record *rec, *prev;
 385                double drop, info;
 386                char *i_str = "";
 387
 388                rec  =  &stats_rec->xdp_cpumap_kthread;
 389                prev = &stats_prev->xdp_cpumap_kthread;
 390                t = calc_period(rec, prev);
 391                for (i = 0; i < nr_cpus; i++) {
 392                        struct datarec *r = &rec->cpu[i];
 393                        struct datarec *p = &prev->cpu[i];
 394
 395                        pps  = calc_pps(r, p, t);
 396                        drop = calc_drop(r, p, t);
 397                        info = calc_info(r, p, t);
 398                        if (info > 0)
 399                                i_str = "sched";
 400                        if (pps > 0)
 401                                printf(fmt1, "cpumap-kthread",
 402                                       i, pps, drop, info, i_str);
 403                }
 404                pps = calc_pps(&rec->total, &prev->total, t);
 405                drop = calc_drop(&rec->total, &prev->total, t);
 406                info = calc_info(&rec->total, &prev->total, t);
 407                if (info > 0)
 408                        i_str = "sched-sum";
 409                printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
 410        }
 411
 412        printf("\n");
 413}
 414
 415static bool stats_collect(struct stats_record *rec)
 416{
 417        int fd;
 418        int i;
 419
 420        /* TODO: Detect if someone unloaded the perf event_fd's, as
 421         * this can happen by someone running perf-record -e
 422         */
 423
 424        fd = map_data[0].fd; /* map0: redirect_err_cnt */
 425        for (i = 0; i < REDIR_RES_MAX; i++)
 426                map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
 427
 428        fd = map_data[1].fd; /* map1: exception_cnt */
 429        for (i = 0; i < XDP_ACTION_MAX; i++) {
 430                map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
 431        }
 432
 433        fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */
 434        for (i = 0; i < MAX_CPUS; i++)
 435                map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
 436
 437        fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
 438        map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
 439
 440        return true;
 441}
 442
 443static void *alloc_rec_per_cpu(int record_size)
 444{
 445        unsigned int nr_cpus = bpf_num_possible_cpus();
 446        void *array;
 447        size_t size;
 448
 449        size = record_size * nr_cpus;
 450        array = malloc(size);
 451        memset(array, 0, size);
 452        if (!array) {
 453                fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
 454                exit(EXIT_FAIL_MEM);
 455        }
 456        return array;
 457}
 458
 459static struct stats_record *alloc_stats_record(void)
 460{
 461        struct stats_record *rec;
 462        int rec_sz;
 463        int i;
 464
 465        /* Alloc main stats_record structure */
 466        rec = malloc(sizeof(*rec));
 467        memset(rec, 0, sizeof(*rec));
 468        if (!rec) {
 469                fprintf(stderr, "Mem alloc error\n");
 470                exit(EXIT_FAIL_MEM);
 471        }
 472
 473        /* Alloc stats stored per CPU for each record */
 474        rec_sz = sizeof(struct u64rec);
 475        for (i = 0; i < REDIR_RES_MAX; i++)
 476                rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
 477
 478        for (i = 0; i < XDP_ACTION_MAX; i++)
 479                rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
 480
 481        rec_sz = sizeof(struct datarec);
 482        rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
 483
 484        for (i = 0; i < MAX_CPUS; i++)
 485                rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
 486
 487        return rec;
 488}
 489
 490static void free_stats_record(struct stats_record *r)
 491{
 492        int i;
 493
 494        for (i = 0; i < REDIR_RES_MAX; i++)
 495                free(r->xdp_redirect[i].cpu);
 496
 497        for (i = 0; i < XDP_ACTION_MAX; i++)
 498                free(r->xdp_exception[i].cpu);
 499
 500        free(r->xdp_cpumap_kthread.cpu);
 501
 502        for (i = 0; i < MAX_CPUS; i++)
 503                free(r->xdp_cpumap_enqueue[i].cpu);
 504
 505        free(r);
 506}
 507
 508/* Pointer swap trick */
 509static inline void swap(struct stats_record **a, struct stats_record **b)
 510{
 511        struct stats_record *tmp;
 512
 513        tmp = *a;
 514        *a = *b;
 515        *b = tmp;
 516}
 517
 518static void stats_poll(int interval, bool err_only)
 519{
 520        struct stats_record *rec, *prev;
 521
 522        rec  = alloc_stats_record();
 523        prev = alloc_stats_record();
 524        stats_collect(rec);
 525
 526        if (err_only)
 527                printf("\n%s\n", __doc_err_only__);
 528
 529        /* Trick to pretty printf with thousands separators use %' */
 530        setlocale(LC_NUMERIC, "en_US");
 531
 532        /* Header */
 533        if (verbose)
 534                printf("\n%s", __doc__);
 535
 536        /* TODO Need more advanced stats on error types */
 537        if (verbose) {
 538                printf(" - Stats map0: %s\n", map_data[0].name);
 539                printf(" - Stats map1: %s\n", map_data[1].name);
 540                printf("\n");
 541        }
 542        fflush(stdout);
 543
 544        while (1) {
 545                swap(&prev, &rec);
 546                stats_collect(rec);
 547                stats_print(rec, prev, err_only);
 548                fflush(stdout);
 549                sleep(interval);
 550        }
 551
 552        free_stats_record(rec);
 553        free_stats_record(prev);
 554}
 555
 556static void print_bpf_prog_info(void)
 557{
 558        int i;
 559
 560        /* Prog info */
 561        printf("Loaded BPF prog have %d bpf program(s)\n", prog_cnt);
 562        for (i = 0; i < prog_cnt; i++) {
 563                printf(" - prog_fd[%d] = fd(%d)\n", i, prog_fd[i]);
 564        }
 565
 566        /* Maps info */
 567        printf("Loaded BPF prog have %d map(s)\n", map_data_count);
 568        for (i = 0; i < map_data_count; i++) {
 569                char *name = map_data[i].name;
 570                int fd     = map_data[i].fd;
 571
 572                printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
 573        }
 574
 575        /* Event info */
 576        printf("Searching for (max:%d) event file descriptor(s)\n", prog_cnt);
 577        for (i = 0; i < prog_cnt; i++) {
 578                if (event_fd[i] != -1)
 579                        printf(" - event_fd[%d] = fd(%d)\n", i, event_fd[i]);
 580        }
 581}
 582
 583int main(int argc, char **argv)
 584{
 585        struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 586        int longindex = 0, opt;
 587        int ret = EXIT_SUCCESS;
 588        char bpf_obj_file[256];
 589
 590        /* Default settings: */
 591        bool errors_only = true;
 592        int interval = 2;
 593
 594        snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
 595
 596        /* Parse commands line args */
 597        while ((opt = getopt_long(argc, argv, "h",
 598                                  long_options, &longindex)) != -1) {
 599                switch (opt) {
 600                case 'D':
 601                        debug = true;
 602                        break;
 603                case 'S':
 604                        errors_only = false;
 605                        break;
 606                case 's':
 607                        interval = atoi(optarg);
 608                        break;
 609                case 'h':
 610                default:
 611                        usage(argv);
 612                        return EXIT_FAILURE;
 613                }
 614        }
 615
 616        if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 617                perror("setrlimit(RLIMIT_MEMLOCK)");
 618                return EXIT_FAILURE;
 619        }
 620
 621        if (load_bpf_file(bpf_obj_file)) {
 622                printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
 623                return EXIT_FAILURE;
 624        }
 625        if (!prog_fd[0]) {
 626                printf("ERROR - load_bpf_file: %s\n", strerror(errno));
 627                return EXIT_FAILURE;
 628        }
 629
 630        if (debug) {
 631                print_bpf_prog_info();
 632        }
 633
 634        /* Unload/stop tracepoint event by closing fd's */
 635        if (errors_only) {
 636                /* The prog_fd[i] and event_fd[i] depend on the
 637                 * order the functions was defined in _kern.c
 638                 */
 639                close(event_fd[2]); /* tracepoint/xdp/xdp_redirect */
 640                close(prog_fd[2]);  /* func: trace_xdp_redirect */
 641                close(event_fd[3]); /* tracepoint/xdp/xdp_redirect_map */
 642                close(prog_fd[3]);  /* func: trace_xdp_redirect_map */
 643        }
 644
 645        stats_poll(interval, errors_only);
 646
 647        return ret;
 648}
 649