linux/samples/bpf/xdp_redirect_cpu_kern.c
/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
        .type           = BPF_MAP_TYPE_CPUMAP,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(u32),
        .max_entries    = MAX_CPUS,
};
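
/* The value stored in each cpu_map entry is the queue size for the kthread
 * the kernel creates on that CPU.  Userspace (the matching _user.c) must
 * insert an entry before frames can be redirected to that CPU.  A minimal
 * sketch of that update, assuming a map fd and a queue size chosen by the
 * loader:
 *
 *      u32 cpu = 2, qsize = 192;
 *      bpf_map_update_elem(cpu_map_fd, &cpu, &qsize, 0);
 */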

/* Common stats data record to keep userspace simpler */
struct datarec {
        __u64 processed;
        __u64 dropped;
        __u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct bpf_map_def SEC("maps") rx_cnt = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(struct datarec),
        .max_entries    = 1,
};
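
/* Being a BPF_MAP_TYPE_PERCPU_ARRAY, every CPU owns its own copy of the
 * datarec, so the counter updates in this file need no atomic operations.
 * A single userspace lookup returns one datarec per possible CPU, which the
 * reader then sums; a minimal sketch, with fd and nr_cpus assumed:
 *
 *      struct datarec values[nr_cpus];
 *      bpf_map_lookup_elem(rx_cnt_fd, &key, values);
 */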

/* Used by trace point */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(struct datarec),
        .max_entries    = 2,
        /* TODO: have entries for all possible errno's */
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(struct datarec),
        .max_entries    = MAX_CPUS,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(struct datarec),
        .max_entries    = 1,
};

/* Set of maps controlling which CPUs are available, and for iterating
 * through the selectable redirect CPUs.
 */
struct bpf_map_def SEC("maps") cpus_available = {
        .type           = BPF_MAP_TYPE_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(u32),
        .max_entries    = MAX_CPUS,
};
struct bpf_map_def SEC("maps") cpus_count = {
        .type           = BPF_MAP_TYPE_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(u32),
        .max_entries    = 1,
};
struct bpf_map_def SEC("maps") cpus_iterator = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(u32),
        .max_entries    = 1,
};
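
/* Expected userspace contract (maintained by the matching _user.c program):
 * cpus_available[i] holds the destination CPU number for slot i, cpus_count
 * holds how many slots are currently configured, and cpus_iterator keeps a
 * per-RX-CPU round-robin position used by prognum2 below.
 */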

/* Used by trace point */
struct bpf_map_def SEC("maps") exception_cnt = {
        .type           = BPF_MAP_TYPE_PERCPU_ARRAY,
        .key_size       = sizeof(u32),
        .value_size     = sizeof(struct datarec),
        .max_entries    = 1,
};

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or a non-supported ether-type
 */
struct vlan_hdr {
        __be16 h_vlan_TCI;
        __be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
               u16 *eth_proto, u64 *l3_offset)
{
        u16 eth_type;
        u64 offset;

        offset = sizeof(*eth);
        if ((void *)eth + offset > data_end)
                return false;

        eth_type = eth->h_proto;

        /* Skip non 802.3 Ethertypes */
        if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
                return false;

        /* Handle VLAN tagged packet */
        if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }
        /* Handle double VLAN tagged packet */
        if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }

        *eth_proto = ntohs(eth_type);
        *l3_offset = offset;
        return true;
}
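
/* Note on the repeated bounds checks in parse_eth(): the BPF verifier
 * requires every packet access to be preceded by an explicit comparison
 * against data_end, so each (possibly VLAN-shifted) header read above
 * re-validates its end offset before dereferencing.
 */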

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        struct udphdr *udph;
        u16 dport;

        if (iph + 1 > data_end)
                return 0;
        if (!(iph->protocol == IPPROTO_UDP))
                return 0;

        udph = (void *)(iph + 1);
        if (udph + 1 > data_end)
                return 0;

        dport = ntohs(udph->dest);
        return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;

        if (iph + 1 > data_end)
                return 0;
        return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;

        if (ip6h + 1 > data_end)
                return 0;
        return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest;
        u32 key = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
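
/* On a successful redirect the frame is enqueued towards the kthread bound
 * to cpu_dest, which injects it into the normal network stack on that
 * remote CPU.  Redirect failures are not visible here; they are counted by
 * the xdp_redirect tracepoint programs at the end of this file.
 */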

SEC("xdp_cpu_map1_touch_data")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest;
        u16 eth_type;
        u32 key = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Validate packet length is minimum Eth header size */
        if (eth + 1 > data_end)
                return XDP_ABORTED;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        /* Read packet data, and use it (drop non 802.3 Ethertypes) */
        eth_type = eth->h_proto;
        if (ntohs(eth_type) < ETH_P_802_3_MIN) {
                rec->dropped++;
                return XDP_DROP;
        }

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 cpu_dest;
        u32 *cpu_lookup;
        u32 key0 = 0;

        u32 *cpu_selected;
        u32 *cpu_iterator;
        u32 *cpu_max;
        u32 cpu_idx;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

        cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
        if (!cpu_iterator)
                return XDP_ABORTED;
        cpu_idx = *cpu_iterator;

        *cpu_iterator += 1;
        if (*cpu_iterator == *cpu_max)
                *cpu_iterator = 0;

        cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key0);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
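
/* Because cpus_iterator is a per-CPU array, each RX CPU advances its own
 * position, so the round-robin above is per-RX-CPU rather than global.
 * This keeps the fast path free of atomic operations, at the cost of the
 * combined interleaving across RX CPUs not being strictly round-robin.
 */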

SEC("xdp_cpu_map3_proto_separate")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
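
/* Net effect of the two switches: TCP and unrecognized IP protocols go to
 * slot 0, UDP to slot 1, and ICMP/ICMPv6 to slot 2 of cpus_available.  As
 * ip_proto is pre-initialized to IPPROTO_UDP and the second switch always
 * runs, ARP and other non-IP frames also end up in slot 1.
 */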

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u16 dest_port;
        u32 *cpu_lookup;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                /* DDoS filter UDP port 9 (pktgen) */
                dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
                if (dest_port == 9) {
                        if (rec)
                                rec->dropped++;
                        return XDP_DROP;
                }
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
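
/* The drop test above matches the default pktgen UDP destination port 9
 * (the "discard" port).  Since get_dest_port_ipv4_udp() parses an IPv4
 * header, effectively only IPv4 UDP traffic is filtered; IPv6 UDP towards
 * port 9 is redirected like any other UDP packet.
 */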

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        u32 cpu_hash;

        if (iph + 1 > data_end)
                return 0;

        cpu_hash = iph->saddr + iph->daddr;
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

        return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;
        u32 cpu_hash;

        if (ip6h + 1 > data_end)
                return 0;

        cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
        cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
        cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
        cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

        return cpu_hash;
}
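
/* Both helpers sum the source and destination addresses before hashing.
 * Addition is commutative, so both directions of a flow feed the same
 * 32-bit value into SuperFastHash(), which is what makes the CPU selection
 * in prognum5 below symmetric.  Seeding with INITVAL plus the L4 protocol
 * lets different protocols hash differently for the same address pair.
 */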

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning that swapping IP src/dest still
 * hits the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 *cpu_max;
        u32 cpu_hash;
        u32 key = 0;

        /* Count RX packet in map */
        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_ABORTED;
        rec->processed++;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
        if (!cpu_max)
                return XDP_ABORTED;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Hash for IPv4 and IPv6 */
        switch (eth_proto) {
        case ETH_P_IP:
                cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
        default:
                cpu_hash = 0;
        }

        /* Choose CPU based on hash */
        cpu_idx = cpu_hash % *cpu_max;

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= MAX_CPUS) {
                rec->issue++;
                return XDP_ABORTED;
        }

        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
        u64 __pad;      // First 8 bytes are not accessible by bpf code
        int prog_id;    //      offset:8;  size:4; signed:1;
        u32 act;        //      offset:12  size:4; signed:0;
        int ifindex;    //      offset:16  size:4; signed:1;
        int err;        //      offset:20  size:4; signed:1;
        int to_ifindex; //      offset:24  size:4; signed:1;
        u32 map_id;     //      offset:28  size:4; signed:0;
        int map_index;  //      offset:32  size:4; signed:1;
};                      //      offset:36

enum {
        XDP_REDIRECT_SUCCESS = 0,
        XDP_REDIRECT_ERROR = 1
};
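
/* These tracepoint context structs must mirror the field layout described
 * in the corresponding tracepoint format file (path given above each
 * struct); when attached, each tracepoint program receives a pointer to
 * such a record as its ctx argument.
 */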

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
        u32 key = XDP_REDIRECT_ERROR;
        struct datarec *rec;
        int err = ctx->err;

        if (!err)
                key = XDP_REDIRECT_SUCCESS;

        rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
        if (!rec)
                return 0;
        rec->dropped += 1;

        return 0; /* Indicate event was filtered (no further processing) */
        /*
         * Returning 1 here would allow e.g. a perf-record tracepoint
         * to see and record these events, but it doesn't work well in
         * practice, as stopping perf-record also unloads this bpf_prog.
         * Plus, there is the additional overhead of doing so.
         */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
        return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
        return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
        u64 __pad;      // First 8 bytes are not accessible by bpf code
        int prog_id;    //      offset:8;  size:4; signed:1;
        u32 act;        //      offset:12; size:4; signed:0;
        int ifindex;    //      offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
        struct datarec *rec;
        u32 key = 0;

        rec = bpf_map_lookup_elem(&exception_cnt, &key);
        if (!rec)
                return 1;
        rec->dropped += 1;

        return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
        u64 __pad;              // First 8 bytes are not accessible by bpf code
        int map_id;             //      offset:8;  size:4; signed:1;
        u32 act;                //      offset:12; size:4; signed:0;
        int cpu;                //      offset:16; size:4; signed:1;
        unsigned int drops;     //      offset:20; size:4; signed:0;
        unsigned int processed; //      offset:24; size:4; signed:0;
        int to_cpu;             //      offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
        u32 to_cpu = ctx->to_cpu;
        struct datarec *rec;

        if (to_cpu >= MAX_CPUS)
                return 1;

        rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
        if (!rec)
                return 0;
        rec->processed += ctx->processed;
        rec->dropped   += ctx->drops;

        /* Record bulk events, then userspace can calc average bulk size */
        if (ctx->processed > 0)
                rec->issue += 1;

        /* Inception: It's possible to detect overload situations via
         * this tracepoint.  This can be used for creating a feedback
         * loop to XDP, which can take appropriate actions to mitigate
         * the overload situation.
         */
        return 0;
}
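
/* From these counters userspace can derive the average enqueue bulk size
 * as processed/issue, while ctx->drops reveals frames lost because the
 * destination CPU's queue was already full.
 */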

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
        u64 __pad;              // First 8 bytes are not accessible by bpf code
        int map_id;             //      offset:8;  size:4; signed:1;
        u32 act;                //      offset:12; size:4; signed:0;
        int cpu;                //      offset:16; size:4; signed:1;
        unsigned int drops;     //      offset:20; size:4; signed:0;
        unsigned int processed; //      offset:24; size:4; signed:0;
        int sched;              //      offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
        struct datarec *rec;
        u32 key = 0;

        rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
        if (!rec)
                return 0;
        rec->processed += ctx->processed;
        rec->dropped   += ctx->drops;

        /* Count times kthread yielded CPU via schedule call */
        if (ctx->sched)
                rec->issue++;

        return 0;
}