linux/samples/bpf/xdp_redirect_cpu_kern.c
/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "hash_func01.h"

#define MAX_CPUS NR_CPUS

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
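
/* Note: the cpu_map entries are populated from user space by the loader.
 * A minimal sketch (not part of this BPF object; the fd variable name and
 * the qsize value are illustrative), using libbpf's bpf_map_update_elem()
 * wrapper:
 *
 *	struct bpf_cpumap_val value = { .qsize = 2048 };
 *	__u32 cpu = 2;	// destination CPU chosen by the loader
 *
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &value, 0);
 *
 * For each populated entry the kernel starts a cpumap kthread on that CPU,
 * which receives the XDP_REDIRECT'ed frames.
 */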

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");
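
/* Since rx_cnt is a BPF_MAP_TYPE_PERCPU_ARRAY, a user-space lookup returns
 * one struct datarec per possible CPU.  A hedged sketch of how a reader
 * could sum them (the rx_cnt_fd name is illustrative):
 *
 *	int nr_cpus = libbpf_num_possible_cpus();
 *	struct datarec values[nr_cpus];
 *	__u64 pkts = 0;
 *	__u32 key = 0;
 *
 *	if (bpf_map_lookup_elem(rx_cnt_fd, &key, values) == 0) {
 *		for (int i = 0; i < nr_cpus; i++)
 *			pkts += values[i].processed;
 *	}
 */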

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling the available CPUs, and for iterating through
 * the selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
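
/* These maps are also filled in by the user-space loader: index i of
 * cpus_available holds the i'th destination CPU, and cpus_count holds how
 * many entries are valid, bounding the CPU index the round-robin and
 * hashing programs below pick.  A hedged loader-side sketch (identifiers
 * illustrative):
 *
 *	__u32 key = 0, count = 0;
 *
 *	for (int i = 0; i < n_requested; i++) {
 *		__u32 idx = count, cpu = requested_cpus[i];
 *
 *		bpf_map_update_elem(cpus_available_fd, &idx, &cpu, 0);
 *		count++;
 *	}
 *	bpf_map_update_elem(cpus_count_fd, &key, &count, 0);
 */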

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or for unsupported ether-types
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning that swapping IP src/dest still
 * hits the same CPU: source and destination addresses are summed before
 * hashing, and addition is commutative.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12  size:4; signed:0;
	int ifindex;	//	offset:16  size:4; signed:1;
	int err;	//	offset:20  size:4; signed:1;
	int to_ifindex;	//	offset:24  size:4; signed:1;
	u32 map_id;	//	offset:28  size:4; signed:0;
	int map_index;	//	offset:32  size:4; signed:1;
};			//	offset:36

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well in
	 * practice as stopping perf-record also unloads this bpf_prog.
	 * Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations, via
	 * this tracepoint.  This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;			// First 8 bytes are not accessible
	int map_id;			//	offset:8;  size:4; signed:1;
	u32 act;			//	offset:12; size:4; signed:0;
	int cpu;			//	offset:16; size:4; signed:1;
	unsigned int drops;		//	offset:20; size:4; signed:0;
	unsigned int processed;		//	offset:24; size:4; signed:0;
	int sched;			//	offset:28; size:4; signed:1;
	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;
	rec->xdp_pass  += ctx->xdp_pass;
	rec->xdp_drop  += ctx->xdp_drop;
	rec->xdp_redirect  += ctx->xdp_redirect;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}
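
/* Usage note: these XDP programs are normally loaded and attached by the
 * companion user-space tool (xdp_redirect_cpu_user.c in samples/bpf).  A
 * minimal hedged sketch with libbpf, error handling omitted and the ifindex
 * assumed to be known:
 *
 *	struct bpf_object *obj;
 *	struct bpf_program *prog;
 *
 *	obj = bpf_object__open_file("xdp_redirect_cpu_kern.o", NULL);
 *	bpf_object__load(obj);
 *	prog = bpf_object__find_program_by_title(obj, "xdp_cpu_map5_lb_hash_ip_pairs");
 *	bpf_set_link_xdp_fd(ifindex, bpf_program__fd(prog), XDP_FLAGS_DRV_MODE);
 */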