linux/samples/bpf/xdp_redirect_cpu.bpf.c
/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
        __uint(type, BPF_MAP_TYPE_CPUMAP);
        __uint(key_size, sizeof(u32));
        __uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
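
/* Note: max_entries is left unset here and for cpus_available below; the
 * user-space loader is expected to size both maps (typically to the number
 * of possible CPUs) before the object is loaded.
 */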

/* Set of maps controlling available CPUs, and for iterating through
 * the selectable redirect CPUs.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, u32);
        __type(value, u32);
} cpus_available SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, u32);
        __type(value, u32);
        __uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __type(key, u32);
        __type(value, u32);
        __uint(max_entries, 1);
} cpus_iterator SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
        __uint(key_size, sizeof(int));
        __uint(value_size, sizeof(struct bpf_devmap_val));
        __uint(max_entries, 1);
} tx_port SEC(".maps");

char tx_mac_addr[ETH_ALEN];
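
/* Both tx_port (index 0 is expected to hold the egress ifindex) and
 * tx_mac_addr are assumed to be populated by the user-space loader before
 * any traffic is redirected.
 */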

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
               u16 *eth_proto, u64 *l3_offset)
{
        u16 eth_type;
        u64 offset;

        offset = sizeof(*eth);
        if ((void *)eth + offset > data_end)
                return false;

        eth_type = eth->h_proto;

        /* Skip non-Ethernet-II frames: h_proto values below
         * ETH_P_802_3_MIN are 802.3 length fields, not Ethertypes
         */
        if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
                return false;

        /* Handle VLAN tagged packet */
        if (eth_type == bpf_htons(ETH_P_8021Q) ||
            eth_type == bpf_htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }
        /* Handle double VLAN tagged packet: the block above is repeated
         * rather than looped, keeping the code straight-line for the
         * verifier
         */
        if (eth_type == bpf_htons(ETH_P_8021Q) ||
            eth_type == bpf_htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }

        *eth_proto = bpf_ntohs(eth_type);
        *l3_offset = offset;
        return true;
}
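
/* Parse the UDP destination port of an IPv4/UDP packet. Assumes a fixed
 * 20-byte IPv4 header (iph->ihl is not consulted, so IP options are not
 * supported); returns 0 if a bounds check or the protocol check fails.
 */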
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        struct udphdr *udph;
        u16 dport;

        if (iph + 1 > data_end)
                return 0;
        if (iph->protocol != IPPROTO_UDP)
                return 0;

        udph = (void *)(iph + 1);
        if (udph + 1 > data_end)
                return 0;

        dport = bpf_ntohs(udph->dest);
        return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;

        if (iph + 1 > data_end)
                return 0;
        return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;

        if (ip6h + 1 > data_end)
                return 0;
        return ip6h->nexthdr;
}

SEC("xdp")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
        u32 key = bpf_get_smp_processor_id();
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest = 0;
        u32 key0 = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
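
/* A minimal user-space sketch (not part of this BPF object) of how a loader
 * might make CPU 2 the first redirect target; the *_fd map descriptors are
 * hypothetical:
 *
 *   struct bpf_cpumap_val cpu_val = { .qsize = 2048 };
 *   __u32 idx = 0, cpu = 2, count = 1;
 *
 *   bpf_map_update_elem(cpu_map_fd, &cpu, &cpu_val, 0);    // spawns kthread
 *   bpf_map_update_elem(cpus_available_fd, &idx, &cpu, 0); // idx 0 -> CPU 2
 *   bpf_map_update_elem(cpus_count_fd, &idx, &count, 0);   // rotation size 1
 */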

SEC("xdp")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest = 0;
        u32 key0 = 0;
        u16 eth_type;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Validate packet length is minimum Eth header size */
        if (eth + 1 > data_end)
                return XDP_ABORTED;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        /* Read packet data, and use it (drop non 802.3 Ethertypes) */
        eth_type = eth->h_proto;
        if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
                NO_TEAR_INC(rec->dropped);
                return XDP_DROP;
        }

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
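
/* Round-robin CPU selection. Note: cpus_iterator is a PERCPU_ARRAY, so each
 * RX CPU advances its own iterator; this avoids cross-CPU contention at the
 * cost of strict global round-robin order.
 */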
SEC("xdp")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct datarec *rec;
        u32 cpu_dest = 0;
        u32 key0 = 0;

        u32 *cpu_selected;
        u32 *cpu_iterator;
        u32 *cpu_max;
        u32 cpu_idx;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

        cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
        if (!cpu_iterator)
                return XDP_ABORTED;
        cpu_idx = *cpu_iterator;

        *cpu_iterator += 1;
        if (*cpu_iterator == *cpu_max)
                *cpu_iterator = 0;

        cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
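
/* Proto-separate CPU selection: TCP and unclassified traffic go to
 * cpus_available idx 0, UDP to idx 1, ICMP/ICMPv6 to idx 2. The loader is
 * assumed to have stored valid CPU ids at indices 0-2.
 */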
SEC("xdp")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        u8 ip_proto = 0; /* non-IP frames fall through to the default CPU */
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 *cpu_lookup;
        u32 cpu_idx = 0;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
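
/* Same CPU selection as prognum3, plus a small DDoS filter that drops
 * IPv4/UDP packets with destination port 9 (pktgen's default port).
 */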
SEC("xdp")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        u8 ip_proto = 0; /* non-IP frames fall through to the default CPU */
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 *cpu_lookup;
        u32 cpu_idx = 0;
        u16 dest_port;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                /* DDoS filter: drop UDP dest port 9 (pktgen's default) */
                dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
                if (dest_port == 9) {
                        NO_TEAR_INC(rec->dropped);
                        return XDP_DROP;
                }
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval (a prime) */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        u32 cpu_hash;

        if (iph + 1 > data_end)
                return 0;

        cpu_hash = iph->saddr + iph->daddr;
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

        return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;
        u32 cpu_hash;

        if (ip6h + 1 > data_end)
                return 0;

        cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

        return cpu_hash;
}

/* Load-balance traffic based on hashing the IP-address pair + L4 protocol.
 * The hash input sums saddr and daddr, so the scheme is symmetric: swapping
 * IP src/dest still hits the same CPU.
 */
SEC("xdp")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data     = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 key0 = 0;
        u32 *cpu_max;
        u32 cpu_hash;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

        if (!parse_eth(eth, data_end, &eth_proto, &l3_offset))
                return XDP_PASS; /* Just skip */

        /* Hash for IPv4 and IPv6 */
        switch (eth_proto) {
        case ETH_P_IP:
                cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
        default:
                cpu_hash = 0;
        }

        /* Choose CPU based on hash; user space is expected to keep
         * cpus_count >= 1
         */
        cpu_idx = cpu_hash % *cpu_max;

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
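
/* Second-level programs: the loader can attach one of the programs below to
 * a cpumap entry (via bpf_cpumap_val.bpf_prog.fd); the program then runs on
 * the destination CPU for every frame delivered through cpu_map.
 */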
SEC("xdp_cpumap/redirect")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u64 nh_off;

        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return XDP_DROP;

        swap_src_dst_mac(data);
        return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp_cpumap/pass")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
        return XDP_PASS;
}

SEC("xdp_cpumap/drop")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
        return XDP_DROP;
}
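
/* devmap egress program (attached via bpf_devmap_val.bpf_prog.fd): runs as
 * the frame leaves through tx_port and rewrites the source MAC to the
 * egress device's address (tx_mac_addr, filled in by the loader).
 */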
SEC("xdp_devmap/egress")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u64 nh_off;

        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return XDP_DROP;

        __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";