linux/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2// Copyright (c) 2017 Facebook
   3#include <stddef.h>
   4#include <stdbool.h>
   5#include <string.h>
   6#include <linux/pkt_cls.h>
   7#include <linux/bpf.h>
   8#include <linux/in.h>
   9#include <linux/if_ether.h>
  10#include <linux/ip.h>
  11#include <linux/ipv6.h>
  12#include <linux/icmp.h>
  13#include <linux/icmpv6.h>
  14#include <linux/tcp.h>
  15#include <linux/udp.h>
  16#include <bpf/bpf_helpers.h>
  17#include <bpf/bpf_endian.h>
  18
  19static __always_inline __u32 rol32(__u32 word, unsigned int shift)
  20{
  21        return (word << shift) | (word >> ((-shift) & 31));
  22}
  23
  24/* copy paste of jhash from kernel sources to make sure llvm
  25 * can compile it into valid sequence of bpf instructions
  26 */
  27#define __jhash_mix(a, b, c)                    \
  28{                                               \
  29        a -= c;  a ^= rol32(c, 4);  c += b;     \
  30        b -= a;  b ^= rol32(a, 6);  a += c;     \
  31        c -= b;  c ^= rol32(b, 8);  b += a;     \
  32        a -= c;  a ^= rol32(c, 16); c += b;     \
  33        b -= a;  b ^= rol32(a, 19); a += c;     \
  34        c -= b;  c ^= rol32(b, 4);  b += a;     \
  35}
  36
  37#define __jhash_final(a, b, c)                  \
  38{                                               \
  39        c ^= b; c -= rol32(b, 14);              \
  40        a ^= c; a -= rol32(c, 11);              \
  41        b ^= a; b -= rol32(a, 25);              \
  42        c ^= b; c -= rol32(b, 16);              \
  43        a ^= c; a -= rol32(c, 4);               \
  44        b ^= a; b -= rol32(a, 14);              \
  45        c ^= b; c -= rol32(b, 24);              \
  46}
  47
  48#define JHASH_INITVAL           0xdeadbeef
  49
  50typedef unsigned int u32;
  51
  52static __noinline
  53u32 jhash(const void *key, u32 length, u32 initval)
  54{
  55        u32 a, b, c;
  56        const unsigned char *k = key;
  57
  58        a = b = c = JHASH_INITVAL + length + initval;
  59
  60        while (length > 12) {
  61                a += *(u32 *)(k);
  62                b += *(u32 *)(k + 4);
  63                c += *(u32 *)(k + 8);
  64                __jhash_mix(a, b, c);
  65                length -= 12;
  66                k += 12;
  67        }
  68        switch (length) {
  69        case 12: c += (u32)k[11]<<24;
  70        case 11: c += (u32)k[10]<<16;
  71        case 10: c += (u32)k[9]<<8;
  72        case 9:  c += k[8];
  73        case 8:  b += (u32)k[7]<<24;
  74        case 7:  b += (u32)k[6]<<16;
  75        case 6:  b += (u32)k[5]<<8;
  76        case 5:  b += k[4];
  77        case 4:  a += (u32)k[3]<<24;
  78        case 3:  a += (u32)k[2]<<16;
  79        case 2:  a += (u32)k[1]<<8;
  80        case 1:  a += k[0];
  81                 __jhash_final(a, b, c);
  82        case 0: /* Nothing left to add */
  83                break;
  84        }
  85
  86        return c;
  87}
  88
  89__noinline
  90u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
  91{
  92        a += initval;
  93        b += initval;
  94        c += initval;
  95        __jhash_final(a, b, c);
  96        return c;
  97}
  98
  99__noinline
 100u32 jhash_2words(u32 a, u32 b, u32 initval)
 101{
 102        return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
 103}
 104
 105struct flow_key {
 106        union {
 107                __be32 src;
 108                __be32 srcv6[4];
 109        };
 110        union {
 111                __be32 dst;
 112                __be32 dstv6[4];
 113        };
 114        union {
 115                __u32 ports;
 116                __u16 port16[2];
 117        };
 118        __u8 proto;
 119};
 120
 121struct packet_description {
 122        struct flow_key flow;
 123        __u8 flags;
 124};
 125
 126struct ctl_value {
 127        union {
 128                __u64 value;
 129                __u32 ifindex;
 130                __u8 mac[6];
 131        };
 132};
 133
 134struct vip_definition {
 135        union {
 136                __be32 vip;
 137                __be32 vipv6[4];
 138        };
 139        __u16 port;
 140        __u16 family;
 141        __u8 proto;
 142};
 143
 144struct vip_meta {
 145        __u32 flags;
 146        __u32 vip_num;
 147};
 148
 149struct real_pos_lru {
 150        __u32 pos;
 151        __u64 atime;
 152};
 153
 154struct real_definition {
 155        union {
 156                __be32 dst;
 157                __be32 dstv6[4];
 158        };
 159        __u8 flags;
 160};
 161
 162struct lb_stats {
 163        __u64 v2;
 164        __u64 v1;
 165};
 166
 167struct {
 168        __uint(type, BPF_MAP_TYPE_HASH);
 169        __uint(max_entries, 512);
 170        __type(key, struct vip_definition);
 171        __type(value, struct vip_meta);
 172} vip_map SEC(".maps");
 173
 174struct {
 175        __uint(type, BPF_MAP_TYPE_LRU_HASH);
 176        __uint(max_entries, 300);
 177        __uint(map_flags, 1U << 1);
 178        __type(key, struct flow_key);
 179        __type(value, struct real_pos_lru);
 180} lru_cache SEC(".maps");
 181
 182struct {
 183        __uint(type, BPF_MAP_TYPE_ARRAY);
 184        __uint(max_entries, 12 * 655);
 185        __type(key, __u32);
 186        __type(value, __u32);
 187} ch_rings SEC(".maps");
 188
 189struct {
 190        __uint(type, BPF_MAP_TYPE_ARRAY);
 191        __uint(max_entries, 40);
 192        __type(key, __u32);
 193        __type(value, struct real_definition);
 194} reals SEC(".maps");
 195
 196struct {
 197        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 198        __uint(max_entries, 515);
 199        __type(key, __u32);
 200        __type(value, struct lb_stats);
 201} stats SEC(".maps");
 202
 203struct {
 204        __uint(type, BPF_MAP_TYPE_ARRAY);
 205        __uint(max_entries, 16);
 206        __type(key, __u32);
 207        __type(value, struct ctl_value);
 208} ctl_array SEC(".maps");
 209
 210struct eth_hdr {
 211        unsigned char eth_dest[6];
 212        unsigned char eth_source[6];
 213        unsigned short eth_proto;
 214};
 215
 216static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
 217{
 218        __u64 off = sizeof(struct eth_hdr);
 219        if (is_ipv6) {
 220                off += sizeof(struct ipv6hdr);
 221                if (is_icmp)
 222                        off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
 223        } else {
 224                off += sizeof(struct iphdr);
 225                if (is_icmp)
 226                        off += sizeof(struct icmphdr) + sizeof(struct iphdr);
 227        }
 228        return off;
 229}
 230
 231static __attribute__ ((noinline))
 232bool parse_udp(void *data, void *data_end,
 233               bool is_ipv6, struct packet_description *pckt)
 234{
 235
 236        bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
 237        __u64 off = calc_offset(is_ipv6, is_icmp);
 238        struct udphdr *udp;
 239        udp = data + off;
 240
 241        if (udp + 1 > data_end)
 242                return 0;
 243        if (!is_icmp) {
 244                pckt->flow.port16[0] = udp->source;
 245                pckt->flow.port16[1] = udp->dest;
 246        } else {
 247                pckt->flow.port16[0] = udp->dest;
 248                pckt->flow.port16[1] = udp->source;
 249        }
 250        return 1;
 251}
 252
 253static __attribute__ ((noinline))
 254bool parse_tcp(void *data, void *data_end,
 255               bool is_ipv6, struct packet_description *pckt)
 256{
 257
 258        bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
 259        __u64 off = calc_offset(is_ipv6, is_icmp);
 260        struct tcphdr *tcp;
 261
 262        tcp = data + off;
 263        if (tcp + 1 > data_end)
 264                return 0;
 265        if (tcp->syn)
 266                pckt->flags |= (1 << 1);
 267        if (!is_icmp) {
 268                pckt->flow.port16[0] = tcp->source;
 269                pckt->flow.port16[1] = tcp->dest;
 270        } else {
 271                pckt->flow.port16[0] = tcp->dest;
 272                pckt->flow.port16[1] = tcp->source;
 273        }
 274        return 1;
 275}
 276
 277static __attribute__ ((noinline))
 278bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
 279              struct packet_description *pckt,
 280              struct real_definition *dst, __u32 pkt_bytes)
 281{
 282        struct eth_hdr *new_eth;
 283        struct eth_hdr *old_eth;
 284        struct ipv6hdr *ip6h;
 285        __u32 ip_suffix;
 286        void *data_end;
 287        void *data;
 288
 289        if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
 290                return 0;
 291        data = (void *)(long)xdp->data;
 292        data_end = (void *)(long)xdp->data_end;
 293        new_eth = data;
 294        ip6h = data + sizeof(struct eth_hdr);
 295        old_eth = data + sizeof(struct ipv6hdr);
 296        if (new_eth + 1 > data_end ||
 297            old_eth + 1 > data_end || ip6h + 1 > data_end)
 298                return 0;
 299        memcpy(new_eth->eth_dest, cval->mac, 6);
 300        memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
 301        new_eth->eth_proto = 56710;
 302        ip6h->version = 6;
 303        ip6h->priority = 0;
 304        memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
 305
 306        ip6h->nexthdr = IPPROTO_IPV6;
 307        ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
 308        ip6h->payload_len =
 309            bpf_htons(pkt_bytes + sizeof(struct ipv6hdr));
 310        ip6h->hop_limit = 4;
 311
 312        ip6h->saddr.in6_u.u6_addr32[0] = 1;
 313        ip6h->saddr.in6_u.u6_addr32[1] = 2;
 314        ip6h->saddr.in6_u.u6_addr32[2] = 3;
 315        ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
 316        memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
 317        return 1;
 318}
 319
 320static __attribute__ ((noinline))
 321bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
 322              struct packet_description *pckt,
 323              struct real_definition *dst, __u32 pkt_bytes)
 324{
 325
 326        __u32 ip_suffix = bpf_ntohs(pckt->flow.port16[0]);
 327        struct eth_hdr *new_eth;
 328        struct eth_hdr *old_eth;
 329        __u16 *next_iph_u16;
 330        struct iphdr *iph;
 331        __u32 csum = 0;
 332        void *data_end;
 333        void *data;
 334
 335        ip_suffix <<= 15;
 336        ip_suffix ^= pckt->flow.src;
 337        if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
 338                return 0;
 339        data = (void *)(long)xdp->data;
 340        data_end = (void *)(long)xdp->data_end;
 341        new_eth = data;
 342        iph = data + sizeof(struct eth_hdr);
 343        old_eth = data + sizeof(struct iphdr);
 344        if (new_eth + 1 > data_end ||
 345            old_eth + 1 > data_end || iph + 1 > data_end)
 346                return 0;
 347        memcpy(new_eth->eth_dest, cval->mac, 6);
 348        memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
 349        new_eth->eth_proto = 8;
 350        iph->version = 4;
 351        iph->ihl = 5;
 352        iph->frag_off = 0;
 353        iph->protocol = IPPROTO_IPIP;
 354        iph->check = 0;
 355        iph->tos = 1;
 356        iph->tot_len = bpf_htons(pkt_bytes + sizeof(struct iphdr));
 357        /* don't update iph->daddr, since it will overwrite old eth_proto
 358         * and multiple iterations of bpf_prog_run() will fail
 359         */
 360
 361        iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
 362        iph->ttl = 4;
 363
 364        next_iph_u16 = (__u16 *) iph;
 365#pragma clang loop unroll(full)
 366        for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
 367                csum += *next_iph_u16++;
 368        iph->check = ~((csum & 0xffff) + (csum >> 16));
 369        if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
 370                return 0;
 371        return 1;
 372}
 373
 374static __attribute__ ((noinline))
 375bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
 376{
 377        struct eth_hdr *new_eth;
 378        struct eth_hdr *old_eth;
 379
 380        old_eth = *data;
 381        new_eth = *data + sizeof(struct ipv6hdr);
 382        memcpy(new_eth->eth_source, old_eth->eth_source, 6);
 383        memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
 384        if (inner_v4)
 385                new_eth->eth_proto = 8;
 386        else
 387                new_eth->eth_proto = 56710;
 388        if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
 389                return 0;
 390        *data = (void *)(long)xdp->data;
 391        *data_end = (void *)(long)xdp->data_end;
 392        return 1;
 393}
 394
 395static __attribute__ ((noinline))
 396bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
 397{
 398        struct eth_hdr *new_eth;
 399        struct eth_hdr *old_eth;
 400
 401        old_eth = *data;
 402        new_eth = *data + sizeof(struct iphdr);
 403        memcpy(new_eth->eth_source, old_eth->eth_source, 6);
 404        memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
 405        new_eth->eth_proto = 8;
 406        if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
 407                return 0;
 408        *data = (void *)(long)xdp->data;
 409        *data_end = (void *)(long)xdp->data_end;
 410        return 1;
 411}
 412
 413static __attribute__ ((noinline))
 414int swap_mac_and_send(void *data, void *data_end)
 415{
 416        unsigned char tmp_mac[6];
 417        struct eth_hdr *eth;
 418
 419        eth = data;
 420        memcpy(tmp_mac, eth->eth_source, 6);
 421        memcpy(eth->eth_source, eth->eth_dest, 6);
 422        memcpy(eth->eth_dest, tmp_mac, 6);
 423        return XDP_TX;
 424}
 425
 426static __attribute__ ((noinline))
 427int send_icmp_reply(void *data, void *data_end)
 428{
 429        struct icmphdr *icmp_hdr;
 430        __u16 *next_iph_u16;
 431        __u32 tmp_addr = 0;
 432        struct iphdr *iph;
 433        __u32 csum1 = 0;
 434        __u32 csum = 0;
 435        __u64 off = 0;
 436
 437        if (data + sizeof(struct eth_hdr)
 438             + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
 439                return XDP_DROP;
 440        off += sizeof(struct eth_hdr);
 441        iph = data + off;
 442        off += sizeof(struct iphdr);
 443        icmp_hdr = data + off;
 444        icmp_hdr->type = 0;
 445        icmp_hdr->checksum += 0x0007;
 446        iph->ttl = 4;
 447        tmp_addr = iph->daddr;
 448        iph->daddr = iph->saddr;
 449        iph->saddr = tmp_addr;
 450        iph->check = 0;
 451        next_iph_u16 = (__u16 *) iph;
 452#pragma clang loop unroll(full)
 453        for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
 454                csum += *next_iph_u16++;
 455        iph->check = ~((csum & 0xffff) + (csum >> 16));
 456        return swap_mac_and_send(data, data_end);
 457}
 458
 459static __attribute__ ((noinline))
 460int send_icmp6_reply(void *data, void *data_end)
 461{
 462        struct icmp6hdr *icmp_hdr;
 463        struct ipv6hdr *ip6h;
 464        __be32 tmp_addr[4];
 465        __u64 off = 0;
 466
 467        if (data + sizeof(struct eth_hdr)
 468             + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
 469                return XDP_DROP;
 470        off += sizeof(struct eth_hdr);
 471        ip6h = data + off;
 472        off += sizeof(struct ipv6hdr);
 473        icmp_hdr = data + off;
 474        icmp_hdr->icmp6_type = 129;
 475        icmp_hdr->icmp6_cksum -= 0x0001;
 476        ip6h->hop_limit = 4;
 477        memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
 478        memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
 479        memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
 480        return swap_mac_and_send(data, data_end);
 481}
 482
 483static __attribute__ ((noinline))
 484int parse_icmpv6(void *data, void *data_end, __u64 off,
 485                 struct packet_description *pckt)
 486{
 487        struct icmp6hdr *icmp_hdr;
 488        struct ipv6hdr *ip6h;
 489
 490        icmp_hdr = data + off;
 491        if (icmp_hdr + 1 > data_end)
 492                return XDP_DROP;
 493        if (icmp_hdr->icmp6_type == 128)
 494                return send_icmp6_reply(data, data_end);
 495        if (icmp_hdr->icmp6_type != 3)
 496                return XDP_PASS;
 497        off += sizeof(struct icmp6hdr);
 498        ip6h = data + off;
 499        if (ip6h + 1 > data_end)
 500                return XDP_DROP;
 501        pckt->flow.proto = ip6h->nexthdr;
 502        pckt->flags |= (1 << 0);
 503        memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
 504        memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
 505        return -1;
 506}
 507
 508static __attribute__ ((noinline))
 509int parse_icmp(void *data, void *data_end, __u64 off,
 510               struct packet_description *pckt)
 511{
 512        struct icmphdr *icmp_hdr;
 513        struct iphdr *iph;
 514
 515        icmp_hdr = data + off;
 516        if (icmp_hdr + 1 > data_end)
 517                return XDP_DROP;
 518        if (icmp_hdr->type == 8)
 519                return send_icmp_reply(data, data_end);
 520        if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
 521                return XDP_PASS;
 522        off += sizeof(struct icmphdr);
 523        iph = data + off;
 524        if (iph + 1 > data_end)
 525                return XDP_DROP;
 526        if (iph->ihl != 5)
 527                return XDP_DROP;
 528        pckt->flow.proto = iph->protocol;
 529        pckt->flags |= (1 << 0);
 530        pckt->flow.src = iph->daddr;
 531        pckt->flow.dst = iph->saddr;
 532        return -1;
 533}
 534
 535static __attribute__ ((noinline))
 536__u32 get_packet_hash(struct packet_description *pckt,
 537                      bool hash_16bytes)
 538{
 539        if (hash_16bytes)
 540                return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
 541                                    pckt->flow.ports, 24);
 542        else
 543                return jhash_2words(pckt->flow.src, pckt->flow.ports,
 544                                    24);
 545}
 546
 547__attribute__ ((noinline))
 548static bool get_packet_dst(struct real_definition **real,
 549                           struct packet_description *pckt,
 550                           struct vip_meta *vip_info,
 551                           bool is_ipv6, void *lru_map)
 552{
 553        struct real_pos_lru new_dst_lru = { };
 554        bool hash_16bytes = is_ipv6;
 555        __u32 *real_pos, hash, key;
 556        __u64 cur_time;
 557
 558        if (vip_info->flags & (1 << 2))
 559                hash_16bytes = 1;
 560        if (vip_info->flags & (1 << 3)) {
 561                pckt->flow.port16[0] = pckt->flow.port16[1];
 562                memset(pckt->flow.srcv6, 0, 16);
 563        }
 564        hash = get_packet_hash(pckt, hash_16bytes);
 565        if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 566            hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
 567                return 0;
 568        key = 2 * vip_info->vip_num + hash % 2;
 569        real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 570        if (!real_pos)
 571                return 0;
 572        key = *real_pos;
 573        *real = bpf_map_lookup_elem(&reals, &key);
 574        if (!(*real))
 575                return 0;
 576        if (!(vip_info->flags & (1 << 1))) {
 577                __u32 conn_rate_key = 512 + 2;
 578                struct lb_stats *conn_rate_stats =
 579                    bpf_map_lookup_elem(&stats, &conn_rate_key);
 580
 581                if (!conn_rate_stats)
 582                        return 1;
 583                cur_time = bpf_ktime_get_ns();
 584                if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
 585                        conn_rate_stats->v1 = 1;
 586                        conn_rate_stats->v2 = cur_time;
 587                } else {
 588                        conn_rate_stats->v1 += 1;
 589                        if (conn_rate_stats->v1 >= 1)
 590                                return 1;
 591                }
 592                if (pckt->flow.proto == IPPROTO_UDP)
 593                        new_dst_lru.atime = cur_time;
 594                new_dst_lru.pos = key;
 595                bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
 596        }
 597        return 1;
 598}
 599
 600__attribute__ ((noinline))
 601static void connection_table_lookup(struct real_definition **real,
 602                                    struct packet_description *pckt,
 603                                    void *lru_map)
 604{
 605
 606        struct real_pos_lru *dst_lru;
 607        __u64 cur_time;
 608        __u32 key;
 609
 610        dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
 611        if (!dst_lru)
 612                return;
 613        if (pckt->flow.proto == IPPROTO_UDP) {
 614                cur_time = bpf_ktime_get_ns();
 615                if (cur_time - dst_lru->atime > 300000)
 616                        return;
 617                dst_lru->atime = cur_time;
 618        }
 619        key = dst_lru->pos;
 620        *real = bpf_map_lookup_elem(&reals, &key);
 621}
 622
 623/* don't believe your eyes!
 624 * below function has 6 arguments whereas bpf and llvm allow maximum of 5
 625 * but since it's _static_ llvm can optimize one argument away
 626 */
 627__attribute__ ((noinline))
 628static int process_l3_headers_v6(struct packet_description *pckt,
 629                                 __u8 *protocol, __u64 off,
 630                                 __u16 *pkt_bytes, void *data,
 631                                 void *data_end)
 632{
 633        struct ipv6hdr *ip6h;
 634        __u64 iph_len;
 635        int action;
 636
 637        ip6h = data + off;
 638        if (ip6h + 1 > data_end)
 639                return XDP_DROP;
 640        iph_len = sizeof(struct ipv6hdr);
 641        *protocol = ip6h->nexthdr;
 642        pckt->flow.proto = *protocol;
 643        *pkt_bytes = bpf_ntohs(ip6h->payload_len);
 644        off += iph_len;
 645        if (*protocol == 45) {
 646                return XDP_DROP;
 647        } else if (*protocol == 59) {
 648                action = parse_icmpv6(data, data_end, off, pckt);
 649                if (action >= 0)
 650                        return action;
 651        } else {
 652                memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
 653                memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
 654        }
 655        return -1;
 656}
 657
 658__attribute__ ((noinline))
 659static int process_l3_headers_v4(struct packet_description *pckt,
 660                                 __u8 *protocol, __u64 off,
 661                                 __u16 *pkt_bytes, void *data,
 662                                 void *data_end)
 663{
 664        struct iphdr *iph;
 665        __u64 iph_len;
 666        int action;
 667
 668        iph = data + off;
 669        if (iph + 1 > data_end)
 670                return XDP_DROP;
 671        if (iph->ihl != 5)
 672                return XDP_DROP;
 673        *protocol = iph->protocol;
 674        pckt->flow.proto = *protocol;
 675        *pkt_bytes = bpf_ntohs(iph->tot_len);
 676        off += 20;
 677        if (iph->frag_off & 65343)
 678                return XDP_DROP;
 679        if (*protocol == IPPROTO_ICMP) {
 680                action = parse_icmp(data, data_end, off, pckt);
 681                if (action >= 0)
 682                        return action;
 683        } else {
 684                pckt->flow.src = iph->saddr;
 685                pckt->flow.dst = iph->daddr;
 686        }
 687        return -1;
 688}
 689
 690__attribute__ ((noinline))
 691static int process_packet(void *data, __u64 off, void *data_end,
 692                          bool is_ipv6, struct xdp_md *xdp)
 693{
 694
 695        struct real_definition *dst = NULL;
 696        struct packet_description pckt = { };
 697        struct vip_definition vip = { };
 698        struct lb_stats *data_stats;
 699        struct eth_hdr *eth = data;
 700        void *lru_map = &lru_cache;
 701        struct vip_meta *vip_info;
 702        __u32 lru_stats_key = 513;
 703        __u32 mac_addr_pos = 0;
 704        __u32 stats_key = 512;
 705        struct ctl_value *cval;
 706        __u16 pkt_bytes;
 707        __u64 iph_len;
 708        __u8 protocol;
 709        __u32 vip_num;
 710        int action;
 711
 712        if (is_ipv6)
 713                action = process_l3_headers_v6(&pckt, &protocol, off,
 714                                               &pkt_bytes, data, data_end);
 715        else
 716                action = process_l3_headers_v4(&pckt, &protocol, off,
 717                                               &pkt_bytes, data, data_end);
 718        if (action >= 0)
 719                return action;
 720        protocol = pckt.flow.proto;
 721        if (protocol == IPPROTO_TCP) {
 722                if (!parse_tcp(data, data_end, is_ipv6, &pckt))
 723                        return XDP_DROP;
 724        } else if (protocol == IPPROTO_UDP) {
 725                if (!parse_udp(data, data_end, is_ipv6, &pckt))
 726                        return XDP_DROP;
 727        } else {
 728                return XDP_TX;
 729        }
 730
 731        if (is_ipv6)
 732                memcpy(vip.vipv6, pckt.flow.dstv6, 16);
 733        else
 734                vip.vip = pckt.flow.dst;
 735        vip.port = pckt.flow.port16[1];
 736        vip.proto = pckt.flow.proto;
 737        vip_info = bpf_map_lookup_elem(&vip_map, &vip);
 738        if (!vip_info) {
 739                vip.port = 0;
 740                vip_info = bpf_map_lookup_elem(&vip_map, &vip);
 741                if (!vip_info)
 742                        return XDP_PASS;
 743                if (!(vip_info->flags & (1 << 4)))
 744                        pckt.flow.port16[1] = 0;
 745        }
 746        if (data_end - data > 1400)
 747                return XDP_DROP;
 748        data_stats = bpf_map_lookup_elem(&stats, &stats_key);
 749        if (!data_stats)
 750                return XDP_DROP;
 751        data_stats->v1 += 1;
 752        if (!dst) {
 753                if (vip_info->flags & (1 << 0))
 754                        pckt.flow.port16[0] = 0;
 755                if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
 756                        connection_table_lookup(&dst, &pckt, lru_map);
 757                if (dst)
 758                        goto out;
 759                if (pckt.flow.proto == IPPROTO_TCP) {
 760                        struct lb_stats *lru_stats =
 761                            bpf_map_lookup_elem(&stats, &lru_stats_key);
 762
 763                        if (!lru_stats)
 764                                return XDP_DROP;
 765                        if (pckt.flags & (1 << 1))
 766                                lru_stats->v1 += 1;
 767                        else
 768                                lru_stats->v2 += 1;
 769                }
 770                if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
 771                        return XDP_DROP;
 772                data_stats->v2 += 1;
 773        }
 774out:
 775        cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
 776        if (!cval)
 777                return XDP_DROP;
 778        if (dst->flags & (1 << 0)) {
 779                if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
 780                        return XDP_DROP;
 781        } else {
 782                if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
 783                        return XDP_DROP;
 784        }
 785        vip_num = vip_info->vip_num;
 786        data_stats = bpf_map_lookup_elem(&stats, &vip_num);
 787        if (!data_stats)
 788                return XDP_DROP;
 789        data_stats->v1 += 1;
 790        data_stats->v2 += pkt_bytes;
 791
 792        data = (void *)(long)xdp->data;
 793        data_end = (void *)(long)xdp->data_end;
 794        if (data + 4 > data_end)
 795                return XDP_DROP;
 796        *(u32 *)data = dst->dst;
 797        return XDP_DROP;
 798}
 799
 800SEC("xdp-test-v4")
 801int balancer_ingress_v4(struct xdp_md *ctx)
 802{
 803        void *data = (void *)(long)ctx->data;
 804        void *data_end = (void *)(long)ctx->data_end;
 805        struct eth_hdr *eth = data;
 806        __u32 eth_proto;
 807        __u32 nh_off;
 808
 809        nh_off = sizeof(struct eth_hdr);
 810        if (data + nh_off > data_end)
 811                return XDP_DROP;
 812        eth_proto = bpf_ntohs(eth->eth_proto);
 813        if (eth_proto == ETH_P_IP)
 814                return process_packet(data, nh_off, data_end, 0, ctx);
 815        else
 816                return XDP_DROP;
 817}
 818
 819SEC("xdp-test-v6")
 820int balancer_ingress_v6(struct xdp_md *ctx)
 821{
 822        void *data = (void *)(long)ctx->data;
 823        void *data_end = (void *)(long)ctx->data_end;
 824        struct eth_hdr *eth = data;
 825        __u32 eth_proto;
 826        __u32 nh_off;
 827
 828        nh_off = sizeof(struct eth_hdr);
 829        if (data + nh_off > data_end)
 830                return XDP_DROP;
 831        eth_proto = bpf_ntohs(eth->eth_proto);
 832        if (eth_proto == ETH_P_IPV6)
 833                return process_packet(data, nh_off, data_end, 1, ctx);
 834        else
 835                return XDP_DROP;
 836}
 837
 838char _license[] SEC("license") = "GPL";
 839