linux/tools/testing/selftests/bpf/progs/bpf_flow.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <limits.h>
   3#include <stddef.h>
   4#include <stdbool.h>
   5#include <string.h>
   6#include <linux/pkt_cls.h>
   7#include <linux/bpf.h>
   8#include <linux/in.h>
   9#include <linux/if_ether.h>
  10#include <linux/icmp.h>
  11#include <linux/ip.h>
  12#include <linux/ipv6.h>
  13#include <linux/tcp.h>
  14#include <linux/udp.h>
  15#include <linux/if_packet.h>
  16#include <sys/socket.h>
  17#include <linux/if_tunnel.h>
  18#include <linux/mpls.h>
  19#include "bpf_helpers.h"
  20#include "bpf_endian.h"
  21
  22int _version SEC("version") = 1;
  23#define PROG(F) SEC(#F) int bpf_func_##F
  24
  25/* These are the identifiers of the BPF programs that will be used in tail
  26 * calls. Name is limited to 16 characters, with the terminating character and
  27 * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
  28 */
  29enum {
  30        IP,
  31        IPV6,
  32        IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
  33        IPV6FR, /* Fragmentation IPv6 Extension Header */
  34        MPLS,
  35        VLAN,
  36};
  37
  38#define IP_MF           0x2000
  39#define IP_OFFSET       0x1FFF
  40#define IP6_MF          0x0001
  41#define IP6_OFFSET      0xFFF8
  42
  43struct vlan_hdr {
  44        __be16 h_vlan_TCI;
  45        __be16 h_vlan_encapsulated_proto;
  46};
  47
  48struct gre_hdr {
  49        __be16 flags;
  50        __be16 proto;
  51};
  52
  53struct frag_hdr {
  54        __u8 nexthdr;
  55        __u8 reserved;
  56        __be16 frag_off;
  57        __be32 identification;
  58};
  59
  60struct {
  61        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
  62        __uint(max_entries, 8);
  63        __uint(key_size, sizeof(__u32));
  64        __uint(value_size, sizeof(__u32));
  65} jmp_table SEC(".maps");
  66
  67struct {
  68        __uint(type, BPF_MAP_TYPE_HASH);
  69        __uint(max_entries, 1024);
  70        __type(key, __u32);
  71        __type(value, struct bpf_flow_keys);
  72} last_dissection SEC(".maps");
  73
  74static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
  75                                            int ret)
  76{
  77        __u32 key = (__u32)(keys->sport) << 16 | keys->dport;
  78        struct bpf_flow_keys val;
  79
  80        memcpy(&val, keys, sizeof(val));
  81        bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
  82        return ret;
  83}
  84
  85#define IPV6_FLOWLABEL_MASK             __bpf_constant_htonl(0x000FFFFF)
  86static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
  87{
  88        return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
  89}
  90
  91static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
  92                                                         __u16 hdr_size,
  93                                                         void *buffer)
  94{
  95        void *data_end = (void *)(long)skb->data_end;
  96        void *data = (void *)(long)skb->data;
  97        __u16 thoff = skb->flow_keys->thoff;
  98        __u8 *hdr;
  99
 100        /* Verifies this variable offset does not overflow */
 101        if (thoff > (USHRT_MAX - hdr_size))
 102                return NULL;
 103
 104        hdr = data + thoff;
 105        if (hdr + hdr_size <= data_end)
 106                return hdr;
 107
 108        if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
 109                return NULL;
 110
 111        return buffer;
 112}
 113
 114/* Dispatches on ETHERTYPE */
 115static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 116{
 117        struct bpf_flow_keys *keys = skb->flow_keys;
 118
 119        switch (proto) {
 120        case bpf_htons(ETH_P_IP):
 121                bpf_tail_call(skb, &jmp_table, IP);
 122                break;
 123        case bpf_htons(ETH_P_IPV6):
 124                bpf_tail_call(skb, &jmp_table, IPV6);
 125                break;
 126        case bpf_htons(ETH_P_MPLS_MC):
 127        case bpf_htons(ETH_P_MPLS_UC):
 128                bpf_tail_call(skb, &jmp_table, MPLS);
 129                break;
 130        case bpf_htons(ETH_P_8021Q):
 131        case bpf_htons(ETH_P_8021AD):
 132                bpf_tail_call(skb, &jmp_table, VLAN);
 133                break;
 134        default:
 135                /* Protocol not supported */
 136                return export_flow_keys(keys, BPF_DROP);
 137        }
 138
 139        return export_flow_keys(keys, BPF_DROP);
 140}
 141
 142SEC("flow_dissector")
 143int _dissect(struct __sk_buff *skb)
 144{
 145        struct bpf_flow_keys *keys = skb->flow_keys;
 146
 147        return parse_eth_proto(skb, keys->n_proto);
 148}
 149
 150/* Parses on IPPROTO_* */
 151static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
 152{
 153        struct bpf_flow_keys *keys = skb->flow_keys;
 154        void *data_end = (void *)(long)skb->data_end;
 155        struct icmphdr *icmp, _icmp;
 156        struct gre_hdr *gre, _gre;
 157        struct ethhdr *eth, _eth;
 158        struct tcphdr *tcp, _tcp;
 159        struct udphdr *udp, _udp;
 160
 161        switch (proto) {
 162        case IPPROTO_ICMP:
 163                icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
 164                if (!icmp)
 165                        return export_flow_keys(keys, BPF_DROP);
 166                return export_flow_keys(keys, BPF_OK);
 167        case IPPROTO_IPIP:
 168                keys->is_encap = true;
 169                if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 170                        return export_flow_keys(keys, BPF_OK);
 171
 172                return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
 173        case IPPROTO_IPV6:
 174                keys->is_encap = true;
 175                if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 176                        return export_flow_keys(keys, BPF_OK);
 177
 178                return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
 179        case IPPROTO_GRE:
 180                gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
 181                if (!gre)
 182                        return export_flow_keys(keys, BPF_DROP);
 183
 184                if (bpf_htons(gre->flags & GRE_VERSION))
 185                        /* Only inspect standard GRE packets with version 0 */
 186                        return export_flow_keys(keys, BPF_OK);
 187
 188                keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
 189                if (GRE_IS_CSUM(gre->flags))
 190                        keys->thoff += 4; /* Step over chksum and Padding */
 191                if (GRE_IS_KEY(gre->flags))
 192                        keys->thoff += 4; /* Step over key */
 193                if (GRE_IS_SEQ(gre->flags))
 194                        keys->thoff += 4; /* Step over sequence number */
 195
 196                keys->is_encap = true;
 197                if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 198                        return export_flow_keys(keys, BPF_OK);
 199
 200                if (gre->proto == bpf_htons(ETH_P_TEB)) {
 201                        eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
 202                                                          &_eth);
 203                        if (!eth)
 204                                return export_flow_keys(keys, BPF_DROP);
 205
 206                        keys->thoff += sizeof(*eth);
 207
 208                        return parse_eth_proto(skb, eth->h_proto);
 209                } else {
 210                        return parse_eth_proto(skb, gre->proto);
 211                }
 212        case IPPROTO_TCP:
 213                tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
 214                if (!tcp)
 215                        return export_flow_keys(keys, BPF_DROP);
 216
 217                if (tcp->doff < 5)
 218                        return export_flow_keys(keys, BPF_DROP);
 219
 220                if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
 221                        return export_flow_keys(keys, BPF_DROP);
 222
 223                keys->sport = tcp->source;
 224                keys->dport = tcp->dest;
 225                return export_flow_keys(keys, BPF_OK);
 226        case IPPROTO_UDP:
 227        case IPPROTO_UDPLITE:
 228                udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
 229                if (!udp)
 230                        return export_flow_keys(keys, BPF_DROP);
 231
 232                keys->sport = udp->source;
 233                keys->dport = udp->dest;
 234                return export_flow_keys(keys, BPF_OK);
 235        default:
 236                return export_flow_keys(keys, BPF_DROP);
 237        }
 238
 239        return export_flow_keys(keys, BPF_DROP);
 240}
 241
 242static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
 243{
 244        struct bpf_flow_keys *keys = skb->flow_keys;
 245
 246        switch (nexthdr) {
 247        case IPPROTO_HOPOPTS:
 248        case IPPROTO_DSTOPTS:
 249                bpf_tail_call(skb, &jmp_table, IPV6OP);
 250                break;
 251        case IPPROTO_FRAGMENT:
 252                bpf_tail_call(skb, &jmp_table, IPV6FR);
 253                break;
 254        default:
 255                return parse_ip_proto(skb, nexthdr);
 256        }
 257
 258        return export_flow_keys(keys, BPF_DROP);
 259}
 260
 261PROG(IP)(struct __sk_buff *skb)
 262{
 263        void *data_end = (void *)(long)skb->data_end;
 264        struct bpf_flow_keys *keys = skb->flow_keys;
 265        void *data = (void *)(long)skb->data;
 266        struct iphdr *iph, _iph;
 267        bool done = false;
 268
 269        iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
 270        if (!iph)
 271                return export_flow_keys(keys, BPF_DROP);
 272
 273        /* IP header cannot be smaller than 20 bytes */
 274        if (iph->ihl < 5)
 275                return export_flow_keys(keys, BPF_DROP);
 276
 277        keys->addr_proto = ETH_P_IP;
 278        keys->ipv4_src = iph->saddr;
 279        keys->ipv4_dst = iph->daddr;
 280        keys->ip_proto = iph->protocol;
 281
 282        keys->thoff += iph->ihl << 2;
 283        if (data + keys->thoff > data_end)
 284                return export_flow_keys(keys, BPF_DROP);
 285
 286        if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
 287                keys->is_frag = true;
 288                if (iph->frag_off & bpf_htons(IP_OFFSET)) {
 289                        /* From second fragment on, packets do not have headers
 290                         * we can parse.
 291                         */
 292                        done = true;
 293                } else {
 294                        keys->is_first_frag = true;
 295                        /* No need to parse fragmented packet unless
 296                         * explicitly asked for.
 297                         */
 298                        if (!(keys->flags &
 299                              BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
 300                                done = true;
 301                }
 302        }
 303
 304        if (done)
 305                return export_flow_keys(keys, BPF_OK);
 306
 307        return parse_ip_proto(skb, iph->protocol);
 308}
 309
 310PROG(IPV6)(struct __sk_buff *skb)
 311{
 312        struct bpf_flow_keys *keys = skb->flow_keys;
 313        struct ipv6hdr *ip6h, _ip6h;
 314
 315        ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
 316        if (!ip6h)
 317                return export_flow_keys(keys, BPF_DROP);
 318
 319        keys->addr_proto = ETH_P_IPV6;
 320        memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
 321
 322        keys->thoff += sizeof(struct ipv6hdr);
 323        keys->ip_proto = ip6h->nexthdr;
 324        keys->flow_label = ip6_flowlabel(ip6h);
 325
 326        if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
 327                return export_flow_keys(keys, BPF_OK);
 328
 329        return parse_ipv6_proto(skb, ip6h->nexthdr);
 330}
 331
 332PROG(IPV6OP)(struct __sk_buff *skb)
 333{
 334        struct bpf_flow_keys *keys = skb->flow_keys;
 335        struct ipv6_opt_hdr *ip6h, _ip6h;
 336
 337        ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
 338        if (!ip6h)
 339                return export_flow_keys(keys, BPF_DROP);
 340
 341        /* hlen is in 8-octets and does not include the first 8 bytes
 342         * of the header
 343         */
 344        keys->thoff += (1 + ip6h->hdrlen) << 3;
 345        keys->ip_proto = ip6h->nexthdr;
 346
 347        return parse_ipv6_proto(skb, ip6h->nexthdr);
 348}
 349
 350PROG(IPV6FR)(struct __sk_buff *skb)
 351{
 352        struct bpf_flow_keys *keys = skb->flow_keys;
 353        struct frag_hdr *fragh, _fragh;
 354
 355        fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
 356        if (!fragh)
 357                return export_flow_keys(keys, BPF_DROP);
 358
 359        keys->thoff += sizeof(*fragh);
 360        keys->is_frag = true;
 361        keys->ip_proto = fragh->nexthdr;
 362
 363        if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
 364                keys->is_first_frag = true;
 365
 366                /* No need to parse fragmented packet unless
 367                 * explicitly asked for.
 368                 */
 369                if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
 370                        return export_flow_keys(keys, BPF_OK);
 371        }
 372
 373        return parse_ipv6_proto(skb, fragh->nexthdr);
 374}
 375
 376PROG(MPLS)(struct __sk_buff *skb)
 377{
 378        struct bpf_flow_keys *keys = skb->flow_keys;
 379        struct mpls_label *mpls, _mpls;
 380
 381        mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
 382        if (!mpls)
 383                return export_flow_keys(keys, BPF_DROP);
 384
 385        return export_flow_keys(keys, BPF_OK);
 386}
 387
 388PROG(VLAN)(struct __sk_buff *skb)
 389{
 390        struct bpf_flow_keys *keys = skb->flow_keys;
 391        struct vlan_hdr *vlan, _vlan;
 392
 393        /* Account for double-tagging */
 394        if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
 395                vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 396                if (!vlan)
 397                        return export_flow_keys(keys, BPF_DROP);
 398
 399                if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
 400                        return export_flow_keys(keys, BPF_DROP);
 401
 402                keys->nhoff += sizeof(*vlan);
 403                keys->thoff += sizeof(*vlan);
 404        }
 405
 406        vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
 407        if (!vlan)
 408                return export_flow_keys(keys, BPF_DROP);
 409
 410        keys->nhoff += sizeof(*vlan);
 411        keys->thoff += sizeof(*vlan);
 412        /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
 413        if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
 414            vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
 415                return export_flow_keys(keys, BPF_DROP);
 416
 417        keys->n_proto = vlan->h_vlan_encapsulated_proto;
 418        return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
 419}
 420
 421char __license[] SEC("license") = "GPL";
 422