linux/samples/bpf/sockex3_kern.c
<<
>>
Prefs
   1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of version 2 of the GNU General Public
   5 * License as published by the Free Software Foundation.
   6 */
   7#include <uapi/linux/bpf.h>
   8#include <uapi/linux/in.h>
   9#include <uapi/linux/if.h>
  10#include <uapi/linux/if_ether.h>
  11#include <uapi/linux/ip.h>
  12#include <uapi/linux/ipv6.h>
  13#include <uapi/linux/if_tunnel.h>
  14#include <uapi/linux/mpls.h>
  15#include <bpf/bpf_helpers.h>
  16#include "bpf_legacy.h"
  17#define IP_MF           0x2000
  18#define IP_OFFSET       0x1FFF
  19
  20#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
  21
  22struct {
  23        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
  24        __uint(key_size, sizeof(u32));
  25        __uint(value_size, sizeof(u32));
  26        __uint(max_entries, 8);
  27} jmp_table SEC(".maps");
  28
  29#define PARSE_VLAN 1
  30#define PARSE_MPLS 2
  31#define PARSE_IP 3
  32#define PARSE_IPV6 4
  33
  34/* Protocol dispatch routine. It tail-calls next BPF program depending
  35 * on eth proto. Note, we could have used ...
  36 *
  37 *   bpf_tail_call(skb, &jmp_table, proto);
  38 *
  39 * ... but it would need large prog_array and cannot be optimised given
  40 * the map key is not static.
  41 */
  42static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
  43{
  44        switch (proto) {
  45        case ETH_P_8021Q:
  46        case ETH_P_8021AD:
  47                bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
  48                break;
  49        case ETH_P_MPLS_UC:
  50        case ETH_P_MPLS_MC:
  51                bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
  52                break;
  53        case ETH_P_IP:
  54                bpf_tail_call(skb, &jmp_table, PARSE_IP);
  55                break;
  56        case ETH_P_IPV6:
  57                bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
  58                break;
  59        }
  60}
  61
  62struct vlan_hdr {
  63        __be16 h_vlan_TCI;
  64        __be16 h_vlan_encapsulated_proto;
  65};
  66
  67struct flow_key_record {
  68        __be32 src;
  69        __be32 dst;
  70        union {
  71                __be32 ports;
  72                __be16 port16[2];
  73        };
  74        __u32 ip_proto;
  75};
  76
  77static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
  78{
  79        return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
  80                & (IP_MF | IP_OFFSET);
  81}
  82
  83static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
  84{
  85        __u64 w0 = load_word(ctx, off);
  86        __u64 w1 = load_word(ctx, off + 4);
  87        __u64 w2 = load_word(ctx, off + 8);
  88        __u64 w3 = load_word(ctx, off + 12);
  89
  90        return (__u32)(w0 ^ w1 ^ w2 ^ w3);
  91}
  92
  93struct globals {
  94        struct flow_key_record flow;
  95};
  96
  97struct {
  98        __uint(type, BPF_MAP_TYPE_ARRAY);
  99        __type(key, __u32);
 100        __type(value, struct globals);
 101        __uint(max_entries, 32);
 102} percpu_map SEC(".maps");
 103
 104/* user poor man's per_cpu until native support is ready */
 105static struct globals *this_cpu_globals(void)
 106{
 107        u32 key = bpf_get_smp_processor_id();
 108
 109        return bpf_map_lookup_elem(&percpu_map, &key);
 110}
 111
 112/* some simple stats for user space consumption */
 113struct pair {
 114        __u64 packets;
 115        __u64 bytes;
 116};
 117
 118struct {
 119        __uint(type, BPF_MAP_TYPE_HASH);
 120        __type(key, struct flow_key_record);
 121        __type(value, struct pair);
 122        __uint(max_entries, 1024);
 123} hash_map SEC(".maps");
 124
 125static void update_stats(struct __sk_buff *skb, struct globals *g)
 126{
 127        struct flow_key_record key = g->flow;
 128        struct pair *value;
 129
 130        value = bpf_map_lookup_elem(&hash_map, &key);
 131        if (value) {
 132                __sync_fetch_and_add(&value->packets, 1);
 133                __sync_fetch_and_add(&value->bytes, skb->len);
 134        } else {
 135                struct pair val = {1, skb->len};
 136
 137                bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
 138        }
 139}
 140
 141static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 142                                           struct globals *g, __u32 ip_proto)
 143{
 144        __u32 nhoff = skb->cb[0];
 145        int poff;
 146
 147        switch (ip_proto) {
 148        case IPPROTO_GRE: {
 149                struct gre_hdr {
 150                        __be16 flags;
 151                        __be16 proto;
 152                };
 153
 154                __u32 gre_flags = load_half(skb,
 155                                            nhoff + offsetof(struct gre_hdr, flags));
 156                __u32 gre_proto = load_half(skb,
 157                                            nhoff + offsetof(struct gre_hdr, proto));
 158
 159                if (gre_flags & (GRE_VERSION|GRE_ROUTING))
 160                        break;
 161
 162                nhoff += 4;
 163                if (gre_flags & GRE_CSUM)
 164                        nhoff += 4;
 165                if (gre_flags & GRE_KEY)
 166                        nhoff += 4;
 167                if (gre_flags & GRE_SEQ)
 168                        nhoff += 4;
 169
 170                skb->cb[0] = nhoff;
 171                parse_eth_proto(skb, gre_proto);
 172                break;
 173        }
 174        case IPPROTO_IPIP:
 175                parse_eth_proto(skb, ETH_P_IP);
 176                break;
 177        case IPPROTO_IPV6:
 178                parse_eth_proto(skb, ETH_P_IPV6);
 179                break;
 180        case IPPROTO_TCP:
 181        case IPPROTO_UDP:
 182                g->flow.ports = load_word(skb, nhoff);
 183        case IPPROTO_ICMP:
 184                g->flow.ip_proto = ip_proto;
 185                update_stats(skb, g);
 186                break;
 187        default:
 188                break;
 189        }
 190}
 191
 192PROG(PARSE_IP)(struct __sk_buff *skb)
 193{
 194        struct globals *g = this_cpu_globals();
 195        __u32 nhoff, verlen, ip_proto;
 196
 197        if (!g)
 198                return 0;
 199
 200        nhoff = skb->cb[0];
 201
 202        if (unlikely(ip_is_fragment(skb, nhoff)))
 203                return 0;
 204
 205        ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
 206
 207        if (ip_proto != IPPROTO_GRE) {
 208                g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
 209                g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
 210        }
 211
 212        verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
 213        nhoff += (verlen & 0xF) << 2;
 214
 215        skb->cb[0] = nhoff;
 216        parse_ip_proto(skb, g, ip_proto);
 217        return 0;
 218}
 219
 220PROG(PARSE_IPV6)(struct __sk_buff *skb)
 221{
 222        struct globals *g = this_cpu_globals();
 223        __u32 nhoff, ip_proto;
 224
 225        if (!g)
 226                return 0;
 227
 228        nhoff = skb->cb[0];
 229
 230        ip_proto = load_byte(skb,
 231                             nhoff + offsetof(struct ipv6hdr, nexthdr));
 232        g->flow.src = ipv6_addr_hash(skb,
 233                                     nhoff + offsetof(struct ipv6hdr, saddr));
 234        g->flow.dst = ipv6_addr_hash(skb,
 235                                     nhoff + offsetof(struct ipv6hdr, daddr));
 236        nhoff += sizeof(struct ipv6hdr);
 237
 238        skb->cb[0] = nhoff;
 239        parse_ip_proto(skb, g, ip_proto);
 240        return 0;
 241}
 242
 243PROG(PARSE_VLAN)(struct __sk_buff *skb)
 244{
 245        __u32 nhoff, proto;
 246
 247        nhoff = skb->cb[0];
 248
 249        proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
 250                                                h_vlan_encapsulated_proto));
 251        nhoff += sizeof(struct vlan_hdr);
 252        skb->cb[0] = nhoff;
 253
 254        parse_eth_proto(skb, proto);
 255
 256        return 0;
 257}
 258
 259PROG(PARSE_MPLS)(struct __sk_buff *skb)
 260{
 261        __u32 nhoff, label;
 262
 263        nhoff = skb->cb[0];
 264
 265        label = load_word(skb, nhoff);
 266        nhoff += sizeof(struct mpls_label);
 267        skb->cb[0] = nhoff;
 268
 269        if (label & MPLS_LS_S_MASK) {
 270                __u8 verlen = load_byte(skb, nhoff);
 271                if ((verlen & 0xF0) == 4)
 272                        parse_eth_proto(skb, ETH_P_IP);
 273                else
 274                        parse_eth_proto(skb, ETH_P_IPV6);
 275        } else {
 276                parse_eth_proto(skb, ETH_P_MPLS_UC);
 277        }
 278
 279        return 0;
 280}
 281
 282SEC("socket/0")
 283int main_prog(struct __sk_buff *skb)
 284{
 285        __u32 nhoff = ETH_HLEN;
 286        __u32 proto = load_half(skb, 12);
 287
 288        skb->cb[0] = nhoff;
 289        parse_eth_proto(skb, proto);
 290        return 0;
 291}
 292
 293char _license[] SEC("license") = "GPL";
 294