linux/samples/bpf/sockex3_kern.c
<<
>>
Prefs
   1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of version 2 of the GNU General Public
   5 * License as published by the Free Software Foundation.
   6 */
   7#include <uapi/linux/bpf.h>
   8#include "bpf_helpers.h"
   9#include <uapi/linux/in.h>
  10#include <uapi/linux/if.h>
  11#include <uapi/linux/if_ether.h>
  12#include <uapi/linux/ip.h>
  13#include <uapi/linux/ipv6.h>
  14#include <uapi/linux/if_tunnel.h>
  15#include <uapi/linux/mpls.h>
  16#define IP_MF           0x2000
  17#define IP_OFFSET       0x1FFF
  18
  19#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
  20
  21struct bpf_map_def SEC("maps") jmp_table = {
  22        .type = BPF_MAP_TYPE_PROG_ARRAY,
  23        .key_size = sizeof(u32),
  24        .value_size = sizeof(u32),
  25        .max_entries = 8,
  26};
  27
  28#define PARSE_VLAN 1
  29#define PARSE_MPLS 2
  30#define PARSE_IP 3
  31#define PARSE_IPV6 4
  32
  33/* protocol dispatch routine.
  34 * It tail-calls next BPF program depending on eth proto
  35 * Note, we could have used:
  36 * bpf_tail_call(skb, &jmp_table, proto);
  37 * but it would need large prog_array
  38 */
  39static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
  40{
  41        switch (proto) {
  42        case ETH_P_8021Q:
  43        case ETH_P_8021AD:
  44                bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
  45                break;
  46        case ETH_P_MPLS_UC:
  47        case ETH_P_MPLS_MC:
  48                bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
  49                break;
  50        case ETH_P_IP:
  51                bpf_tail_call(skb, &jmp_table, PARSE_IP);
  52                break;
  53        case ETH_P_IPV6:
  54                bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
  55                break;
  56        }
  57}
  58
  59struct vlan_hdr {
  60        __be16 h_vlan_TCI;
  61        __be16 h_vlan_encapsulated_proto;
  62};
  63
  64struct flow_key_record {
  65        __be32 src;
  66        __be32 dst;
  67        union {
  68                __be32 ports;
  69                __be16 port16[2];
  70        };
  71        __u32 ip_proto;
  72};
  73
  74static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
  75{
  76        return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
  77                & (IP_MF | IP_OFFSET);
  78}
  79
  80static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
  81{
  82        __u64 w0 = load_word(ctx, off);
  83        __u64 w1 = load_word(ctx, off + 4);
  84        __u64 w2 = load_word(ctx, off + 8);
  85        __u64 w3 = load_word(ctx, off + 12);
  86
  87        return (__u32)(w0 ^ w1 ^ w2 ^ w3);
  88}
  89
  90struct globals {
  91        struct flow_key_record flow;
  92};
  93
  94struct bpf_map_def SEC("maps") percpu_map = {
  95        .type = BPF_MAP_TYPE_ARRAY,
  96        .key_size = sizeof(__u32),
  97        .value_size = sizeof(struct globals),
  98        .max_entries = 32,
  99};
 100
 101/* user poor man's per_cpu until native support is ready */
 102static struct globals *this_cpu_globals(void)
 103{
 104        u32 key = bpf_get_smp_processor_id();
 105
 106        return bpf_map_lookup_elem(&percpu_map, &key);
 107}
 108
 109/* some simple stats for user space consumption */
 110struct pair {
 111        __u64 packets;
 112        __u64 bytes;
 113};
 114
 115struct bpf_map_def SEC("maps") hash_map = {
 116        .type = BPF_MAP_TYPE_HASH,
 117        .key_size = sizeof(struct flow_key_record),
 118        .value_size = sizeof(struct pair),
 119        .max_entries = 1024,
 120};
 121
 122static void update_stats(struct __sk_buff *skb, struct globals *g)
 123{
 124        struct flow_key_record key = g->flow;
 125        struct pair *value;
 126
 127        value = bpf_map_lookup_elem(&hash_map, &key);
 128        if (value) {
 129                __sync_fetch_and_add(&value->packets, 1);
 130                __sync_fetch_and_add(&value->bytes, skb->len);
 131        } else {
 132                struct pair val = {1, skb->len};
 133
 134                bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
 135        }
 136}
 137
 138static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 139                                           struct globals *g, __u32 ip_proto)
 140{
 141        __u32 nhoff = skb->cb[0];
 142        int poff;
 143
 144        switch (ip_proto) {
 145        case IPPROTO_GRE: {
 146                struct gre_hdr {
 147                        __be16 flags;
 148                        __be16 proto;
 149                };
 150
 151                __u32 gre_flags = load_half(skb,
 152                                            nhoff + offsetof(struct gre_hdr, flags));
 153                __u32 gre_proto = load_half(skb,
 154                                            nhoff + offsetof(struct gre_hdr, proto));
 155
 156                if (gre_flags & (GRE_VERSION|GRE_ROUTING))
 157                        break;
 158
 159                nhoff += 4;
 160                if (gre_flags & GRE_CSUM)
 161                        nhoff += 4;
 162                if (gre_flags & GRE_KEY)
 163                        nhoff += 4;
 164                if (gre_flags & GRE_SEQ)
 165                        nhoff += 4;
 166
 167                skb->cb[0] = nhoff;
 168                parse_eth_proto(skb, gre_proto);
 169                break;
 170        }
 171        case IPPROTO_IPIP:
 172                parse_eth_proto(skb, ETH_P_IP);
 173                break;
 174        case IPPROTO_IPV6:
 175                parse_eth_proto(skb, ETH_P_IPV6);
 176                break;
 177        case IPPROTO_TCP:
 178        case IPPROTO_UDP:
 179                g->flow.ports = load_word(skb, nhoff);
 180        case IPPROTO_ICMP:
 181                g->flow.ip_proto = ip_proto;
 182                update_stats(skb, g);
 183                break;
 184        default:
 185                break;
 186        }
 187}
 188
 189PROG(PARSE_IP)(struct __sk_buff *skb)
 190{
 191        struct globals *g = this_cpu_globals();
 192        __u32 nhoff, verlen, ip_proto;
 193
 194        if (!g)
 195                return 0;
 196
 197        nhoff = skb->cb[0];
 198
 199        if (unlikely(ip_is_fragment(skb, nhoff)))
 200                return 0;
 201
 202        ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
 203
 204        if (ip_proto != IPPROTO_GRE) {
 205                g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
 206                g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
 207        }
 208
 209        verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
 210        nhoff += (verlen & 0xF) << 2;
 211
 212        skb->cb[0] = nhoff;
 213        parse_ip_proto(skb, g, ip_proto);
 214        return 0;
 215}
 216
 217PROG(PARSE_IPV6)(struct __sk_buff *skb)
 218{
 219        struct globals *g = this_cpu_globals();
 220        __u32 nhoff, ip_proto;
 221
 222        if (!g)
 223                return 0;
 224
 225        nhoff = skb->cb[0];
 226
 227        ip_proto = load_byte(skb,
 228                             nhoff + offsetof(struct ipv6hdr, nexthdr));
 229        g->flow.src = ipv6_addr_hash(skb,
 230                                     nhoff + offsetof(struct ipv6hdr, saddr));
 231        g->flow.dst = ipv6_addr_hash(skb,
 232                                     nhoff + offsetof(struct ipv6hdr, daddr));
 233        nhoff += sizeof(struct ipv6hdr);
 234
 235        skb->cb[0] = nhoff;
 236        parse_ip_proto(skb, g, ip_proto);
 237        return 0;
 238}
 239
 240PROG(PARSE_VLAN)(struct __sk_buff *skb)
 241{
 242        __u32 nhoff, proto;
 243
 244        nhoff = skb->cb[0];
 245
 246        proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
 247                                                h_vlan_encapsulated_proto));
 248        nhoff += sizeof(struct vlan_hdr);
 249        skb->cb[0] = nhoff;
 250
 251        parse_eth_proto(skb, proto);
 252
 253        return 0;
 254}
 255
 256PROG(PARSE_MPLS)(struct __sk_buff *skb)
 257{
 258        __u32 nhoff, label;
 259
 260        nhoff = skb->cb[0];
 261
 262        label = load_word(skb, nhoff);
 263        nhoff += sizeof(struct mpls_label);
 264        skb->cb[0] = nhoff;
 265
 266        if (label & MPLS_LS_S_MASK) {
 267                __u8 verlen = load_byte(skb, nhoff);
 268                if ((verlen & 0xF0) == 4)
 269                        parse_eth_proto(skb, ETH_P_IP);
 270                else
 271                        parse_eth_proto(skb, ETH_P_IPV6);
 272        } else {
 273                parse_eth_proto(skb, ETH_P_MPLS_UC);
 274        }
 275
 276        return 0;
 277}
 278
 279SEC("socket/0")
 280int main_prog(struct __sk_buff *skb)
 281{
 282        __u32 nhoff = ETH_HLEN;
 283        __u32 proto = load_half(skb, 12);
 284
 285        skb->cb[0] = nhoff;
 286        parse_eth_proto(skb, proto);
 287        return 0;
 288}
 289
 290char _license[] SEC("license") = "GPL";
 291