linux/samples/bpf/sockex3_kern.c
<<
>>
Prefs
   1/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
   2 *
   3 * This program is free software; you can redistribute it and/or
   4 * modify it under the terms of version 2 of the GNU General Public
   5 * License as published by the Free Software Foundation.
   6 */
   7#include <uapi/linux/bpf.h>
   8#include <uapi/linux/in.h>
   9#include <uapi/linux/if.h>
  10#include <uapi/linux/if_ether.h>
  11#include <uapi/linux/ip.h>
  12#include <uapi/linux/ipv6.h>
  13#include <uapi/linux/if_tunnel.h>
  14#include <uapi/linux/mpls.h>
  15#include <bpf/bpf_helpers.h>
  16#include "bpf_legacy.h"
  17#define IP_MF           0x2000
  18#define IP_OFFSET       0x1FFF
  19
  20#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
  21
  22struct {
  23        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
  24        __uint(key_size, sizeof(u32));
  25        __uint(value_size, sizeof(u32));
  26        __uint(max_entries, 8);
  27} jmp_table SEC(".maps");
  28
  29#define PARSE_VLAN 1
  30#define PARSE_MPLS 2
  31#define PARSE_IP 3
  32#define PARSE_IPV6 4
  33
  34/* protocol dispatch routine.
  35 * It tail-calls next BPF program depending on eth proto
  36 * Note, we could have used:
  37 * bpf_tail_call(skb, &jmp_table, proto);
  38 * but it would need large prog_array
  39 */
  40static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
  41{
  42        switch (proto) {
  43        case ETH_P_8021Q:
  44        case ETH_P_8021AD:
  45                bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
  46                break;
  47        case ETH_P_MPLS_UC:
  48        case ETH_P_MPLS_MC:
  49                bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
  50                break;
  51        case ETH_P_IP:
  52                bpf_tail_call(skb, &jmp_table, PARSE_IP);
  53                break;
  54        case ETH_P_IPV6:
  55                bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
  56                break;
  57        }
  58}
  59
  60struct vlan_hdr {
  61        __be16 h_vlan_TCI;
  62        __be16 h_vlan_encapsulated_proto;
  63};
  64
  65struct flow_key_record {
  66        __be32 src;
  67        __be32 dst;
  68        union {
  69                __be32 ports;
  70                __be16 port16[2];
  71        };
  72        __u32 ip_proto;
  73};
  74
  75static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
  76{
  77        return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
  78                & (IP_MF | IP_OFFSET);
  79}
  80
  81static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
  82{
  83        __u64 w0 = load_word(ctx, off);
  84        __u64 w1 = load_word(ctx, off + 4);
  85        __u64 w2 = load_word(ctx, off + 8);
  86        __u64 w3 = load_word(ctx, off + 12);
  87
  88        return (__u32)(w0 ^ w1 ^ w2 ^ w3);
  89}
  90
  91struct globals {
  92        struct flow_key_record flow;
  93};
  94
  95struct {
  96        __uint(type, BPF_MAP_TYPE_ARRAY);
  97        __type(key, __u32);
  98        __type(value, struct globals);
  99        __uint(max_entries, 32);
 100} percpu_map SEC(".maps");
 101
 102/* user poor man's per_cpu until native support is ready */
 103static struct globals *this_cpu_globals(void)
 104{
 105        u32 key = bpf_get_smp_processor_id();
 106
 107        return bpf_map_lookup_elem(&percpu_map, &key);
 108}
 109
 110/* some simple stats for user space consumption */
 111struct pair {
 112        __u64 packets;
 113        __u64 bytes;
 114};
 115
 116struct {
 117        __uint(type, BPF_MAP_TYPE_HASH);
 118        __type(key, struct flow_key_record);
 119        __type(value, struct pair);
 120        __uint(max_entries, 1024);
 121} hash_map SEC(".maps");
 122
 123static void update_stats(struct __sk_buff *skb, struct globals *g)
 124{
 125        struct flow_key_record key = g->flow;
 126        struct pair *value;
 127
 128        value = bpf_map_lookup_elem(&hash_map, &key);
 129        if (value) {
 130                __sync_fetch_and_add(&value->packets, 1);
 131                __sync_fetch_and_add(&value->bytes, skb->len);
 132        } else {
 133                struct pair val = {1, skb->len};
 134
 135                bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
 136        }
 137}
 138
 139static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 140                                           struct globals *g, __u32 ip_proto)
 141{
 142        __u32 nhoff = skb->cb[0];
 143        int poff;
 144
 145        switch (ip_proto) {
 146        case IPPROTO_GRE: {
 147                struct gre_hdr {
 148                        __be16 flags;
 149                        __be16 proto;
 150                };
 151
 152                __u32 gre_flags = load_half(skb,
 153                                            nhoff + offsetof(struct gre_hdr, flags));
 154                __u32 gre_proto = load_half(skb,
 155                                            nhoff + offsetof(struct gre_hdr, proto));
 156
 157                if (gre_flags & (GRE_VERSION|GRE_ROUTING))
 158                        break;
 159
 160                nhoff += 4;
 161                if (gre_flags & GRE_CSUM)
 162                        nhoff += 4;
 163                if (gre_flags & GRE_KEY)
 164                        nhoff += 4;
 165                if (gre_flags & GRE_SEQ)
 166                        nhoff += 4;
 167
 168                skb->cb[0] = nhoff;
 169                parse_eth_proto(skb, gre_proto);
 170                break;
 171        }
 172        case IPPROTO_IPIP:
 173                parse_eth_proto(skb, ETH_P_IP);
 174                break;
 175        case IPPROTO_IPV6:
 176                parse_eth_proto(skb, ETH_P_IPV6);
 177                break;
 178        case IPPROTO_TCP:
 179        case IPPROTO_UDP:
 180                g->flow.ports = load_word(skb, nhoff);
 181        case IPPROTO_ICMP:
 182                g->flow.ip_proto = ip_proto;
 183                update_stats(skb, g);
 184                break;
 185        default:
 186                break;
 187        }
 188}
 189
 190PROG(PARSE_IP)(struct __sk_buff *skb)
 191{
 192        struct globals *g = this_cpu_globals();
 193        __u32 nhoff, verlen, ip_proto;
 194
 195        if (!g)
 196                return 0;
 197
 198        nhoff = skb->cb[0];
 199
 200        if (unlikely(ip_is_fragment(skb, nhoff)))
 201                return 0;
 202
 203        ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
 204
 205        if (ip_proto != IPPROTO_GRE) {
 206                g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
 207                g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
 208        }
 209
 210        verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
 211        nhoff += (verlen & 0xF) << 2;
 212
 213        skb->cb[0] = nhoff;
 214        parse_ip_proto(skb, g, ip_proto);
 215        return 0;
 216}
 217
 218PROG(PARSE_IPV6)(struct __sk_buff *skb)
 219{
 220        struct globals *g = this_cpu_globals();
 221        __u32 nhoff, ip_proto;
 222
 223        if (!g)
 224                return 0;
 225
 226        nhoff = skb->cb[0];
 227
 228        ip_proto = load_byte(skb,
 229                             nhoff + offsetof(struct ipv6hdr, nexthdr));
 230        g->flow.src = ipv6_addr_hash(skb,
 231                                     nhoff + offsetof(struct ipv6hdr, saddr));
 232        g->flow.dst = ipv6_addr_hash(skb,
 233                                     nhoff + offsetof(struct ipv6hdr, daddr));
 234        nhoff += sizeof(struct ipv6hdr);
 235
 236        skb->cb[0] = nhoff;
 237        parse_ip_proto(skb, g, ip_proto);
 238        return 0;
 239}
 240
 241PROG(PARSE_VLAN)(struct __sk_buff *skb)
 242{
 243        __u32 nhoff, proto;
 244
 245        nhoff = skb->cb[0];
 246
 247        proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
 248                                                h_vlan_encapsulated_proto));
 249        nhoff += sizeof(struct vlan_hdr);
 250        skb->cb[0] = nhoff;
 251
 252        parse_eth_proto(skb, proto);
 253
 254        return 0;
 255}
 256
 257PROG(PARSE_MPLS)(struct __sk_buff *skb)
 258{
 259        __u32 nhoff, label;
 260
 261        nhoff = skb->cb[0];
 262
 263        label = load_word(skb, nhoff);
 264        nhoff += sizeof(struct mpls_label);
 265        skb->cb[0] = nhoff;
 266
 267        if (label & MPLS_LS_S_MASK) {
 268                __u8 verlen = load_byte(skb, nhoff);
 269                if ((verlen & 0xF0) == 4)
 270                        parse_eth_proto(skb, ETH_P_IP);
 271                else
 272                        parse_eth_proto(skb, ETH_P_IPV6);
 273        } else {
 274                parse_eth_proto(skb, ETH_P_MPLS_UC);
 275        }
 276
 277        return 0;
 278}
 279
 280SEC("socket/0")
 281int main_prog(struct __sk_buff *skb)
 282{
 283        __u32 nhoff = ETH_HLEN;
 284        __u32 proto = load_half(skb, 12);
 285
 286        skb->cb[0] = nhoff;
 287        parse_eth_proto(skb, proto);
 288        return 0;
 289}
 290
 291char _license[] SEC("license") = "GPL";
 292