linux/net/ipv4/geneve.c
<<
>>
Prefs
   1/*
   2 * Geneve: Generic Network Virtualization Encapsulation
   3 *
   4 * Copyright (c) 2014 Nicira, Inc.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13
  14#include <linux/kernel.h>
  15#include <linux/types.h>
  16#include <linux/module.h>
  17#include <linux/errno.h>
  18#include <linux/slab.h>
  19#include <linux/skbuff.h>
  20#include <linux/list.h>
  21#include <linux/netdevice.h>
  22#include <linux/in.h>
  23#include <linux/ip.h>
  24#include <linux/udp.h>
  25#include <linux/igmp.h>
  26#include <linux/etherdevice.h>
  27#include <linux/if_ether.h>
  28#include <linux/if_vlan.h>
  29#include <linux/ethtool.h>
  30#include <linux/mutex.h>
  31#include <net/arp.h>
  32#include <net/ndisc.h>
  33#include <net/ip.h>
  34#include <net/ip_tunnels.h>
  35#include <net/icmp.h>
  36#include <net/udp.h>
  37#include <net/rtnetlink.h>
  38#include <net/route.h>
  39#include <net/dsfield.h>
  40#include <net/inet_ecn.h>
  41#include <net/net_namespace.h>
  42#include <net/netns/generic.h>
  43#include <net/geneve.h>
  44#include <net/protocol.h>
  45#include <net/udp_tunnel.h>
  46#if IS_ENABLED(CONFIG_IPV6)
  47#include <net/ipv6.h>
  48#include <net/addrconf.h>
  49#include <net/ip6_tunnel.h>
  50#include <net/ip6_checksum.h>
  51#endif
  52
  53/* Protects sock_list and refcounts. */
  54static DEFINE_MUTEX(geneve_mutex);
  55
  56/* per-network namespace private data for this module */
  57struct geneve_net {
  58        struct list_head        sock_list;
  59};
  60
  61static int geneve_net_id;
  62
  63static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
  64{
  65        return (struct genevehdr *)(udp_hdr(skb) + 1);
  66}
  67
  68static struct geneve_sock *geneve_find_sock(struct net *net,
  69                                            sa_family_t family, __be16 port)
  70{
  71        struct geneve_net *gn = net_generic(net, geneve_net_id);
  72        struct geneve_sock *gs;
  73
  74        list_for_each_entry(gs, &gn->sock_list, list) {
  75                if (inet_sk(gs->sock->sk)->inet_sport == port &&
  76                    inet_sk(gs->sock->sk)->sk.sk_family == family)
  77                        return gs;
  78        }
  79
  80        return NULL;
  81}
  82
  83static void geneve_build_header(struct genevehdr *geneveh,
  84                                __be16 tun_flags, u8 vni[3],
  85                                u8 options_len, u8 *options)
  86{
  87        geneveh->ver = GENEVE_VER;
  88        geneveh->opt_len = options_len / 4;
  89        geneveh->oam = !!(tun_flags & TUNNEL_OAM);
  90        geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
  91        geneveh->rsvd1 = 0;
  92        memcpy(geneveh->vni, vni, 3);
  93        geneveh->proto_type = htons(ETH_P_TEB);
  94        geneveh->rsvd2 = 0;
  95
  96        memcpy(geneveh->options, options, options_len);
  97}
  98
  99/* Transmit a fully formatted Geneve frame.
 100 *
 101 * When calling this function. The skb->data should point
 102 * to the geneve header which is fully formed.
 103 *
 104 * This function will add other UDP tunnel headers.
 105 */
 106int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
 107                    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
 108                    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
 109                    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
 110                    bool csum, bool xnet)
 111{
 112        struct genevehdr *gnvh;
 113        int min_headroom;
 114        int err;
 115
 116        skb = udp_tunnel_handle_offloads(skb, csum);
 117        if (IS_ERR(skb))
 118                return PTR_ERR(skb);
 119
 120        min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 121                        + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
 122                        + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 123
 124        err = skb_cow_head(skb, min_headroom);
 125        if (unlikely(err)) {
 126                kfree_skb(skb);
 127                return err;
 128        }
 129
 130        skb = vlan_hwaccel_push_inside(skb);
 131        if (unlikely(!skb))
 132                return -ENOMEM;
 133
 134        gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
 135        geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
 136
 137        skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 138
 139        return udp_tunnel_xmit_skb(rt, skb, src, dst,
 140                                   tos, ttl, df, src_port, dst_port, xnet,
 141                                   !csum);
 142}
 143EXPORT_SYMBOL_GPL(geneve_xmit_skb);
 144
 145static int geneve_hlen(struct genevehdr *gh)
 146{
 147        return sizeof(*gh) + gh->opt_len * 4;
 148}
 149
 150static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
 151                                           struct sk_buff *skb,
 152                                           struct udp_offload *uoff)
 153{
 154        struct sk_buff *p, **pp = NULL;
 155        struct genevehdr *gh, *gh2;
 156        unsigned int hlen, gh_len, off_gnv;
 157        const struct packet_offload *ptype;
 158        __be16 type;
 159        int flush = 1;
 160
 161        off_gnv = skb_gro_offset(skb);
 162        hlen = off_gnv + sizeof(*gh);
 163        gh = skb_gro_header_fast(skb, off_gnv);
 164        if (skb_gro_header_hard(skb, hlen)) {
 165                gh = skb_gro_header_slow(skb, hlen, off_gnv);
 166                if (unlikely(!gh))
 167                        goto out;
 168        }
 169
 170        if (gh->ver != GENEVE_VER || gh->oam)
 171                goto out;
 172        gh_len = geneve_hlen(gh);
 173
 174        hlen = off_gnv + gh_len;
 175        if (skb_gro_header_hard(skb, hlen)) {
 176                gh = skb_gro_header_slow(skb, hlen, off_gnv);
 177                if (unlikely(!gh))
 178                        goto out;
 179        }
 180
 181        flush = 0;
 182
 183        for (p = *head; p; p = p->next) {
 184                if (!NAPI_GRO_CB(p)->same_flow)
 185                        continue;
 186
 187                gh2 = (struct genevehdr *)(p->data + off_gnv);
 188                if (gh->opt_len != gh2->opt_len ||
 189                    memcmp(gh, gh2, gh_len)) {
 190                        NAPI_GRO_CB(p)->same_flow = 0;
 191                        continue;
 192                }
 193        }
 194
 195        type = gh->proto_type;
 196
 197        rcu_read_lock();
 198        ptype = gro_find_receive_by_type(type);
 199        if (ptype == NULL) {
 200                flush = 1;
 201                goto out_unlock;
 202        }
 203
 204        skb_gro_pull(skb, gh_len);
 205        skb_gro_postpull_rcsum(skb, gh, gh_len);
 206        pp = ptype->callbacks.gro_receive(head, skb);
 207
 208out_unlock:
 209        rcu_read_unlock();
 210out:
 211        NAPI_GRO_CB(skb)->flush |= flush;
 212
 213        return pp;
 214}
 215
 216static int geneve_gro_complete(struct sk_buff *skb, int nhoff,
 217                               struct udp_offload *uoff)
 218{
 219        struct genevehdr *gh;
 220        struct packet_offload *ptype;
 221        __be16 type;
 222        int gh_len;
 223        int err = -ENOSYS;
 224
 225        udp_tunnel_gro_complete(skb, nhoff);
 226
 227        gh = (struct genevehdr *)(skb->data + nhoff);
 228        gh_len = geneve_hlen(gh);
 229        type = gh->proto_type;
 230
 231        rcu_read_lock();
 232        ptype = gro_find_complete_by_type(type);
 233        if (ptype != NULL)
 234                err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
 235
 236        rcu_read_unlock();
 237        return err;
 238}
 239
 240static void geneve_notify_add_rx_port(struct geneve_sock *gs)
 241{
 242        struct sock *sk = gs->sock->sk;
 243        sa_family_t sa_family = sk->sk_family;
 244        int err;
 245
 246        if (sa_family == AF_INET) {
 247                err = udp_add_offload(&gs->udp_offloads);
 248                if (err)
 249                        pr_warn("geneve: udp_add_offload failed with status %d\n",
 250                                err);
 251        }
 252}
 253
 254static void geneve_notify_del_rx_port(struct geneve_sock *gs)
 255{
 256        struct sock *sk = gs->sock->sk;
 257        sa_family_t sa_family = sk->sk_family;
 258
 259        if (sa_family == AF_INET)
 260                udp_del_offload(&gs->udp_offloads);
 261}
 262
 263/* Callback from net/ipv4/udp.c to receive packets */
 264static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 265{
 266        struct genevehdr *geneveh;
 267        struct geneve_sock *gs;
 268        int opts_len;
 269
 270        /* Need Geneve and inner Ethernet header to be present */
 271        if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
 272                goto error;
 273
 274        /* Return packets with reserved bits set */
 275        geneveh = geneve_hdr(skb);
 276
 277        if (unlikely(geneveh->ver != GENEVE_VER))
 278                goto error;
 279
 280        if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
 281                goto error;
 282
 283        opts_len = geneveh->opt_len * 4;
 284        if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
 285                                 htons(ETH_P_TEB)))
 286                goto drop;
 287
 288        gs = rcu_dereference_sk_user_data(sk);
 289        if (!gs)
 290                goto drop;
 291
 292        gs->rcv(gs, skb);
 293        return 0;
 294
 295drop:
 296        /* Consume bad packet */
 297        kfree_skb(skb);
 298        return 0;
 299
 300error:
 301        /* Let the UDP layer deal with the skb */
 302        return 1;
 303}
 304
 305static struct socket *geneve_create_sock(struct net *net, bool ipv6,
 306                                         __be16 port)
 307{
 308        struct socket *sock;
 309        struct udp_port_cfg udp_conf;
 310        int err;
 311
 312        memset(&udp_conf, 0, sizeof(udp_conf));
 313
 314        if (ipv6) {
 315                udp_conf.family = AF_INET6;
 316        } else {
 317                udp_conf.family = AF_INET;
 318                udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
 319        }
 320
 321        udp_conf.local_udp_port = port;
 322
 323        /* Open UDP socket */
 324        err = udp_sock_create(net, &udp_conf, &sock);
 325        if (err < 0)
 326                return ERR_PTR(err);
 327
 328        return sock;
 329}
 330
 331/* Create new listen socket if needed */
 332static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 333                                                geneve_rcv_t *rcv, void *data,
 334                                                bool ipv6)
 335{
 336        struct geneve_net *gn = net_generic(net, geneve_net_id);
 337        struct geneve_sock *gs;
 338        struct socket *sock;
 339        struct udp_tunnel_sock_cfg tunnel_cfg;
 340
 341        gs = kzalloc(sizeof(*gs), GFP_KERNEL);
 342        if (!gs)
 343                return ERR_PTR(-ENOMEM);
 344
 345        sock = geneve_create_sock(net, ipv6, port);
 346        if (IS_ERR(sock)) {
 347                kfree(gs);
 348                return ERR_CAST(sock);
 349        }
 350
 351        gs->sock = sock;
 352        gs->refcnt = 1;
 353        gs->rcv = rcv;
 354        gs->rcv_data = data;
 355
 356        /* Initialize the geneve udp offloads structure */
 357        gs->udp_offloads.port = port;
 358        gs->udp_offloads.callbacks.gro_receive  = geneve_gro_receive;
 359        gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
 360        geneve_notify_add_rx_port(gs);
 361
 362        /* Mark socket as an encapsulation socket */
 363        tunnel_cfg.sk_user_data = gs;
 364        tunnel_cfg.encap_type = 1;
 365        tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
 366        tunnel_cfg.encap_destroy = NULL;
 367        setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
 368
 369        list_add(&gs->list, &gn->sock_list);
 370
 371        return gs;
 372}
 373
 374struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
 375                                    geneve_rcv_t *rcv, void *data,
 376                                    bool no_share, bool ipv6)
 377{
 378        struct geneve_sock *gs;
 379
 380        mutex_lock(&geneve_mutex);
 381
 382        gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
 383        if (gs) {
 384                if (!no_share && gs->rcv == rcv)
 385                        gs->refcnt++;
 386                else
 387                        gs = ERR_PTR(-EBUSY);
 388        } else {
 389                gs = geneve_socket_create(net, port, rcv, data, ipv6);
 390        }
 391
 392        mutex_unlock(&geneve_mutex);
 393
 394        return gs;
 395}
 396EXPORT_SYMBOL_GPL(geneve_sock_add);
 397
 398void geneve_sock_release(struct geneve_sock *gs)
 399{
 400        mutex_lock(&geneve_mutex);
 401
 402        if (--gs->refcnt)
 403                goto unlock;
 404
 405        list_del(&gs->list);
 406        geneve_notify_del_rx_port(gs);
 407        udp_tunnel_sock_release(gs->sock);
 408        kfree_rcu(gs, rcu);
 409
 410unlock:
 411        mutex_unlock(&geneve_mutex);
 412}
 413EXPORT_SYMBOL_GPL(geneve_sock_release);
 414
 415static __net_init int geneve_init_net(struct net *net)
 416{
 417        struct geneve_net *gn = net_generic(net, geneve_net_id);
 418
 419        INIT_LIST_HEAD(&gn->sock_list);
 420
 421        return 0;
 422}
 423
 424static struct pernet_operations geneve_net_ops = {
 425        .init = geneve_init_net,
 426        .id   = &geneve_net_id,
 427        .size = sizeof(struct geneve_net),
 428};
 429
 430static int __init geneve_init_module(void)
 431{
 432        int rc;
 433
 434        rc = register_pernet_subsys(&geneve_net_ops);
 435        if (rc)
 436                return rc;
 437
 438        pr_info("Geneve driver\n");
 439
 440        return 0;
 441}
 442module_init(geneve_init_module);
 443
 444static void __exit geneve_cleanup_module(void)
 445{
 446        unregister_pernet_subsys(&geneve_net_ops);
 447}
 448module_exit(geneve_cleanup_module);
 449
 450MODULE_LICENSE("GPL");
 451MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
 452MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic");
 453MODULE_ALIAS_RTNL_LINK("geneve");
 454