linux/net/ipv4/ip_gre.c
/*
 *      Linux NET3:     GRE over IP protocol decoder.
 *
 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but they would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and the traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all other routers (at least, in my neighbourhood)
     return only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect an inner encapsulation
   made by our node. That is difficult or even impossible, especially
   taking fragmentation into account. To be short, ttl is not a solution at all.

   Current solution: the solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed the pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could do. Even if it was your gated that injected
   the fatal route into the network, even if it was you who configured
   the fatal static route: you are innocent. :-)

   Alexey Kuznetsov.
 */

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);

static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;

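/* A GRE header is 4 bytes of base header plus 4 more bytes for each of the
 * optional checksum, key and sequence-number fields enabled in @o_flags.
 */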
static int ip_gre_calc_hlen(__be16 o_flags)
{
        int addend = 4;

        if (o_flags & TUNNEL_CSUM)
                addend += 4;
        if (o_flags & TUNNEL_KEY)
                addend += 4;
        if (o_flags & TUNNEL_SEQ)
                addend += 4;
        return addend;
}

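/* Translate between the on-wire GRE_* flag bits and the kernel-internal
 * TUNNEL_* representation used by struct tnl_ptk_info and
 * struct ip_tunnel_parm.
 */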
static __be16 gre_flags_to_tnl_flags(__be16 flags)
{
        __be16 tflags = 0;

        if (flags & GRE_CSUM)
                tflags |= TUNNEL_CSUM;
        if (flags & GRE_ROUTING)
                tflags |= TUNNEL_ROUTING;
        if (flags & GRE_KEY)
                tflags |= TUNNEL_KEY;
        if (flags & GRE_SEQ)
                tflags |= TUNNEL_SEQ;
        if (flags & GRE_STRICT)
                tflags |= TUNNEL_STRICT;
        if (flags & GRE_REC)
                tflags |= TUNNEL_REC;
        if (flags & GRE_VERSION)
                tflags |= TUNNEL_VERSION;

        return tflags;
}

static __be16 tnl_flags_to_gre_flags(__be16 tflags)
{
        __be16 flags = 0;

        if (tflags & TUNNEL_CSUM)
                flags |= GRE_CSUM;
        if (tflags & TUNNEL_ROUTING)
                flags |= GRE_ROUTING;
        if (tflags & TUNNEL_KEY)
                flags |= GRE_KEY;
        if (tflags & TUNNEL_SEQ)
                flags |= GRE_SEQ;
        if (tflags & TUNNEL_STRICT)
                flags |= GRE_STRICT;
        if (tflags & TUNNEL_REC)
                flags |= GRE_REC;
        if (tflags & TUNNEL_VERSION)
                flags |= GRE_VERSION;

        return flags;
}

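/* On the wire a GRE header looks like (RFC 2784/2890):
 *
 *      base header:    __be16 flags, __be16 protocol (ethertype)
 *      if GRE_CSUM:    __be16 checksum, __be16 reserved
 *      if GRE_KEY:     __be32 key
 *      if GRE_SEQ:     __be32 sequence number
 */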
/* Fills in tpi and returns header length to be pulled. */
static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                            bool *csum_err)
{
        const struct gre_base_hdr *greh;
        __be32 *options;
        int hdr_len;

        if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
                return -EINVAL;

        greh = (struct gre_base_hdr *)skb_transport_header(skb);
        if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
                return -EINVAL;

        tpi->flags = gre_flags_to_tnl_flags(greh->flags);
        hdr_len = ip_gre_calc_hlen(tpi->flags);

        if (!pskb_may_pull(skb, hdr_len))
                return -EINVAL;

        /* The header may have moved: reload it after the pull. */
        greh = (struct gre_base_hdr *)skb_transport_header(skb);
        tpi->proto = greh->protocol;

        options = (__be32 *)(greh + 1);
        if (greh->flags & GRE_CSUM) {
                if (skb_checksum_simple_validate(skb)) {
                        *csum_err = true;
                        return -EINVAL;
                }

                skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
                                         null_compute_pseudo);
                options++;
        }

        if (greh->flags & GRE_KEY) {
                tpi->key = *options;
                options++;
        } else {
                tpi->key = 0;
        }
        if (unlikely(greh->flags & GRE_SEQ)) {
                tpi->seq = *options;
                options++;
        } else {
                tpi->seq = 0;
        }
        /* WCCP version 1 and 2 protocol decoding.
         * - Change protocol to IP
         * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
         */
        if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
                tpi->proto = htons(ETH_P_IP);
                /* WCCPv2 carries an extra word before the IP header. */
                if ((*(u8 *)options & 0xF0) != 0x40) {
                        hdr_len += 4;
                        if (!pskb_may_pull(skb, hdr_len))
                                return -EINVAL;
                }
        }
        return hdr_len;
}

static void ipgre_err(struct sk_buff *skb, u32 info,
                      const struct tnl_ptk_info *tpi)
{
        /* All the routers (except for Linux) return only
           8 bytes of packet payload. It means that precise relaying of
           ICMP in the real Internet is absolutely infeasible.

           Moreover, Cisco "wise men" put the GRE key in the third word
           of the GRE header. This makes it impossible to maintain even
           soft state for keyed GRE tunnels with checksums enabled. Tell
           them "thank you".

           Well, I wonder: rfc1812 was written by a Cisco employee,
           so why the hell do these idiots break standards established
           by themselves???
           */
        struct net *net = dev_net(skb->dev);
        struct ip_tunnel_net *itn;
        const struct iphdr *iph;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;

        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;

        case ICMP_REDIRECT:
                break;
        }

        if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else
                itn = net_generic(net, ipgre_net_id);

        /* The offending inner IP header follows the ICMP header. */
        iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
        t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                             iph->daddr, iph->saddr, tpi->key);

        if (!t)
                return;

        if (t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                return;

        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                return;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
}

static void gre_err(struct sk_buff *skb, u32 info)
{
        /* See the comment at the top of ipgre_err() on why precise ICMP
         * relaying over GRE is infeasible.
         */
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct tnl_ptk_info tpi;
        bool csum_err = false;

        if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
                if (!csum_err)          /* bail out unless it was only a csum error */
                        return;
        }

        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
                                 skb->dev->ifindex, 0, IPPROTO_GRE, 0);
                return;
        }
        if (type == ICMP_REDIRECT) {
                ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
                              IPPROTO_GRE, 0);
                return;
        }

        ipgre_err(skb, info, &tpi);
}

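/* The flow-based tunnel core carries the GRE key in a 64-bit network-order
 * tunnel id; these helpers move the 32-bit key into and out of the
 * low-order half of that id without any byte swapping.
 */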
static __be64 key_to_tunnel_id(__be32 key)
{
#ifdef __BIG_ENDIAN
        return (__force __be64)((__force u32)key);
#else
        return (__force __be64)((__force u64)key << 32);
#endif
}

/* Returns the least-significant 32 bits of a __be64. */
static __be32 tunnel_id_to_key(__be64 x)
{
#ifdef __BIG_ENDIAN
        return (__force __be32)x;
#else
        return (__force __be32)((__force u64)x >> 32);
#endif
}

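/* Deliver a decapsulated GRE packet to the matching tunnel device, or,
 * for a collect_md (flow-based) tunnel, attach the tunnel metadata to the
 * skb as a metadata dst before handing it to the generic receive path.
 */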
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
        struct net *net = dev_net(skb->dev);
        struct metadata_dst *tun_dst = NULL;
        struct ip_tunnel_net *itn;
        const struct iphdr *iph;
        struct ip_tunnel *tunnel;

        if (tpi->proto == htons(ETH_P_TEB))
                itn = net_generic(net, gre_tap_net_id);
        else
                itn = net_generic(net, ipgre_net_id);

        iph = ip_hdr(skb);
        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
                                  iph->saddr, iph->daddr, tpi->key);

        if (tunnel) {
                if (tunnel->dev->type != ARPHRD_NONE)
                        skb_pop_mac_header(skb);
                else
                        skb_reset_mac_header(skb);
                if (tunnel->collect_md) {
                        __be16 flags;
                        __be64 tun_id;

                        flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
                        tun_id = key_to_tunnel_id(tpi->key);
                        tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
                        if (!tun_dst)
                                return PACKET_REJECT;
                }

                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
                return PACKET_RCVD;
        }
        return PACKET_REJECT;
}

static int gre_rcv(struct sk_buff *skb)
{
        struct tnl_ptk_info tpi;
        bool csum_err = false;
        int hdr_len;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
                /* Looped back packet, drop it! */
                if (rt_is_output_route(skb_rtable(skb)))
                        goto drop;
        }
#endif

        hdr_len = parse_gre_header(skb, &tpi, &csum_err);
        if (hdr_len < 0)
                goto drop;
        if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0)
                goto drop;

        if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
                return 0;

        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
        kfree_skb(skb);
        return 0;
}

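/* Fold a full checksum over the GRE payload; if checksum offload has left
 * the skb CHECKSUM_PARTIAL, use local checksum offload instead of walking
 * the data again.
 */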
static __sum16 gre_checksum(struct sk_buff *skb)
{
        __wsum csum;

        if (skb->ip_summed == CHECKSUM_PARTIAL)
                csum = lco_csum(skb);
        else
                csum = skb_checksum(skb, 0, skb->len, 0);
        return csum_fold(csum);
}

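/* Push a GRE header in front of the payload.  The base header is written
 * first; the optional sequence, key and checksum words are then filled in
 * from the tail of the option area backwards, matching their on-wire order.
 */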
static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
                         __be16 proto, __be32 key, __be32 seq)
{
        struct gre_base_hdr *greh;

        skb_push(skb, hdr_len);

        skb_reset_transport_header(skb);
        greh = (struct gre_base_hdr *)skb->data;
        greh->flags = tnl_flags_to_gre_flags(flags);
        greh->protocol = proto;

        if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
                __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);

                if (flags & TUNNEL_SEQ) {
                        *ptr = seq;
                        ptr--;
                }
                if (flags & TUNNEL_KEY) {
                        *ptr = key;
                        ptr--;
                }
                if (flags & TUNNEL_CSUM &&
                    !(skb_shinfo(skb)->gso_type &
                      (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
                        *ptr = 0;
                        *(__sum16 *)ptr = gre_checksum(skb);
                }
        }
}

static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
                       const struct iphdr *tnl_params,
                       __be16 proto)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        if (tunnel->parms.o_flags & TUNNEL_SEQ)
                tunnel->o_seqno++;

        /* Push GRE header. */
        build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
                     proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));

        skb_set_inner_protocol(skb, proto);
        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}

static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
                                           bool csum)
{
        return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

static struct rtable *gre_get_rt(struct sk_buff *skb,
                                 struct net_device *dev,
                                 struct flowi4 *fl,
                                 const struct ip_tunnel_key *key)
{
        struct net *net = dev_net(dev);

        memset(fl, 0, sizeof(*fl));
        fl->daddr = key->u.ipv4.dst;
        fl->saddr = key->u.ipv4.src;
        fl->flowi4_tos = RT_TOS(key->tos);
        fl->flowi4_mark = skb->mark;
        fl->flowi4_proto = IPPROTO_GRE;

        return ip_route_output_key(net, fl);
}

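/* Flow-based (collect_md) transmit path: route and encapsulate the packet
 * using the per-packet tunnel metadata attached to the skb instead of the
 * device's configured parameters.
 */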
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
                        __be16 proto)
{
        struct ip_tunnel_info *tun_info;
        const struct ip_tunnel_key *key;
        struct rtable *rt = NULL;
        struct flowi4 fl;
        int min_headroom;
        int tunnel_hlen;
        __be16 df, flags;
        bool use_cache;
        int err;

        tun_info = skb_tunnel_info(skb);
        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
                     ip_tunnel_info_af(tun_info) != AF_INET))
                goto err_free_skb;

        key = &tun_info->key;
        use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
        if (use_cache)
                rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
        if (!rt) {
                rt = gre_get_rt(skb, dev, &fl, key);
                if (IS_ERR(rt))
                        goto err_free_skb;
                if (use_cache)
                        dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
                                          fl.saddr);
        }

        tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

        min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
                        + tunnel_hlen + sizeof(struct iphdr);
        if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
                int head_delta = SKB_DATA_ALIGN(min_headroom -
                                                skb_headroom(skb) +
                                                16);

                err = pskb_expand_head(skb, max_t(int, head_delta, 0),
                                       0, GFP_ATOMIC);
                if (unlikely(err))
                        goto err_free_rt;
        }

        /* Push Tunnel header. */
        skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
        if (IS_ERR(skb)) {
                skb = NULL;
                goto err_free_rt;
        }

        flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
        build_header(skb, tunnel_hlen, flags, proto,
                     tunnel_id_to_key(tun_info->key.tun_id), 0);

        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

        iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
                      key->tos, key->ttl, df, false);
        return;

err_free_rt:
        ip_rt_put(rt);
err_free_skb:
        kfree_skb(skb);
        dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
        struct rtable *rt;
        struct flowi4 fl4;

        if (ip_tunnel_info_af(info) != AF_INET)
                return -EINVAL;

        rt = gre_get_rt(skb, dev, &fl4, &info->key);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        ip_rt_put(rt);
        info->key.u.ipv4.src = fl4.saddr;
        return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
                              struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *tnl_params;

        if (tunnel->collect_md) {
                gre_fb_xmit(skb, dev, skb->protocol);
                return NETDEV_TX_OK;
        }

        if (dev->header_ops) {
                /* Need space for new headers */
                if (skb_cow_head(skb, dev->needed_headroom -
                                      (tunnel->hlen + sizeof(struct iphdr))))
                        goto free_skb;

                tnl_params = (const struct iphdr *)skb->data;

                /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
                 * to gre header.
                 */
                skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
                skb_reset_mac_header(skb);
        } else {
                if (skb_cow_head(skb, dev->needed_headroom))
                        goto free_skb;

                tnl_params = &tunnel->parms.iph;
        }

        skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
        if (IS_ERR(skb))
                goto out;

        __gre_xmit(skb, dev, tnl_params, skb->protocol);
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
out:
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
                                struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        if (tunnel->collect_md) {
                gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
                return NETDEV_TX_OK;
        }

        skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM));
        if (IS_ERR(skb))
                goto out;

        if (skb_cow_head(skb, dev->needed_headroom))
                goto free_skb;

        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
        return NETDEV_TX_OK;

free_skb:
        kfree_skb(skb);
out:
        dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
}

static int ipgre_tunnel_ioctl(struct net_device *dev,
                              struct ifreq *ifr, int cmd)
{
        int err;
        struct ip_tunnel_parm p;

        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                return -EFAULT;
        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
                    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
                    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
                        return -EINVAL;
        }
        p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
        p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

        err = ip_tunnel_ioctl(dev, &p, cmd);
        if (err)
                return err;

        p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
        p.o_flags = tnl_flags_to_gre_flags(p.o_flags);

        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                return -EFAULT;
        return 0;
}

/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
                        unsigned short type,
                        const void *daddr, const void *saddr, unsigned int len)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct iphdr *iph;
        struct gre_base_hdr *greh;

        iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
        greh = (struct gre_base_hdr *)(iph + 1);
        greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
        greh->protocol = htons(type);

        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

        /* Set the source hardware address. */
        if (saddr)
                memcpy(&iph->saddr, saddr, 4);
        if (daddr)
                memcpy(&iph->daddr, daddr, 4);
        if (iph->daddr)
                return t->hlen + sizeof(*iph);

        return -(t->hlen + sizeof(*iph));
}

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
        const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

        memcpy(haddr, &iph->saddr, 4);
        return 4;
}

static const struct header_ops ipgre_header_ops = {
        .create = ipgre_header,
        .parse  = ipgre_header_parse,
};

#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr)) {
                struct flowi4 fl4;
                struct rtable *rt;

                rt = ip_route_output_gre(t->net, &fl4,
                                         t->parms.iph.daddr,
                                         t->parms.iph.saddr,
                                         t->parms.o_key,
                                         RT_TOS(t->parms.iph.tos),
                                         t->parms.link);
                if (IS_ERR(rt))
                        return -EADDRNOTAVAIL;
                dev = rt->dst.dev;
                ip_rt_put(rt);
                if (!__in_dev_get_rtnl(dev))
                        return -EADDRNOTAVAIL;
                t->mlink = dev->ifindex;
                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
        }
        return 0;
}

static int ipgre_close(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
                struct in_device *in_dev;

                in_dev = inetdev_by_index(t->net, t->mlink);
                if (in_dev)
                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
        }
        return 0;
}
#endif

static const struct net_device_ops ipgre_netdev_ops = {
        .ndo_init               = ipgre_tunnel_init,
        .ndo_uninit             = ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
        .ndo_open               = ipgre_open,
        .ndo_stop               = ipgre_close,
#endif
        .ndo_start_xmit         = ipgre_xmit,
        .ndo_do_ioctl           = ipgre_tunnel_ioctl,
        .ndo_change_mtu         = ip_tunnel_change_mtu,
        .ndo_get_stats64        = ip_tunnel_get_stats64,
        .ndo_get_iflink         = ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG |              \
                      NETIF_F_FRAGLIST |        \
                      NETIF_F_HIGHDMA |         \
                      NETIF_F_HW_CSUM)

static void ipgre_tunnel_setup(struct net_device *dev)
{
        dev->netdev_ops         = &ipgre_netdev_ops;
        dev->type               = ARPHRD_IPGRE;
        ip_tunnel_setup(dev, ipgre_net_id);
}

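/* Common initialisation for gre and gretap devices: compute the
 * encapsulation header length, size the MTU and headroom accordingly,
 * and advertise the offload features the resulting configuration can
 * support.
 */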
static void __gre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;
        int t_hlen;

        tunnel = netdev_priv(dev);
        tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
        tunnel->parms.iph.protocol = IPPROTO_GRE;

        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

        t_hlen = tunnel->hlen + sizeof(struct iphdr);

        dev->needed_headroom    = LL_MAX_HEADER + t_hlen + 4;
        dev->mtu                = ETH_DATA_LEN - t_hlen - 4;

        dev->features           |= GRE_FEATURES;
        dev->hw_features        |= GRE_FEATURES;

        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
                /* TCP offload with GRE SEQ is not supported, nor
                 * can we support 2 levels of outer headers requiring
                 * an update.
                 */
                if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
                    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
                        dev->features    |= NETIF_F_GSO_SOFTWARE;
                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
                }

                /* Can use a lockless transmit, unless we generate
                 * output sequences
                 */
                dev->features |= NETIF_F_LLTX;
        }
}

static int ipgre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;

        __gre_tunnel_init(dev);

        memcpy(dev->dev_addr, &iph->saddr, 4);
        memcpy(dev->broadcast, &iph->daddr, 4);

        dev->flags              = IFF_NOARP;
        netif_keep_dst(dev);
        dev->addr_len           = 4;

        if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        if (!iph->saddr)
                                return -EINVAL;
                        dev->flags = IFF_BROADCAST;
                        dev->header_ops = &ipgre_header_ops;
                }
#endif
        } else if (!tunnel->collect_md) {
                dev->header_ops = &ipgre_header_ops;
        }

        return ip_tunnel_init(dev);
}

static const struct gre_protocol ipgre_protocol = {
        .handler     = gre_rcv,
        .err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_net(struct net *net)
{
        struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);

        ip_tunnel_delete_net(itn, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
        .init = ipgre_init_net,
        .exit = ipgre_exit_net,
        .id   = &ipgre_net_id,
        .size = sizeof(struct ip_tunnel_net),
};

static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
        __be16 flags;

        if (!data)
                return 0;

        flags = 0;
        if (data[IFLA_GRE_IFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
        if (data[IFLA_GRE_OFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
        if (flags & (GRE_VERSION | GRE_ROUTING))
                return -EINVAL;

        if (data[IFLA_GRE_COLLECT_METADATA] &&
            data[IFLA_GRE_ENCAP_TYPE] &&
            nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
                return -EINVAL;

        return 0;
}

static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
{
        __be32 daddr;

        if (tb[IFLA_ADDRESS]) {
                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
                        return -EINVAL;
                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
                        return -EADDRNOTAVAIL;
        }

        if (!data)
                goto out;

        if (data[IFLA_GRE_REMOTE]) {
                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
                if (!daddr)
                        return -EINVAL;
        }

out:
        return ipgre_tunnel_validate(tb, data);
}

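/* Translate the IFLA_GRE_* netlink attributes into the legacy
 * ip_tunnel_parm structure shared with the ioctl interface.
 */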
static void ipgre_netlink_parms(struct net_device *dev,
                                struct nlattr *data[],
                                struct nlattr *tb[],
                                struct ip_tunnel_parm *parms)
{
        memset(parms, 0, sizeof(*parms));

        parms->iph.protocol = IPPROTO_GRE;

        if (!data)
                return;

        if (data[IFLA_GRE_LINK])
                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

        if (data[IFLA_GRE_IFLAGS])
                parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

        if (data[IFLA_GRE_OFLAGS])
                parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

        if (data[IFLA_GRE_IKEY])
                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

        if (data[IFLA_GRE_OKEY])
                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

        if (data[IFLA_GRE_LOCAL])
                parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

        if (data[IFLA_GRE_REMOTE])
                parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

        if (data[IFLA_GRE_TTL])
                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

        if (data[IFLA_GRE_TOS])
                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
                parms->iph.frag_off = htons(IP_DF);

        if (data[IFLA_GRE_COLLECT_METADATA]) {
                struct ip_tunnel *t = netdev_priv(dev);

                t->collect_md = true;
                if (dev->type == ARPHRD_IPGRE)
                        dev->type = ARPHRD_NONE;
        }
}

/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
                                      struct ip_tunnel_encap *ipencap)
{
        bool ret = false;

        memset(ipencap, 0, sizeof(*ipencap));

        if (!data)
                return ret;

        if (data[IFLA_GRE_ENCAP_TYPE]) {
                ret = true;
                ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
        }

        if (data[IFLA_GRE_ENCAP_FLAGS]) {
                ret = true;
                ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
        }

        if (data[IFLA_GRE_ENCAP_SPORT]) {
                ret = true;
                ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
        }

        if (data[IFLA_GRE_ENCAP_DPORT]) {
                ret = true;
                ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
        }

        return ret;
}

static int gre_tap_init(struct net_device *dev)
{
        __gre_tunnel_init(dev);
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

        return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
        .ndo_init               = gre_tap_init,
        .ndo_uninit             = ip_tunnel_uninit,
        .ndo_start_xmit         = gre_tap_xmit,
        .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_change_mtu         = ip_tunnel_change_mtu,
        .ndo_get_stats64        = ip_tunnel_get_stats64,
        .ndo_get_iflink         = ip_tunnel_get_iflink,
        .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
{
        ether_setup(dev);
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        ip_tunnel_setup(dev, gre_tap_net_id);
}

static int ipgre_newlink(struct net *src_net, struct net_device *dev,
                         struct nlattr *tb[], struct nlattr *data[])
{
        struct ip_tunnel_parm p;
        struct ip_tunnel_encap ipencap;

        if (ipgre_netlink_encap_parms(data, &ipencap)) {
                struct ip_tunnel *t = netdev_priv(dev);
                int err = ip_tunnel_encap_setup(t, &ipencap);

                if (err < 0)
                        return err;
        }

        ipgre_netlink_parms(dev, data, tb, &p);
        return ip_tunnel_newlink(dev, tb, &p);
}

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
                            struct nlattr *data[])
{
        struct ip_tunnel_parm p;
        struct ip_tunnel_encap ipencap;

        if (ipgre_netlink_encap_parms(data, &ipencap)) {
                struct ip_tunnel *t = netdev_priv(dev);
                int err = ip_tunnel_encap_setup(t, &ipencap);

                if (err < 0)
                        return err;
        }

        ipgre_netlink_parms(dev, data, tb, &p);
        return ip_tunnel_changelink(dev, tb, &p);
}

static size_t ipgre_get_size(const struct net_device *dev)
{
        return
                /* IFLA_GRE_LINK */
                nla_total_size(4) +
                /* IFLA_GRE_IFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_OFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_IKEY */
                nla_total_size(4) +
                /* IFLA_GRE_OKEY */
                nla_total_size(4) +
                /* IFLA_GRE_LOCAL */
                nla_total_size(4) +
                /* IFLA_GRE_REMOTE */
                nla_total_size(4) +
                /* IFLA_GRE_TTL */
                nla_total_size(1) +
                /* IFLA_GRE_TOS */
                nla_total_size(1) +
                /* IFLA_GRE_PMTUDISC */
                nla_total_size(1) +
                /* IFLA_GRE_ENCAP_TYPE */
                nla_total_size(2) +
                /* IFLA_GRE_ENCAP_FLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_ENCAP_SPORT */
                nla_total_size(2) +
                /* IFLA_GRE_ENCAP_DPORT */
                nla_total_size(2) +
                /* IFLA_GRE_COLLECT_METADATA */
                nla_total_size(0) +
                0;
}

static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;

        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
            nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
            nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
            nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
            nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
            nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
            nla_put_u8(skb, IFLA_GRE_PMTUDISC,
                       !!(p->iph.frag_off & htons(IP_DF))))
                goto nla_put_failure;

        if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
                        t->encap.type) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
                         t->encap.sport) ||
            nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
                         t->encap.dport) ||
            nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
                        t->encap.flags))
                goto nla_put_failure;

        if (t->collect_md) {
                if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
                        goto nla_put_failure;
        }

        return 0;

nla_put_failure:
        return -EMSGSIZE;
}

static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
        [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
        [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
        [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
};

static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind           = "gre",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tunnel_setup,
        .validate       = ipgre_tunnel_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .dellink        = ip_tunnel_dellink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
        .get_link_net   = ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
        .kind           = "gretap",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tap_setup,
        .validate       = ipgre_tap_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .dellink        = ip_tunnel_dellink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
        .get_link_net   = ip_tunnel_get_link_net,
};

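/* Create a flow-based (collect_md) gretap device from inside the kernel.
 * Callers such as openvswitch use this to get a single device that can
 * transmit and receive any GRE flow, driven by per-packet metadata.
 */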
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
                                        u8 name_assign_type)
{
        struct nlattr *tb[IFLA_MAX + 1];
        struct net_device *dev;
        struct ip_tunnel *t;
        int err;

        memset(&tb, 0, sizeof(tb));

        dev = rtnl_create_link(net, name, name_assign_type,
                               &ipgre_tap_ops, tb);
        if (IS_ERR(dev))
                return dev;

        /* Configure flow based GRE device. */
        t = netdev_priv(dev);
        t->collect_md = true;

        err = ipgre_newlink(net, dev, tb, NULL);
        if (err < 0)
                goto out;

        /* openvswitch users expect packet sizes to be unrestricted,
         * so set the largest MTU we can.
         */
        err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
        if (err)
                goto out;

        return dev;
out:
        free_netdev(dev);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);

static int __net_init ipgre_tap_init_net(struct net *net)
{
        return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_net(struct net *net)
{
        struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);

        ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
        .init = ipgre_tap_init_net,
        .exit = ipgre_tap_exit_net,
        .id   = &gre_tap_net_id,
        .size = sizeof(struct ip_tunnel_net),
};

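/* For reference, the link types registered below are normally managed
 * from user space with iproute2, e.g. (example addresses/names only):
 *
 *   ip link add gre1 type gre remote 192.0.2.2 local 192.0.2.1 ttl 255
 *   ip link add gretap1 type gretap remote 192.0.2.2 local 192.0.2.1
 *
 * Flow-based operation is requested with the IFLA_GRE_COLLECT_METADATA
 * netlink attribute instead of fixed endpoints.
 */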
static int __init ipgre_init(void)
{
        int err;

        pr_info("GRE over IPv4 tunneling driver\n");

        err = register_pernet_device(&ipgre_net_ops);
        if (err < 0)
                return err;

        err = register_pernet_device(&ipgre_tap_net_ops);
        if (err < 0)
                goto pnet_tap_failed;

        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
        if (err < 0) {
                pr_info("%s: can't add protocol\n", __func__);
                goto add_proto_failed;
        }

        err = rtnl_link_register(&ipgre_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

        err = rtnl_link_register(&ipgre_tap_ops);
        if (err < 0)
                goto tap_ops_failed;

        return 0;

tap_ops_failed:
        rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
        unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_failed:
        unregister_pernet_device(&ipgre_net_ops);
        return err;
}

static void __exit ipgre_fini(void)
{
        rtnl_link_unregister(&ipgre_tap_ops);
        rtnl_link_unregister(&ipgre_link_ops);
        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
        unregister_pernet_device(&ipgre_tap_net_ops);
        unregister_pernet_device(&ipgre_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");