linux/net/ipv4/ip_gre.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     GRE over IP protocol decoder.
   3 *
   4 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/capability.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18#include <linux/kernel.h>
  19#include <linux/slab.h>
  20#include <asm/uaccess.h>
  21#include <linux/skbuff.h>
  22#include <linux/netdevice.h>
  23#include <linux/in.h>
  24#include <linux/tcp.h>
  25#include <linux/udp.h>
  26#include <linux/if_arp.h>
  27#include <linux/mroute.h>
  28#include <linux/init.h>
  29#include <linux/in6.h>
  30#include <linux/inetdevice.h>
  31#include <linux/igmp.h>
  32#include <linux/netfilter_ipv4.h>
  33#include <linux/etherdevice.h>
  34#include <linux/if_ether.h>
  35
  36#include <net/sock.h>
  37#include <net/ip.h>
  38#include <net/icmp.h>
  39#include <net/protocol.h>
  40#include <net/ip_tunnels.h>
  41#include <net/arp.h>
  42#include <net/checksum.h>
  43#include <net/dsfield.h>
  44#include <net/inet_ecn.h>
  45#include <net/xfrm.h>
  46#include <net/net_namespace.h>
  47#include <net/netns/generic.h>
  48#include <net/rtnetlink.h>
  49#include <net/gre.h>
  50
  51#if IS_ENABLED(CONFIG_IPV6)
  52#include <net/ipv6.h>
  53#include <net/ip6_fib.h>
  54#include <net/ip6_route.h>
  55#endif
  56
  57/*
  58   Problems & solutions
  59   --------------------
  60
  61   1. The most important issue is detecting local dead loops.
  62   They would cause complete host lockup in transmit, which
  63   would be "resolved" by stack overflow or, if queueing is enabled,
  64   with infinite looping in net_bh.
  65
  66   We cannot track such dead loops during route installation,
  67   it is infeasible task. The most general solutions would be
  68   to keep skb->encapsulation counter (sort of local ttl),
  69   and silently drop packet when it expires. It is a good
  70   solution, but it supposes maintaining new variable in ALL
  71   skb, even if no tunneling is used.
  72
  73   Current solution: xmit_recursion breaks dead loops. This is a percpu
  74   counter, since when we enter the first ndo_xmit(), cpu migration is
  75   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
  76
  77   2. Networking dead loops would not kill routers, but would really
  78   kill network. IP hop limit plays role of "t->recursion" in this case,
  79   if we copy it from packet being encapsulated to upper header.
  80   It is very good solution, but it introduces two problems:
  81
  82   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  83     do not work over tunnels.
  84   - traceroute does not work. I planned to relay ICMP from tunnel,
  85     so that this problem would be solved and traceroute output
   86     would be even more informative. This idea appeared to be wrong:
  87     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  88     true router now :-)), all routers (at least, in neighbourhood of mine)
  89     return only 8 bytes of payload. It is the end.
  90
   91   Hence, if we want OSPF to work or traceroute to say something reasonable,
  92   we should search for another solution.
  93
  94   One of them is to parse packet trying to detect inner encapsulation
  95   made by our node. It is difficult or even impossible, especially,
   96   taking into account fragmentation. To be short, ttl is not a solution at all.
  97
  98   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  99   We force DF flag on tunnels with preconfigured hop limit,
 100   that is ALL. :-) Well, it does not remove the problem completely,
 101   but exponential growth of network traffic is changed to linear
 102   (branches, that exceed pmtu are pruned) and tunnel mtu
 103   rapidly degrades to value <68, where looping stops.
 104   Yes, it is not good if there exists a router in the loop,
 105   which does not force DF, even when encapsulating packets have DF set.
 106   But it is not our problem! Nobody could accuse us, we made
 107   all that we could make. Even if it is your gated who injected
 108   fatal route to network, even if it were you who configured
 109   fatal static route: you are innocent. :-)
 110
 111   Alexey Kuznetsov.
 112 */
 113
 114static bool log_ecn_error = true;
 115module_param(log_ecn_error, bool, 0644);
 116MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 117
 118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 119static int ipgre_tunnel_init(struct net_device *dev);
 120
 121static int ipgre_net_id __read_mostly;
 122static int gre_tap_net_id __read_mostly;
 123
 124static __sum16 check_checksum(struct sk_buff *skb)
 125{
 126        __sum16 csum = 0;
 127
 128        switch (skb->ip_summed) {
 129        case CHECKSUM_COMPLETE:
 130                csum = csum_fold(skb->csum);
 131
 132                if (!csum)
 133                        break;
 134                /* Fall through. */
 135
 136        case CHECKSUM_NONE:
 137                skb->csum = 0;
 138                csum = __skb_checksum_complete(skb);
 139                skb->ip_summed = CHECKSUM_COMPLETE;
 140                break;
 141        }
 142
 143        return csum;
 144}
 145
 146static int ip_gre_calc_hlen(__be16 o_flags)
 147{
 148        int addend = 4;
 149
 150        if (o_flags&TUNNEL_CSUM)
 151                addend += 4;
 152        if (o_flags&TUNNEL_KEY)
 153                addend += 4;
 154        if (o_flags&TUNNEL_SEQ)
 155                addend += 4;
 156        return addend;
 157}
 158
 159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 160                            bool *csum_err, int *hdr_len)
 161{
 162        unsigned int ip_hlen = ip_hdrlen(skb);
 163        const struct gre_base_hdr *greh;
 164        __be32 *options;
 165
 166        if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
 167                return -EINVAL;
 168
 169        greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
 170        if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
 171                return -EINVAL;
 172
 173        tpi->flags = gre_flags_to_tnl_flags(greh->flags);
 174        *hdr_len = ip_gre_calc_hlen(tpi->flags);
 175
 176        if (!pskb_may_pull(skb, *hdr_len))
 177                return -EINVAL;
 178
 179        greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
 180
 181        tpi->proto = greh->protocol;
 182
 183        options = (__be32 *)(greh + 1);
 184        if (greh->flags & GRE_CSUM) {
 185                if (check_checksum(skb)) {
 186                        *csum_err = true;
 187                        return -EINVAL;
 188                }
 189                options++;
 190        }
 191
 192        if (greh->flags & GRE_KEY) {
 193                tpi->key = *options;
 194                options++;
 195        } else
 196                tpi->key = 0;
 197
 198        if (unlikely(greh->flags & GRE_SEQ)) {
 199                tpi->seq = *options;
 200                options++;
 201        } else
 202                tpi->seq = 0;
 203
 204        /* WCCP version 1 and 2 protocol decoding.
 205         * - Change protocol to IP
 206         * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
 207         */
 208        if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
 209                tpi->proto = htons(ETH_P_IP);
 210                if ((*(u8 *)options & 0xF0) != 0x40) {
 211                        *hdr_len += 4;
 212                        if (!pskb_may_pull(skb, *hdr_len))
 213                                return -EINVAL;
 214                }
 215        }
 216
 217        return 0;
 218}
 219
 220static void ipgre_err(struct sk_buff *skb, u32 info)
 221{
 222
 223        /* All the routers (except for Linux) return only
 224           8 bytes of packet payload. It means, that precise relaying of
 225           ICMP in the real Internet is absolutely infeasible.
 226
 227           Moreover, Cisco "wise men" put GRE key to the third word
 228           in GRE header. It makes impossible maintaining even soft
 229           state for keyed GRE tunnels with enabled checksum. Tell
 230           them "thank you".
 231
 232           Well, I wonder, rfc1812 was written by Cisco employee,
 233           what the hell these idiots break standards established
 234           by themselves???
 235           */
 236        struct net *net = dev_net(skb->dev);
 237        struct ip_tunnel_net *itn;
 238        const struct iphdr *iph;
 239        const int type = icmp_hdr(skb)->type;
 240        const int code = icmp_hdr(skb)->code;
 241        struct ip_tunnel *t;
 242        struct tnl_ptk_info tpi;
 243        int hdr_len;
 244        bool csum_err = false;
 245
 246        if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
 247                if (!csum_err)          /* ignore csum errors. */
 248                        return;
 249        }
 250
 251        switch (type) {
 252        default:
 253        case ICMP_PARAMETERPROB:
 254                return;
 255
 256        case ICMP_DEST_UNREACH:
 257                switch (code) {
 258                case ICMP_SR_FAILED:
 259                case ICMP_PORT_UNREACH:
 260                        /* Impossible event. */
 261                        return;
 262                default:
 263                        /* All others are translated to HOST_UNREACH.
 264                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 265                           I believe they are just ether pollution. --ANK
 266                         */
 267                        break;
 268                }
 269                break;
 270        case ICMP_TIME_EXCEEDED:
 271                if (code != ICMP_EXC_TTL)
 272                        return;
 273                break;
 274
 275        case ICMP_REDIRECT:
 276                break;
 277        }
 278
 279        if (tpi.proto == htons(ETH_P_TEB))
 280                itn = net_generic(net, gre_tap_net_id);
 281        else
 282                itn = net_generic(net, ipgre_net_id);
 283
 284        iph = (const struct iphdr *)skb->data;
 285        t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
 286                             iph->daddr, iph->saddr, tpi.key);
 287
 288        if (t == NULL)
 289                return;
 290
 291        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 292                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 293                                 t->parms.link, 0, IPPROTO_GRE, 0);
 294                return;
 295        }
 296        if (type == ICMP_REDIRECT) {
 297                ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 298                              IPPROTO_GRE, 0);
 299                return;
 300        }
 301        if (t->parms.iph.daddr == 0 ||
 302            ipv4_is_multicast(t->parms.iph.daddr))
 303                return;
 304
 305        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 306                return;
 307
 308        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 309                t->err_count++;
 310        else
 311                t->err_count = 1;
 312        t->err_time = jiffies;
 313}
 314
 315static int ipgre_rcv(struct sk_buff *skb)
 316{
 317        struct net *net = dev_net(skb->dev);
 318        struct ip_tunnel_net *itn;
 319        const struct iphdr *iph;
 320        struct ip_tunnel *tunnel;
 321        struct tnl_ptk_info tpi;
 322        int hdr_len;
 323        bool csum_err = false;
 324
 325        if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
 326                goto drop;
 327
 328        if (tpi.proto == htons(ETH_P_TEB))
 329                itn = net_generic(net, gre_tap_net_id);
 330        else
 331                itn = net_generic(net, ipgre_net_id);
 332
 333        iph = ip_hdr(skb);
 334        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
 335                                  iph->saddr, iph->daddr, tpi.key);
 336
 337        if (tunnel) {
 338                ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
 339                return 0;
 340        }
 341        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 342drop:
 343        kfree_skb(skb);
 344        return 0;
 345}
 346
 347static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb)
 348{
 349        int err;
 350
 351        if (skb_is_gso(skb)) {
 352                err = skb_unclone(skb, GFP_ATOMIC);
 353                if (unlikely(err))
 354                        goto error;
 355                skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
 356                return skb;
 357        } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
 358                   tunnel->parms.o_flags&TUNNEL_CSUM) {
 359                err = skb_checksum_help(skb);
 360                if (unlikely(err))
 361                        goto error;
 362        } else if (skb->ip_summed != CHECKSUM_PARTIAL)
 363                skb->ip_summed = CHECKSUM_NONE;
 364
 365        return skb;
 366
 367error:
 368        kfree_skb(skb);
 369        return ERR_PTR(err);
 370}
 371
 372static struct sk_buff *gre_build_header(struct sk_buff *skb,
 373                                        const struct tnl_ptk_info *tpi,
 374                                        int hdr_len)
 375{
 376        struct gre_base_hdr *greh;
 377
 378        skb_push(skb, hdr_len);
 379
 380        greh = (struct gre_base_hdr *)skb->data;
 381        greh->flags = tnl_flags_to_gre_flags(tpi->flags);
 382        greh->protocol = tpi->proto;
 383
 384        if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
 385                __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
 386
 387                if (tpi->flags&TUNNEL_SEQ) {
 388                        *ptr = tpi->seq;
 389                        ptr--;
 390                }
 391                if (tpi->flags&TUNNEL_KEY) {
 392                        *ptr = tpi->key;
 393                        ptr--;
 394                }
 395                if (tpi->flags&TUNNEL_CSUM &&
 396                    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
 397                        *(__sum16 *)ptr = 0;
 398                        *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
 399                                                                 skb->len, 0));
 400                }
 401        }
 402
 403        return skb;
 404}
 405
 406static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 407                       const struct iphdr *tnl_params,
 408                       __be16 proto)
 409{
 410        struct ip_tunnel *tunnel = netdev_priv(dev);
 411        struct tnl_ptk_info tpi;
 412
 413        if (likely(!skb->encapsulation)) {
 414                skb_reset_inner_headers(skb);
 415                skb->encapsulation = 1;
 416        }
 417
 418        tpi.flags = tunnel->parms.o_flags;
 419        tpi.proto = proto;
 420        tpi.key = tunnel->parms.o_key;
 421        if (tunnel->parms.o_flags & TUNNEL_SEQ)
 422                tunnel->o_seqno++;
 423        tpi.seq = htonl(tunnel->o_seqno);
 424
 425        /* Push GRE header. */
 426        skb = gre_build_header(skb, &tpi, tunnel->hlen);
 427        if (unlikely(!skb)) {
 428                dev->stats.tx_dropped++;
 429                return;
 430        }
 431
 432        ip_tunnel_xmit(skb, dev, tnl_params);
 433}
 434
 435static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 436                              struct net_device *dev)
 437{
 438        struct ip_tunnel *tunnel = netdev_priv(dev);
 439        const struct iphdr *tnl_params;
 440
 441        skb = handle_offloads(tunnel, skb);
 442        if (IS_ERR(skb))
 443                goto out;
 444
 445        if (dev->header_ops) {
 446                /* Need space for new headers */
 447                if (skb_cow_head(skb, dev->needed_headroom -
 448                                      (tunnel->hlen + sizeof(struct iphdr))))
 449                        goto free_skb;
 450
 451                tnl_params = (const struct iphdr *)skb->data;
 452
 453                /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
 454                 * to gre header.
 455                 */
 456                skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
 457        } else {
 458                if (skb_cow_head(skb, dev->needed_headroom))
 459                        goto free_skb;
 460
 461                tnl_params = &tunnel->parms.iph;
 462        }
 463
 464        __gre_xmit(skb, dev, tnl_params, skb->protocol);
 465
 466        return NETDEV_TX_OK;
 467
 468free_skb:
 469        dev_kfree_skb(skb);
 470out:
 471        dev->stats.tx_dropped++;
 472        return NETDEV_TX_OK;
 473}
 474
 475static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 476                                struct net_device *dev)
 477{
 478        struct ip_tunnel *tunnel = netdev_priv(dev);
 479
 480        skb = handle_offloads(tunnel, skb);
 481        if (IS_ERR(skb))
 482                goto out;
 483
 484        if (skb_cow_head(skb, dev->needed_headroom))
 485                goto free_skb;
 486
 487        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
 488
 489        return NETDEV_TX_OK;
 490
 491free_skb:
 492        dev_kfree_skb(skb);
 493out:
 494        dev->stats.tx_dropped++;
 495        return NETDEV_TX_OK;
 496}
 497
 498static int ipgre_tunnel_ioctl(struct net_device *dev,
 499                              struct ifreq *ifr, int cmd)
 500{
 501        int err = 0;
 502        struct ip_tunnel_parm p;
 503
 504        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 505                return -EFAULT;
 506        if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 507            p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
 508            ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
 509                return -EINVAL;
 510        }
 511        p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
 512        p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
 513
 514        err = ip_tunnel_ioctl(dev, &p, cmd);
 515        if (err)
 516                return err;
 517
 518        p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
 519        p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
 520
 521        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 522                return -EFAULT;
 523        return 0;
 524}
 525
 526/* Nice toy. Unfortunately, useless in real life :-)
 527   It allows to construct virtual multiprotocol broadcast "LAN"
 528   over the Internet, provided multicast routing is tuned.
 529
 530
  531   I have no idea whether this bicycle was invented before me,
 532   so that I had to set ARPHRD_IPGRE to a random value.
 533   I have an impression, that Cisco could make something similar,
 534   but this feature is apparently missing in IOS<=11.2(8).
 535
 536   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 537   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 538
 539   ping -t 255 224.66.66.66
 540
 541   If nobody answers, mbone does not work.
 542
 543   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 544   ip addr add 10.66.66.<somewhat>/24 dev Universe
 545   ifconfig Universe up
 546   ifconfig Universe add fe80::<Your_real_addr>/10
 547   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 548   ftp 10.66.66.66
 549   ...
 550   ftp fec0:6666:6666::193.233.7.65
 551   ...
 552 */
 553static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 554                        unsigned short type,
 555                        const void *daddr, const void *saddr, unsigned int len)
 556{
 557        struct ip_tunnel *t = netdev_priv(dev);
 558        struct iphdr *iph;
 559        struct gre_base_hdr *greh;
 560
 561        iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
 562        greh = (struct gre_base_hdr *)(iph+1);
 563        greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
 564        greh->protocol = htons(type);
 565
 566        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 567
 568        /* Set the source hardware address. */
 569        if (saddr)
 570                memcpy(&iph->saddr, saddr, 4);
 571        if (daddr)
 572                memcpy(&iph->daddr, daddr, 4);
 573        if (iph->daddr)
 574                return t->hlen;
 575
 576        return -(t->hlen + sizeof(*iph));
 577}
 578
 579static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 580{
 581        const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
 582        memcpy(haddr, &iph->saddr, 4);
 583        return 4;
 584}
 585
 586static const struct header_ops ipgre_header_ops = {
 587        .create = ipgre_header,
 588        .parse  = ipgre_header_parse,
 589};
 590
 591#ifdef CONFIG_NET_IPGRE_BROADCAST
 592static int ipgre_open(struct net_device *dev)
 593{
 594        struct ip_tunnel *t = netdev_priv(dev);
 595
 596        if (ipv4_is_multicast(t->parms.iph.daddr)) {
 597                struct flowi4 fl4;
 598                struct rtable *rt;
 599
 600                rt = ip_route_output_gre(dev_net(dev), &fl4,
 601                                         t->parms.iph.daddr,
 602                                         t->parms.iph.saddr,
 603                                         t->parms.o_key,
 604                                         RT_TOS(t->parms.iph.tos),
 605                                         t->parms.link);
 606                if (IS_ERR(rt))
 607                        return -EADDRNOTAVAIL;
 608                dev = rt->dst.dev;
 609                ip_rt_put(rt);
 610                if (__in_dev_get_rtnl(dev) == NULL)
 611                        return -EADDRNOTAVAIL;
 612                t->mlink = dev->ifindex;
 613                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
 614        }
 615        return 0;
 616}
 617
 618static int ipgre_close(struct net_device *dev)
 619{
 620        struct ip_tunnel *t = netdev_priv(dev);
 621
 622        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 623                struct in_device *in_dev;
 624                in_dev = inetdev_by_index(dev_net(dev), t->mlink);
 625                if (in_dev)
 626                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 627        }
 628        return 0;
 629}
 630#endif
 631
 632static const struct net_device_ops ipgre_netdev_ops = {
 633        .ndo_init               = ipgre_tunnel_init,
 634        .ndo_uninit             = ip_tunnel_uninit,
 635#ifdef CONFIG_NET_IPGRE_BROADCAST
 636        .ndo_open               = ipgre_open,
 637        .ndo_stop               = ipgre_close,
 638#endif
 639        .ndo_start_xmit         = ipgre_xmit,
 640        .ndo_do_ioctl           = ipgre_tunnel_ioctl,
 641        .ndo_change_mtu         = ip_tunnel_change_mtu,
 642        .ndo_get_stats64        = ip_tunnel_get_stats64,
 643};
 644
 645#define GRE_FEATURES (NETIF_F_SG |              \
 646                      NETIF_F_FRAGLIST |        \
 647                      NETIF_F_HIGHDMA |         \
 648                      NETIF_F_HW_CSUM)
 649
 650static void ipgre_tunnel_setup(struct net_device *dev)
 651{
 652        dev->netdev_ops         = &ipgre_netdev_ops;
 653        ip_tunnel_setup(dev, ipgre_net_id);
 654}
 655
 656static void __gre_tunnel_init(struct net_device *dev)
 657{
 658        struct ip_tunnel *tunnel;
 659
 660        tunnel = netdev_priv(dev);
 661        tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
 662        tunnel->parms.iph.protocol = IPPROTO_GRE;
 663
 664        dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
 665        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
 666
 667        dev->features           |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
 668        dev->hw_features        |= GRE_FEATURES;
 669
 670        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
 671                /* TCP offload with GRE SEQ is not supported. */
 672                dev->features    |= NETIF_F_GSO_SOFTWARE;
 673                dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 674                /* Can use a lockless transmit, unless we generate
 675                 * output sequences
 676                 */
 677                dev->features |= NETIF_F_LLTX;
 678        }
 679}
 680
 681static int ipgre_tunnel_init(struct net_device *dev)
 682{
 683        struct ip_tunnel *tunnel = netdev_priv(dev);
 684        struct iphdr *iph = &tunnel->parms.iph;
 685
 686        __gre_tunnel_init(dev);
 687
 688        memcpy(dev->dev_addr, &iph->saddr, 4);
 689        memcpy(dev->broadcast, &iph->daddr, 4);
 690
 691        dev->type               = ARPHRD_IPGRE;
 692        dev->flags              = IFF_NOARP;
 693        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 694        dev->addr_len           = 4;
 695
 696        if (iph->daddr) {
 697#ifdef CONFIG_NET_IPGRE_BROADCAST
 698                if (ipv4_is_multicast(iph->daddr)) {
 699                        if (!iph->saddr)
 700                                return -EINVAL;
 701                        dev->flags = IFF_BROADCAST;
 702                        dev->header_ops = &ipgre_header_ops;
 703                }
 704#endif
 705        } else
 706                dev->header_ops = &ipgre_header_ops;
 707
 708        return ip_tunnel_init(dev);
 709}
 710
 711static const struct gre_protocol ipgre_protocol = {
 712        .handler     = ipgre_rcv,
 713        .err_handler = ipgre_err,
 714};
 715
 716static int __net_init ipgre_init_net(struct net *net)
 717{
 718        return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
 719}
 720
 721static void __net_exit ipgre_exit_net(struct net *net)
 722{
 723        struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
 724        ip_tunnel_delete_net(itn);
 725}
 726
 727static struct pernet_operations ipgre_net_ops = {
 728        .init = ipgre_init_net,
 729        .exit = ipgre_exit_net,
 730        .id   = &ipgre_net_id,
 731        .size = sizeof(struct ip_tunnel_net),
 732};
 733
 734static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 735{
 736        __be16 flags;
 737
 738        if (!data)
 739                return 0;
 740
 741        flags = 0;
 742        if (data[IFLA_GRE_IFLAGS])
 743                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
 744        if (data[IFLA_GRE_OFLAGS])
 745                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
 746        if (flags & (GRE_VERSION|GRE_ROUTING))
 747                return -EINVAL;
 748
 749        return 0;
 750}
 751
 752static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
 753{
 754        __be32 daddr;
 755
 756        if (tb[IFLA_ADDRESS]) {
 757                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
 758                        return -EINVAL;
 759                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
 760                        return -EADDRNOTAVAIL;
 761        }
 762
 763        if (!data)
 764                goto out;
 765
 766        if (data[IFLA_GRE_REMOTE]) {
 767                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
 768                if (!daddr)
 769                        return -EINVAL;
 770        }
 771
 772out:
 773        return ipgre_tunnel_validate(tb, data);
 774}
 775
 776static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
 777                               struct ip_tunnel_parm *parms)
 778{
 779        memset(parms, 0, sizeof(*parms));
 780
 781        parms->iph.protocol = IPPROTO_GRE;
 782
 783        if (!data)
 784                return;
 785
 786        if (data[IFLA_GRE_LINK])
 787                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
 788
 789        if (data[IFLA_GRE_IFLAGS])
 790                parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
 791
 792        if (data[IFLA_GRE_OFLAGS])
 793                parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
 794
 795        if (data[IFLA_GRE_IKEY])
 796                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
 797
 798        if (data[IFLA_GRE_OKEY])
 799                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
 800
 801        if (data[IFLA_GRE_LOCAL])
 802                parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
 803
 804        if (data[IFLA_GRE_REMOTE])
 805                parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
 806
 807        if (data[IFLA_GRE_TTL])
 808                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
 809
 810        if (data[IFLA_GRE_TOS])
 811                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
 812
 813        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
 814                parms->iph.frag_off = htons(IP_DF);
 815}
 816
 817static int gre_tap_init(struct net_device *dev)
 818{
 819        __gre_tunnel_init(dev);
 820
 821        return ip_tunnel_init(dev);
 822}
 823
 824static const struct net_device_ops gre_tap_netdev_ops = {
 825        .ndo_init               = gre_tap_init,
 826        .ndo_uninit             = ip_tunnel_uninit,
 827        .ndo_start_xmit         = gre_tap_xmit,
 828        .ndo_set_mac_address    = eth_mac_addr,
 829        .ndo_validate_addr      = eth_validate_addr,
 830        .ndo_change_mtu         = ip_tunnel_change_mtu,
 831        .ndo_get_stats64        = ip_tunnel_get_stats64,
 832};
 833
 834static void ipgre_tap_setup(struct net_device *dev)
 835{
 836        ether_setup(dev);
 837        dev->netdev_ops         = &gre_tap_netdev_ops;
 838        ip_tunnel_setup(dev, gre_tap_net_id);
 839}
 840
 841static int ipgre_newlink(struct net *src_net, struct net_device *dev,
 842                         struct nlattr *tb[], struct nlattr *data[])
 843{
 844        struct ip_tunnel_parm p;
 845
 846        ipgre_netlink_parms(data, tb, &p);
 847        return ip_tunnel_newlink(dev, tb, &p);
 848}
 849
 850static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
 851                            struct nlattr *data[])
 852{
 853        struct ip_tunnel_parm p;
 854
 855        ipgre_netlink_parms(data, tb, &p);
 856        return ip_tunnel_changelink(dev, tb, &p);
 857}
 858
 859static size_t ipgre_get_size(const struct net_device *dev)
 860{
 861        return
 862                /* IFLA_GRE_LINK */
 863                nla_total_size(4) +
 864                /* IFLA_GRE_IFLAGS */
 865                nla_total_size(2) +
 866                /* IFLA_GRE_OFLAGS */
 867                nla_total_size(2) +
 868                /* IFLA_GRE_IKEY */
 869                nla_total_size(4) +
 870                /* IFLA_GRE_OKEY */
 871                nla_total_size(4) +
 872                /* IFLA_GRE_LOCAL */
 873                nla_total_size(4) +
 874                /* IFLA_GRE_REMOTE */
 875                nla_total_size(4) +
 876                /* IFLA_GRE_TTL */
 877                nla_total_size(1) +
 878                /* IFLA_GRE_TOS */
 879                nla_total_size(1) +
 880                /* IFLA_GRE_PMTUDISC */
 881                nla_total_size(1) +
 882                0;
 883}
 884
 885static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 886{
 887        struct ip_tunnel *t = netdev_priv(dev);
 888        struct ip_tunnel_parm *p = &t->parms;
 889
 890        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
 891            nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
 892            nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
 893            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
 894            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
 895            nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
 896            nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
 897            nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
 898            nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
 899            nla_put_u8(skb, IFLA_GRE_PMTUDISC,
 900                       !!(p->iph.frag_off & htons(IP_DF))))
 901                goto nla_put_failure;
 902        return 0;
 903
 904nla_put_failure:
 905        return -EMSGSIZE;
 906}
 907
 908static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
 909        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
 910        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
 911        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
 912        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
 913        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
 914        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 915        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 916        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
 917        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
 918        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
 919};
 920
 921static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
 922        .kind           = "gre",
 923        .maxtype        = IFLA_GRE_MAX,
 924        .policy         = ipgre_policy,
 925        .priv_size      = sizeof(struct ip_tunnel),
 926        .setup          = ipgre_tunnel_setup,
 927        .validate       = ipgre_tunnel_validate,
 928        .newlink        = ipgre_newlink,
 929        .changelink     = ipgre_changelink,
 930        .dellink        = ip_tunnel_dellink,
 931        .get_size       = ipgre_get_size,
 932        .fill_info      = ipgre_fill_info,
 933};
 934
 935static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
 936        .kind           = "gretap",
 937        .maxtype        = IFLA_GRE_MAX,
 938        .policy         = ipgre_policy,
 939        .priv_size      = sizeof(struct ip_tunnel),
 940        .setup          = ipgre_tap_setup,
 941        .validate       = ipgre_tap_validate,
 942        .newlink        = ipgre_newlink,
 943        .changelink     = ipgre_changelink,
 944        .dellink        = ip_tunnel_dellink,
 945        .get_size       = ipgre_get_size,
 946        .fill_info      = ipgre_fill_info,
 947};
 948
 949static int __net_init ipgre_tap_init_net(struct net *net)
 950{
 951        return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
 952}
 953
 954static void __net_exit ipgre_tap_exit_net(struct net *net)
 955{
 956        struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
 957        ip_tunnel_delete_net(itn);
 958}
 959
 960static struct pernet_operations ipgre_tap_net_ops = {
 961        .init = ipgre_tap_init_net,
 962        .exit = ipgre_tap_exit_net,
 963        .id   = &gre_tap_net_id,
 964        .size = sizeof(struct ip_tunnel_net),
 965};
 966
 967static int __init ipgre_init(void)
 968{
 969        int err;
 970
 971        pr_info("GRE over IPv4 tunneling driver\n");
 972
 973        err = register_pernet_device(&ipgre_net_ops);
 974        if (err < 0)
 975                return err;
 976
 977        err = register_pernet_device(&ipgre_tap_net_ops);
 978        if (err < 0)
 979                goto pnet_tap_faied;
 980
 981        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
 982        if (err < 0) {
 983                pr_info("%s: can't add protocol\n", __func__);
 984                goto add_proto_failed;
 985        }
 986
 987        err = rtnl_link_register(&ipgre_link_ops);
 988        if (err < 0)
 989                goto rtnl_link_failed;
 990
 991        err = rtnl_link_register(&ipgre_tap_ops);
 992        if (err < 0)
 993                goto tap_ops_failed;
 994
 995        return 0;
 996
 997tap_ops_failed:
 998        rtnl_link_unregister(&ipgre_link_ops);
 999rtnl_link_failed:
1000        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1001add_proto_failed:
1002        unregister_pernet_device(&ipgre_tap_net_ops);
1003pnet_tap_faied:
1004        unregister_pernet_device(&ipgre_net_ops);
1005        return err;
1006}
1007
1008static void __exit ipgre_fini(void)
1009{
1010        rtnl_link_unregister(&ipgre_tap_ops);
1011        rtnl_link_unregister(&ipgre_link_ops);
1012        if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1013                pr_info("%s: can't remove protocol\n", __func__);
1014        unregister_pernet_device(&ipgre_tap_net_ops);
1015        unregister_pernet_device(&ipgre_net_ops);
1016}
1017
1018module_init(ipgre_init);
1019module_exit(ipgre_fini);
1020MODULE_LICENSE("GPL");
1021MODULE_ALIAS_RTNL_LINK("gre");
1022MODULE_ALIAS_RTNL_LINK("gretap");
1023MODULE_ALIAS_NETDEV("gre0");
1024MODULE_ALIAS_NETDEV("gretap0");
1025