linux/net/ipv6/seg6_iptunnel.c
<<
>>
Prefs
   1/*
   2 *  SR-IPv6 implementation
   3 *
   4 *  Author:
   5 *  David Lebrun <david.lebrun@uclouvain.be>
   6 *
   7 *
   8 *  This program is free software; you can redistribute it and/or
   9 *        modify it under the terms of the GNU General Public License
  10 *        as published by the Free Software Foundation; either version
  11 *        2 of the License, or (at your option) any later version.
  12 */
  13
  14#include <linux/types.h>
  15#include <linux/skbuff.h>
  16#include <linux/net.h>
  17#include <linux/module.h>
  18#include <net/ip.h>
  19#include <net/ip_tunnels.h>
  20#include <net/lwtunnel.h>
  21#include <net/netevent.h>
  22#include <net/netns/generic.h>
  23#include <net/ip6_fib.h>
  24#include <net/route.h>
  25#include <net/seg6.h>
  26#include <linux/seg6.h>
  27#include <linux/seg6_iptunnel.h>
  28#include <net/addrconf.h>
  29#include <net/ip6_route.h>
  30#include <net/dst_cache.h>
  31#ifdef CONFIG_IPV6_SEG6_HMAC
  32#include <net/seg6_hmac.h>
  33#endif
  34
  35struct seg6_lwt {
  36        struct dst_cache cache;
  37        struct seg6_iptunnel_encap tuninfo[0];
  38};
  39
  40static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  41{
  42        return (struct seg6_lwt *)lwt->data;
  43}
  44
  45static inline struct seg6_iptunnel_encap *
  46seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  47{
  48        return seg6_lwt_lwtunnel(lwt)->tuninfo;
  49}
  50
  51static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
  52        [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
  53};
  54
  55static int nla_put_srh(struct sk_buff *skb, int attrtype,
  56                       struct seg6_iptunnel_encap *tuninfo)
  57{
  58        struct seg6_iptunnel_encap *data;
  59        struct nlattr *nla;
  60        int len;
  61
  62        len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  63
  64        nla = nla_reserve(skb, attrtype, len);
  65        if (!nla)
  66                return -EMSGSIZE;
  67
  68        data = nla_data(nla);
  69        memcpy(data, tuninfo, len);
  70
  71        return 0;
  72}
  73
  74static void set_tun_src(struct net *net, struct net_device *dev,
  75                        struct in6_addr *daddr, struct in6_addr *saddr)
  76{
  77        struct seg6_pernet_data *sdata = seg6_pernet(net);
  78        struct in6_addr *tun_src;
  79
  80        rcu_read_lock();
  81
  82        tun_src = rcu_dereference(sdata->tun_src);
  83
  84        if (!ipv6_addr_any(tun_src)) {
  85                memcpy(saddr, tun_src, sizeof(struct in6_addr));
  86        } else {
  87                ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
  88                                   saddr);
  89        }
  90
  91        rcu_read_unlock();
  92}
  93
  94/* Compute flowlabel for outer IPv6 header */
  95static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
  96                                  struct ipv6hdr *inner_hdr)
  97{
  98        int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
  99        __be32 flowlabel = 0;
 100        u32 hash;
 101
 102        if (do_flowlabel > 0) {
 103                hash = skb_get_hash(skb);
 104                hash = rol32(hash, 16);
 105                flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 106        } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
 107                flowlabel = ip6_flowlabel(inner_hdr);
 108        }
 109        return flowlabel;
 110}
 111
 112/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 113int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 114{
 115        struct dst_entry *dst = skb_dst(skb);
 116        struct net *net = dev_net(dst->dev);
 117        struct ipv6hdr *hdr, *inner_hdr;
 118        struct ipv6_sr_hdr *isrh;
 119        int hdrlen, tot_len, err;
 120        __be32 flowlabel;
 121
 122        hdrlen = (osrh->hdrlen + 1) << 3;
 123        tot_len = hdrlen + sizeof(*hdr);
 124
 125        err = skb_cow_head(skb, tot_len + skb->mac_len);
 126        if (unlikely(err))
 127                return err;
 128
 129        inner_hdr = ipv6_hdr(skb);
 130        flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
 131
 132        skb_push(skb, tot_len);
 133        skb_reset_network_header(skb);
 134        skb_mac_header_rebuild(skb);
 135        hdr = ipv6_hdr(skb);
 136
 137        /* inherit tc, flowlabel and hlim
 138         * hlim will be decremented in ip6_forward() afterwards and
 139         * decapsulation will overwrite inner hlim with outer hlim
 140         */
 141
 142        if (skb->protocol == htons(ETH_P_IPV6)) {
 143                ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
 144                             flowlabel);
 145                hdr->hop_limit = inner_hdr->hop_limit;
 146        } else {
 147                ip6_flow_hdr(hdr, 0, flowlabel);
 148                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
 149        }
 150
 151        hdr->nexthdr = NEXTHDR_ROUTING;
 152
 153        isrh = (void *)hdr + sizeof(*hdr);
 154        memcpy(isrh, osrh, hdrlen);
 155
 156        isrh->nexthdr = proto;
 157
 158        hdr->daddr = isrh->segments[isrh->first_segment];
 159        set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
 160
 161#ifdef CONFIG_IPV6_SEG6_HMAC
 162        if (sr_has_hmac(isrh)) {
 163                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 164                if (unlikely(err))
 165                        return err;
 166        }
 167#endif
 168
 169        skb_postpush_rcsum(skb, hdr, tot_len);
 170
 171        return 0;
 172}
 173EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 174
 175/* insert an SRH within an IPv6 packet, just after the IPv6 header */
 176int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 177{
 178        struct ipv6hdr *hdr, *oldhdr;
 179        struct ipv6_sr_hdr *isrh;
 180        int hdrlen, err;
 181
 182        hdrlen = (osrh->hdrlen + 1) << 3;
 183
 184        err = skb_cow_head(skb, hdrlen + skb->mac_len);
 185        if (unlikely(err))
 186                return err;
 187
 188        oldhdr = ipv6_hdr(skb);
 189
 190        skb_pull(skb, sizeof(struct ipv6hdr));
 191        skb_postpull_rcsum(skb, skb_network_header(skb),
 192                           sizeof(struct ipv6hdr));
 193
 194        skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
 195        skb_reset_network_header(skb);
 196        skb_mac_header_rebuild(skb);
 197
 198        hdr = ipv6_hdr(skb);
 199
 200        memmove(hdr, oldhdr, sizeof(*hdr));
 201
 202        isrh = (void *)hdr + sizeof(*hdr);
 203        memcpy(isrh, osrh, hdrlen);
 204
 205        isrh->nexthdr = hdr->nexthdr;
 206        hdr->nexthdr = NEXTHDR_ROUTING;
 207
 208        isrh->segments[0] = hdr->daddr;
 209        hdr->daddr = isrh->segments[isrh->first_segment];
 210
 211#ifdef CONFIG_IPV6_SEG6_HMAC
 212        if (sr_has_hmac(isrh)) {
 213                struct net *net = dev_net(skb_dst(skb)->dev);
 214
 215                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 216                if (unlikely(err))
 217                        return err;
 218        }
 219#endif
 220
 221        skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
 222
 223        return 0;
 224}
 225EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 226
 227static int seg6_do_srh(struct sk_buff *skb)
 228{
 229        struct dst_entry *dst = skb_dst(skb);
 230        struct seg6_iptunnel_encap *tinfo;
 231        int proto, err = 0;
 232
 233        tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 234
 235        switch (tinfo->mode) {
 236        case SEG6_IPTUN_MODE_INLINE:
 237                if (skb->protocol != htons(ETH_P_IPV6))
 238                        return -EINVAL;
 239
 240                err = seg6_do_srh_inline(skb, tinfo->srh);
 241                if (err)
 242                        return err;
 243                break;
 244        case SEG6_IPTUN_MODE_ENCAP:
 245                err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
 246                if (err)
 247                        return err;
 248
 249                if (skb->protocol == htons(ETH_P_IPV6))
 250                        proto = IPPROTO_IPV6;
 251                else if (skb->protocol == htons(ETH_P_IP))
 252                        proto = IPPROTO_IPIP;
 253                else
 254                        return -EINVAL;
 255
 256                err = seg6_do_srh_encap(skb, tinfo->srh, proto);
 257                if (err)
 258                        return err;
 259
 260                skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 261                skb_set_inner_protocol(skb, skb->protocol);
 262                skb->protocol = htons(ETH_P_IPV6);
 263                break;
 264        case SEG6_IPTUN_MODE_L2ENCAP:
 265                if (!skb_mac_header_was_set(skb))
 266                        return -EINVAL;
 267
 268                if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
 269                        return -ENOMEM;
 270
 271                skb_mac_header_rebuild(skb);
 272                skb_push(skb, skb->mac_len);
 273
 274                err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
 275                if (err)
 276                        return err;
 277
 278                skb->protocol = htons(ETH_P_IPV6);
 279                break;
 280        }
 281
 282        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 283        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 284
 285        return 0;
 286}
 287
 288static int seg6_input(struct sk_buff *skb)
 289{
 290        struct dst_entry *orig_dst = skb_dst(skb);
 291        struct dst_entry *dst = NULL;
 292        struct seg6_lwt *slwt;
 293        int err;
 294
 295        err = seg6_do_srh(skb);
 296        if (unlikely(err)) {
 297                kfree_skb(skb);
 298                return err;
 299        }
 300
 301        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 302
 303        preempt_disable();
 304        dst = dst_cache_get(&slwt->cache);
 305        preempt_enable();
 306
 307        skb_dst_drop(skb);
 308
 309        if (!dst) {
 310                ip6_route_input(skb);
 311                dst = skb_dst(skb);
 312                if (!dst->error) {
 313                        preempt_disable();
 314                        dst_cache_set_ip6(&slwt->cache, dst,
 315                                          &ipv6_hdr(skb)->saddr);
 316                        preempt_enable();
 317                }
 318        } else {
 319                skb_dst_set(skb, dst);
 320        }
 321
 322        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 323        if (unlikely(err))
 324                return err;
 325
 326        return dst_input(skb);
 327}
 328
 329static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 330{
 331        struct dst_entry *orig_dst = skb_dst(skb);
 332        struct dst_entry *dst = NULL;
 333        struct seg6_lwt *slwt;
 334        int err = -EINVAL;
 335
 336        err = seg6_do_srh(skb);
 337        if (unlikely(err))
 338                goto drop;
 339
 340        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 341
 342        preempt_disable();
 343        dst = dst_cache_get(&slwt->cache);
 344        preempt_enable();
 345
 346        if (unlikely(!dst)) {
 347                struct ipv6hdr *hdr = ipv6_hdr(skb);
 348                struct flowi6 fl6;
 349
 350                memset(&fl6, 0, sizeof(fl6));
 351                fl6.daddr = hdr->daddr;
 352                fl6.saddr = hdr->saddr;
 353                fl6.flowlabel = ip6_flowinfo(hdr);
 354                fl6.flowi6_mark = skb->mark;
 355                fl6.flowi6_proto = hdr->nexthdr;
 356
 357                dst = ip6_route_output(net, NULL, &fl6);
 358                if (dst->error) {
 359                        err = dst->error;
 360                        dst_release(dst);
 361                        goto drop;
 362                }
 363
 364                preempt_disable();
 365                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
 366                preempt_enable();
 367        }
 368
 369        skb_dst_drop(skb);
 370        skb_dst_set(skb, dst);
 371
 372        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 373        if (unlikely(err))
 374                goto drop;
 375
 376        return dst_output(net, sk, skb);
 377drop:
 378        kfree_skb(skb);
 379        return err;
 380}
 381
 382static int seg6_build_state(struct nlattr *nla,
 383                            unsigned int family, const void *cfg,
 384                            struct lwtunnel_state **ts,
 385                            struct netlink_ext_ack *extack)
 386{
 387        struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 388        struct seg6_iptunnel_encap *tuninfo;
 389        struct lwtunnel_state *newts;
 390        int tuninfo_len, min_size;
 391        struct seg6_lwt *slwt;
 392        int err;
 393
 394        if (family != AF_INET && family != AF_INET6)
 395                return -EINVAL;
 396
 397        err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
 398                               seg6_iptunnel_policy, extack);
 399
 400        if (err < 0)
 401                return err;
 402
 403        if (!tb[SEG6_IPTUNNEL_SRH])
 404                return -EINVAL;
 405
 406        tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
 407        tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
 408
 409        /* tuninfo must contain at least the iptunnel encap structure,
 410         * the SRH and one segment
 411         */
 412        min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
 413                   sizeof(struct in6_addr);
 414        if (tuninfo_len < min_size)
 415                return -EINVAL;
 416
 417        switch (tuninfo->mode) {
 418        case SEG6_IPTUN_MODE_INLINE:
 419                if (family != AF_INET6)
 420                        return -EINVAL;
 421
 422                break;
 423        case SEG6_IPTUN_MODE_ENCAP:
 424                break;
 425        case SEG6_IPTUN_MODE_L2ENCAP:
 426                break;
 427        default:
 428                return -EINVAL;
 429        }
 430
 431        /* verify that SRH is consistent */
 432        if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
 433                return -EINVAL;
 434
 435        newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
 436        if (!newts)
 437                return -ENOMEM;
 438
 439        slwt = seg6_lwt_lwtunnel(newts);
 440
 441        err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
 442        if (err) {
 443                kfree(newts);
 444                return err;
 445        }
 446
 447        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 448
 449        newts->type = LWTUNNEL_ENCAP_SEG6;
 450        newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
 451
 452        if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
 453                newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
 454
 455        newts->headroom = seg6_lwt_headroom(tuninfo);
 456
 457        *ts = newts;
 458
 459        return 0;
 460}
 461
 462static void seg6_destroy_state(struct lwtunnel_state *lwt)
 463{
 464        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 465}
 466
 467static int seg6_fill_encap_info(struct sk_buff *skb,
 468                                struct lwtunnel_state *lwtstate)
 469{
 470        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 471
 472        if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
 473                return -EMSGSIZE;
 474
 475        return 0;
 476}
 477
 478static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
 479{
 480        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 481
 482        return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
 483}
 484
 485static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 486{
 487        struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
 488        struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
 489        int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
 490
 491        if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
 492                return 1;
 493
 494        return memcmp(a_hdr, b_hdr, len);
 495}
 496
 497static const struct lwtunnel_encap_ops seg6_iptun_ops = {
 498        .build_state = seg6_build_state,
 499        .destroy_state = seg6_destroy_state,
 500        .output = seg6_output,
 501        .input = seg6_input,
 502        .fill_encap = seg6_fill_encap_info,
 503        .get_encap_size = seg6_encap_nlsize,
 504        .cmp_encap = seg6_encap_cmp,
 505        .owner = THIS_MODULE,
 506};
 507
 508int __init seg6_iptunnel_init(void)
 509{
 510        return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 511}
 512
 513void seg6_iptunnel_exit(void)
 514{
 515        lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 516}
 517