linux/net/ipv6/seg6_iptunnel.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  SR-IPv6 implementation
   4 *
   5 *  Author:
   6 *  David Lebrun <david.lebrun@uclouvain.be>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/skbuff.h>
  11#include <linux/net.h>
  12#include <linux/module.h>
  13#include <net/ip.h>
  14#include <net/ip_tunnels.h>
  15#include <net/lwtunnel.h>
  16#include <net/netevent.h>
  17#include <net/netns/generic.h>
  18#include <net/ip6_fib.h>
  19#include <net/route.h>
  20#include <net/seg6.h>
  21#include <linux/seg6.h>
  22#include <linux/seg6_iptunnel.h>
  23#include <net/addrconf.h>
  24#include <net/ip6_route.h>
  25#include <net/dst_cache.h>
  26#ifdef CONFIG_IPV6_SEG6_HMAC
  27#include <net/seg6_hmac.h>
  28#endif
  29#include <linux/netfilter.h>
  30
  31static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
  32{
  33        int head = 0;
  34
  35        switch (tuninfo->mode) {
  36        case SEG6_IPTUN_MODE_INLINE:
  37                break;
  38        case SEG6_IPTUN_MODE_ENCAP:
  39                head = sizeof(struct ipv6hdr);
  40                break;
  41        case SEG6_IPTUN_MODE_L2ENCAP:
  42                return 0;
  43        }
  44
  45        return ((tuninfo->srh->hdrlen + 1) << 3) + head;
  46}
  47
  48struct seg6_lwt {
  49        struct dst_cache cache;
  50        struct seg6_iptunnel_encap tuninfo[];
  51};
  52
  53static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  54{
  55        return (struct seg6_lwt *)lwt->data;
  56}
  57
  58static inline struct seg6_iptunnel_encap *
  59seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  60{
  61        return seg6_lwt_lwtunnel(lwt)->tuninfo;
  62}
  63
  64static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
  65        [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
  66};
  67
  68static int nla_put_srh(struct sk_buff *skb, int attrtype,
  69                       struct seg6_iptunnel_encap *tuninfo)
  70{
  71        struct seg6_iptunnel_encap *data;
  72        struct nlattr *nla;
  73        int len;
  74
  75        len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  76
  77        nla = nla_reserve(skb, attrtype, len);
  78        if (!nla)
  79                return -EMSGSIZE;
  80
  81        data = nla_data(nla);
  82        memcpy(data, tuninfo, len);
  83
  84        return 0;
  85}
  86
  87static void set_tun_src(struct net *net, struct net_device *dev,
  88                        struct in6_addr *daddr, struct in6_addr *saddr)
  89{
  90        struct seg6_pernet_data *sdata = seg6_pernet(net);
  91        struct in6_addr *tun_src;
  92
  93        rcu_read_lock();
  94
  95        tun_src = rcu_dereference(sdata->tun_src);
  96
  97        if (!ipv6_addr_any(tun_src)) {
  98                memcpy(saddr, tun_src, sizeof(struct in6_addr));
  99        } else {
 100                ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
 101                                   saddr);
 102        }
 103
 104        rcu_read_unlock();
 105}
 106
 107/* Compute flowlabel for outer IPv6 header */
 108static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
 109                                  struct ipv6hdr *inner_hdr)
 110{
 111        int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
 112        __be32 flowlabel = 0;
 113        u32 hash;
 114
 115        if (do_flowlabel > 0) {
 116                hash = skb_get_hash(skb);
 117                hash = rol32(hash, 16);
 118                flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 119        } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
 120                flowlabel = ip6_flowlabel(inner_hdr);
 121        }
 122        return flowlabel;
 123}
 124
 125/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 126int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 127{
 128        struct dst_entry *dst = skb_dst(skb);
 129        struct net *net = dev_net(dst->dev);
 130        struct ipv6hdr *hdr, *inner_hdr;
 131        struct ipv6_sr_hdr *isrh;
 132        int hdrlen, tot_len, err;
 133        __be32 flowlabel;
 134
 135        hdrlen = (osrh->hdrlen + 1) << 3;
 136        tot_len = hdrlen + sizeof(*hdr);
 137
 138        err = skb_cow_head(skb, tot_len + skb->mac_len);
 139        if (unlikely(err))
 140                return err;
 141
 142        inner_hdr = ipv6_hdr(skb);
 143        flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
 144
 145        skb_push(skb, tot_len);
 146        skb_reset_network_header(skb);
 147        skb_mac_header_rebuild(skb);
 148        hdr = ipv6_hdr(skb);
 149
 150        /* inherit tc, flowlabel and hlim
 151         * hlim will be decremented in ip6_forward() afterwards and
 152         * decapsulation will overwrite inner hlim with outer hlim
 153         */
 154
 155        if (skb->protocol == htons(ETH_P_IPV6)) {
 156                ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
 157                             flowlabel);
 158                hdr->hop_limit = inner_hdr->hop_limit;
 159        } else {
 160                ip6_flow_hdr(hdr, 0, flowlabel);
 161                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
 162
 163                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 164        }
 165
 166        hdr->nexthdr = NEXTHDR_ROUTING;
 167
 168        isrh = (void *)hdr + sizeof(*hdr);
 169        memcpy(isrh, osrh, hdrlen);
 170
 171        isrh->nexthdr = proto;
 172
 173        hdr->daddr = isrh->segments[isrh->first_segment];
 174        set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
 175
 176#ifdef CONFIG_IPV6_SEG6_HMAC
 177        if (sr_has_hmac(isrh)) {
 178                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 179                if (unlikely(err))
 180                        return err;
 181        }
 182#endif
 183
 184        skb_postpush_rcsum(skb, hdr, tot_len);
 185
 186        return 0;
 187}
 188EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 189
 190/* insert an SRH within an IPv6 packet, just after the IPv6 header */
 191int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 192{
 193        struct ipv6hdr *hdr, *oldhdr;
 194        struct ipv6_sr_hdr *isrh;
 195        int hdrlen, err;
 196
 197        hdrlen = (osrh->hdrlen + 1) << 3;
 198
 199        err = skb_cow_head(skb, hdrlen + skb->mac_len);
 200        if (unlikely(err))
 201                return err;
 202
 203        oldhdr = ipv6_hdr(skb);
 204
 205        skb_pull(skb, sizeof(struct ipv6hdr));
 206        skb_postpull_rcsum(skb, skb_network_header(skb),
 207                           sizeof(struct ipv6hdr));
 208
 209        skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
 210        skb_reset_network_header(skb);
 211        skb_mac_header_rebuild(skb);
 212
 213        hdr = ipv6_hdr(skb);
 214
 215        memmove(hdr, oldhdr, sizeof(*hdr));
 216
 217        isrh = (void *)hdr + sizeof(*hdr);
 218        memcpy(isrh, osrh, hdrlen);
 219
 220        isrh->nexthdr = hdr->nexthdr;
 221        hdr->nexthdr = NEXTHDR_ROUTING;
 222
 223        isrh->segments[0] = hdr->daddr;
 224        hdr->daddr = isrh->segments[isrh->first_segment];
 225
 226#ifdef CONFIG_IPV6_SEG6_HMAC
 227        if (sr_has_hmac(isrh)) {
 228                struct net *net = dev_net(skb_dst(skb)->dev);
 229
 230                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 231                if (unlikely(err))
 232                        return err;
 233        }
 234#endif
 235
 236        skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
 237
 238        return 0;
 239}
 240EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 241
 242static int seg6_do_srh(struct sk_buff *skb)
 243{
 244        struct dst_entry *dst = skb_dst(skb);
 245        struct seg6_iptunnel_encap *tinfo;
 246        int proto, err = 0;
 247
 248        tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 249
 250        switch (tinfo->mode) {
 251        case SEG6_IPTUN_MODE_INLINE:
 252                if (skb->protocol != htons(ETH_P_IPV6))
 253                        return -EINVAL;
 254
 255                err = seg6_do_srh_inline(skb, tinfo->srh);
 256                if (err)
 257                        return err;
 258                break;
 259        case SEG6_IPTUN_MODE_ENCAP:
 260                err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
 261                if (err)
 262                        return err;
 263
 264                if (skb->protocol == htons(ETH_P_IPV6))
 265                        proto = IPPROTO_IPV6;
 266                else if (skb->protocol == htons(ETH_P_IP))
 267                        proto = IPPROTO_IPIP;
 268                else
 269                        return -EINVAL;
 270
 271                err = seg6_do_srh_encap(skb, tinfo->srh, proto);
 272                if (err)
 273                        return err;
 274
 275                skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 276                skb_set_inner_protocol(skb, skb->protocol);
 277                skb->protocol = htons(ETH_P_IPV6);
 278                break;
 279        case SEG6_IPTUN_MODE_L2ENCAP:
 280                if (!skb_mac_header_was_set(skb))
 281                        return -EINVAL;
 282
 283                if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
 284                        return -ENOMEM;
 285
 286                skb_mac_header_rebuild(skb);
 287                skb_push(skb, skb->mac_len);
 288
 289                err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
 290                if (err)
 291                        return err;
 292
 293                skb->protocol = htons(ETH_P_IPV6);
 294                break;
 295        }
 296
 297        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 298        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 299        nf_reset_ct(skb);
 300
 301        return 0;
 302}
 303
 304static int seg6_input_finish(struct net *net, struct sock *sk,
 305                             struct sk_buff *skb)
 306{
 307        return dst_input(skb);
 308}
 309
 310static int seg6_input_core(struct net *net, struct sock *sk,
 311                           struct sk_buff *skb)
 312{
 313        struct dst_entry *orig_dst = skb_dst(skb);
 314        struct dst_entry *dst = NULL;
 315        struct seg6_lwt *slwt;
 316        int err;
 317
 318        err = seg6_do_srh(skb);
 319        if (unlikely(err)) {
 320                kfree_skb(skb);
 321                return err;
 322        }
 323
 324        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 325
 326        preempt_disable();
 327        dst = dst_cache_get(&slwt->cache);
 328        preempt_enable();
 329
 330        skb_dst_drop(skb);
 331
 332        if (!dst) {
 333                ip6_route_input(skb);
 334                dst = skb_dst(skb);
 335                if (!dst->error) {
 336                        preempt_disable();
 337                        dst_cache_set_ip6(&slwt->cache, dst,
 338                                          &ipv6_hdr(skb)->saddr);
 339                        preempt_enable();
 340                }
 341        } else {
 342                skb_dst_set(skb, dst);
 343        }
 344
 345        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 346        if (unlikely(err))
 347                return err;
 348
 349        if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 350                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 351                               dev_net(skb->dev), NULL, skb, NULL,
 352                               skb_dst(skb)->dev, seg6_input_finish);
 353
 354        return seg6_input_finish(dev_net(skb->dev), NULL, skb);
 355}
 356
 357static int seg6_input_nf(struct sk_buff *skb)
 358{
 359        struct net_device *dev = skb_dst(skb)->dev;
 360        struct net *net = dev_net(skb->dev);
 361
 362        switch (skb->protocol) {
 363        case htons(ETH_P_IP):
 364                return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
 365                               skb, NULL, dev, seg6_input_core);
 366        case htons(ETH_P_IPV6):
 367                return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
 368                               skb, NULL, dev, seg6_input_core);
 369        }
 370
 371        return -EINVAL;
 372}
 373
 374static int seg6_input(struct sk_buff *skb)
 375{
 376        if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 377                return seg6_input_nf(skb);
 378
 379        return seg6_input_core(dev_net(skb->dev), NULL, skb);
 380}
 381
 382static int seg6_output_core(struct net *net, struct sock *sk,
 383                            struct sk_buff *skb)
 384{
 385        struct dst_entry *orig_dst = skb_dst(skb);
 386        struct dst_entry *dst = NULL;
 387        struct seg6_lwt *slwt;
 388        int err;
 389
 390        err = seg6_do_srh(skb);
 391        if (unlikely(err))
 392                goto drop;
 393
 394        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 395
 396        preempt_disable();
 397        dst = dst_cache_get(&slwt->cache);
 398        preempt_enable();
 399
 400        if (unlikely(!dst)) {
 401                struct ipv6hdr *hdr = ipv6_hdr(skb);
 402                struct flowi6 fl6;
 403
 404                memset(&fl6, 0, sizeof(fl6));
 405                fl6.daddr = hdr->daddr;
 406                fl6.saddr = hdr->saddr;
 407                fl6.flowlabel = ip6_flowinfo(hdr);
 408                fl6.flowi6_mark = skb->mark;
 409                fl6.flowi6_proto = hdr->nexthdr;
 410
 411                dst = ip6_route_output(net, NULL, &fl6);
 412                if (dst->error) {
 413                        err = dst->error;
 414                        dst_release(dst);
 415                        goto drop;
 416                }
 417
 418                preempt_disable();
 419                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
 420                preempt_enable();
 421        }
 422
 423        skb_dst_drop(skb);
 424        skb_dst_set(skb, dst);
 425
 426        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 427        if (unlikely(err))
 428                goto drop;
 429
 430        if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 431                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
 432                               NULL, skb_dst(skb)->dev, dst_output);
 433
 434        return dst_output(net, sk, skb);
 435drop:
 436        kfree_skb(skb);
 437        return err;
 438}
 439
 440static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
 441{
 442        struct net_device *dev = skb_dst(skb)->dev;
 443
 444        switch (skb->protocol) {
 445        case htons(ETH_P_IP):
 446                return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
 447                               NULL, dev, seg6_output_core);
 448        case htons(ETH_P_IPV6):
 449                return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
 450                               NULL, dev, seg6_output_core);
 451        }
 452
 453        return -EINVAL;
 454}
 455
 456static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 457{
 458        if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
 459                return seg6_output_nf(net, sk, skb);
 460
 461        return seg6_output_core(net, sk, skb);
 462}
 463
 464static int seg6_build_state(struct net *net, struct nlattr *nla,
 465                            unsigned int family, const void *cfg,
 466                            struct lwtunnel_state **ts,
 467                            struct netlink_ext_ack *extack)
 468{
 469        struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 470        struct seg6_iptunnel_encap *tuninfo;
 471        struct lwtunnel_state *newts;
 472        int tuninfo_len, min_size;
 473        struct seg6_lwt *slwt;
 474        int err;
 475
 476        if (family != AF_INET && family != AF_INET6)
 477                return -EINVAL;
 478
 479        err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
 480                                          seg6_iptunnel_policy, extack);
 481
 482        if (err < 0)
 483                return err;
 484
 485        if (!tb[SEG6_IPTUNNEL_SRH])
 486                return -EINVAL;
 487
 488        tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
 489        tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
 490
 491        /* tuninfo must contain at least the iptunnel encap structure,
 492         * the SRH and one segment
 493         */
 494        min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
 495                   sizeof(struct in6_addr);
 496        if (tuninfo_len < min_size)
 497                return -EINVAL;
 498
 499        switch (tuninfo->mode) {
 500        case SEG6_IPTUN_MODE_INLINE:
 501                if (family != AF_INET6)
 502                        return -EINVAL;
 503
 504                break;
 505        case SEG6_IPTUN_MODE_ENCAP:
 506                break;
 507        case SEG6_IPTUN_MODE_L2ENCAP:
 508                break;
 509        default:
 510                return -EINVAL;
 511        }
 512
 513        /* verify that SRH is consistent */
 514        if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
 515                return -EINVAL;
 516
 517        newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
 518        if (!newts)
 519                return -ENOMEM;
 520
 521        slwt = seg6_lwt_lwtunnel(newts);
 522
 523        err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
 524        if (err) {
 525                kfree(newts);
 526                return err;
 527        }
 528
 529        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 530
 531        newts->type = LWTUNNEL_ENCAP_SEG6;
 532        newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
 533
 534        if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
 535                newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
 536
 537        newts->headroom = seg6_lwt_headroom(tuninfo);
 538
 539        *ts = newts;
 540
 541        return 0;
 542}
 543
 544static void seg6_destroy_state(struct lwtunnel_state *lwt)
 545{
 546        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 547}
 548
 549static int seg6_fill_encap_info(struct sk_buff *skb,
 550                                struct lwtunnel_state *lwtstate)
 551{
 552        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 553
 554        if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
 555                return -EMSGSIZE;
 556
 557        return 0;
 558}
 559
 560static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
 561{
 562        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 563
 564        return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
 565}
 566
 567static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 568{
 569        struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
 570        struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
 571        int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
 572
 573        if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
 574                return 1;
 575
 576        return memcmp(a_hdr, b_hdr, len);
 577}
 578
 579static const struct lwtunnel_encap_ops seg6_iptun_ops = {
 580        .build_state = seg6_build_state,
 581        .destroy_state = seg6_destroy_state,
 582        .output = seg6_output,
 583        .input = seg6_input,
 584        .fill_encap = seg6_fill_encap_info,
 585        .get_encap_size = seg6_encap_nlsize,
 586        .cmp_encap = seg6_encap_cmp,
 587        .owner = THIS_MODULE,
 588};
 589
 590int __init seg6_iptunnel_init(void)
 591{
 592        return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 593}
 594
 595void seg6_iptunnel_exit(void)
 596{
 597        lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 598}
 599