linux/net/ipv6/seg6_iptunnel.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  SR-IPv6 implementation
   4 *
   5 *  Author:
   6 *  David Lebrun <david.lebrun@uclouvain.be>
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/skbuff.h>
  11#include <linux/net.h>
  12#include <linux/module.h>
  13#include <net/ip.h>
  14#include <net/ip_tunnels.h>
  15#include <net/lwtunnel.h>
  16#include <net/netevent.h>
  17#include <net/netns/generic.h>
  18#include <net/ip6_fib.h>
  19#include <net/route.h>
  20#include <net/seg6.h>
  21#include <linux/seg6.h>
  22#include <linux/seg6_iptunnel.h>
  23#include <net/addrconf.h>
  24#include <net/ip6_route.h>
  25#include <net/dst_cache.h>
  26#ifdef CONFIG_IPV6_SEG6_HMAC
  27#include <net/seg6_hmac.h>
  28#endif
  29
  30static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
  31{
  32        int head = 0;
  33
  34        switch (tuninfo->mode) {
  35        case SEG6_IPTUN_MODE_INLINE:
  36                break;
  37        case SEG6_IPTUN_MODE_ENCAP:
  38                head = sizeof(struct ipv6hdr);
  39                break;
  40        case SEG6_IPTUN_MODE_L2ENCAP:
  41                return 0;
  42        }
  43
  44        return ((tuninfo->srh->hdrlen + 1) << 3) + head;
  45}
  46
  47struct seg6_lwt {
  48        struct dst_cache cache;
  49        struct seg6_iptunnel_encap tuninfo[];
  50};
  51
  52static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  53{
  54        return (struct seg6_lwt *)lwt->data;
  55}
  56
  57static inline struct seg6_iptunnel_encap *
  58seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  59{
  60        return seg6_lwt_lwtunnel(lwt)->tuninfo;
  61}
  62
  63static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
  64        [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
  65};
  66
  67static int nla_put_srh(struct sk_buff *skb, int attrtype,
  68                       struct seg6_iptunnel_encap *tuninfo)
  69{
  70        struct seg6_iptunnel_encap *data;
  71        struct nlattr *nla;
  72        int len;
  73
  74        len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  75
  76        nla = nla_reserve(skb, attrtype, len);
  77        if (!nla)
  78                return -EMSGSIZE;
  79
  80        data = nla_data(nla);
  81        memcpy(data, tuninfo, len);
  82
  83        return 0;
  84}
  85
  86static void set_tun_src(struct net *net, struct net_device *dev,
  87                        struct in6_addr *daddr, struct in6_addr *saddr)
  88{
  89        struct seg6_pernet_data *sdata = seg6_pernet(net);
  90        struct in6_addr *tun_src;
  91
  92        rcu_read_lock();
  93
  94        tun_src = rcu_dereference(sdata->tun_src);
  95
  96        if (!ipv6_addr_any(tun_src)) {
  97                memcpy(saddr, tun_src, sizeof(struct in6_addr));
  98        } else {
  99                ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
 100                                   saddr);
 101        }
 102
 103        rcu_read_unlock();
 104}
 105
 106/* Compute flowlabel for outer IPv6 header */
 107static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
 108                                  struct ipv6hdr *inner_hdr)
 109{
 110        int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
 111        __be32 flowlabel = 0;
 112        u32 hash;
 113
 114        if (do_flowlabel > 0) {
 115                hash = skb_get_hash(skb);
 116                hash = rol32(hash, 16);
 117                flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 118        } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
 119                flowlabel = ip6_flowlabel(inner_hdr);
 120        }
 121        return flowlabel;
 122}
 123
 124/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
 125int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
 126{
 127        struct dst_entry *dst = skb_dst(skb);
 128        struct net *net = dev_net(dst->dev);
 129        struct ipv6hdr *hdr, *inner_hdr;
 130        struct ipv6_sr_hdr *isrh;
 131        int hdrlen, tot_len, err;
 132        __be32 flowlabel;
 133
 134        hdrlen = (osrh->hdrlen + 1) << 3;
 135        tot_len = hdrlen + sizeof(*hdr);
 136
 137        err = skb_cow_head(skb, tot_len + skb->mac_len);
 138        if (unlikely(err))
 139                return err;
 140
 141        inner_hdr = ipv6_hdr(skb);
 142        flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
 143
 144        skb_push(skb, tot_len);
 145        skb_reset_network_header(skb);
 146        skb_mac_header_rebuild(skb);
 147        hdr = ipv6_hdr(skb);
 148
 149        /* inherit tc, flowlabel and hlim
 150         * hlim will be decremented in ip6_forward() afterwards and
 151         * decapsulation will overwrite inner hlim with outer hlim
 152         */
 153
 154        if (skb->protocol == htons(ETH_P_IPV6)) {
 155                ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
 156                             flowlabel);
 157                hdr->hop_limit = inner_hdr->hop_limit;
 158        } else {
 159                ip6_flow_hdr(hdr, 0, flowlabel);
 160                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
 161
 162                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 163        }
 164
 165        hdr->nexthdr = NEXTHDR_ROUTING;
 166
 167        isrh = (void *)hdr + sizeof(*hdr);
 168        memcpy(isrh, osrh, hdrlen);
 169
 170        isrh->nexthdr = proto;
 171
 172        hdr->daddr = isrh->segments[isrh->first_segment];
 173        set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
 174
 175#ifdef CONFIG_IPV6_SEG6_HMAC
 176        if (sr_has_hmac(isrh)) {
 177                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 178                if (unlikely(err))
 179                        return err;
 180        }
 181#endif
 182
 183        skb_postpush_rcsum(skb, hdr, tot_len);
 184
 185        return 0;
 186}
 187EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 188
 189/* insert an SRH within an IPv6 packet, just after the IPv6 header */
 190int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 191{
 192        struct ipv6hdr *hdr, *oldhdr;
 193        struct ipv6_sr_hdr *isrh;
 194        int hdrlen, err;
 195
 196        hdrlen = (osrh->hdrlen + 1) << 3;
 197
 198        err = skb_cow_head(skb, hdrlen + skb->mac_len);
 199        if (unlikely(err))
 200                return err;
 201
 202        oldhdr = ipv6_hdr(skb);
 203
 204        skb_pull(skb, sizeof(struct ipv6hdr));
 205        skb_postpull_rcsum(skb, skb_network_header(skb),
 206                           sizeof(struct ipv6hdr));
 207
 208        skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
 209        skb_reset_network_header(skb);
 210        skb_mac_header_rebuild(skb);
 211
 212        hdr = ipv6_hdr(skb);
 213
 214        memmove(hdr, oldhdr, sizeof(*hdr));
 215
 216        isrh = (void *)hdr + sizeof(*hdr);
 217        memcpy(isrh, osrh, hdrlen);
 218
 219        isrh->nexthdr = hdr->nexthdr;
 220        hdr->nexthdr = NEXTHDR_ROUTING;
 221
 222        isrh->segments[0] = hdr->daddr;
 223        hdr->daddr = isrh->segments[isrh->first_segment];
 224
 225#ifdef CONFIG_IPV6_SEG6_HMAC
 226        if (sr_has_hmac(isrh)) {
 227                struct net *net = dev_net(skb_dst(skb)->dev);
 228
 229                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 230                if (unlikely(err))
 231                        return err;
 232        }
 233#endif
 234
 235        skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
 236
 237        return 0;
 238}
 239EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 240
 241static int seg6_do_srh(struct sk_buff *skb)
 242{
 243        struct dst_entry *dst = skb_dst(skb);
 244        struct seg6_iptunnel_encap *tinfo;
 245        int proto, err = 0;
 246
 247        tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 248
 249        switch (tinfo->mode) {
 250        case SEG6_IPTUN_MODE_INLINE:
 251                if (skb->protocol != htons(ETH_P_IPV6))
 252                        return -EINVAL;
 253
 254                err = seg6_do_srh_inline(skb, tinfo->srh);
 255                if (err)
 256                        return err;
 257                break;
 258        case SEG6_IPTUN_MODE_ENCAP:
 259                err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
 260                if (err)
 261                        return err;
 262
 263                if (skb->protocol == htons(ETH_P_IPV6))
 264                        proto = IPPROTO_IPV6;
 265                else if (skb->protocol == htons(ETH_P_IP))
 266                        proto = IPPROTO_IPIP;
 267                else
 268                        return -EINVAL;
 269
 270                err = seg6_do_srh_encap(skb, tinfo->srh, proto);
 271                if (err)
 272                        return err;
 273
 274                skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 275                skb_set_inner_protocol(skb, skb->protocol);
 276                skb->protocol = htons(ETH_P_IPV6);
 277                break;
 278        case SEG6_IPTUN_MODE_L2ENCAP:
 279                if (!skb_mac_header_was_set(skb))
 280                        return -EINVAL;
 281
 282                if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
 283                        return -ENOMEM;
 284
 285                skb_mac_header_rebuild(skb);
 286                skb_push(skb, skb->mac_len);
 287
 288                err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
 289                if (err)
 290                        return err;
 291
 292                skb->protocol = htons(ETH_P_IPV6);
 293                break;
 294        }
 295
 296        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 297        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 298
 299        return 0;
 300}
 301
 302static int seg6_input(struct sk_buff *skb)
 303{
 304        struct dst_entry *orig_dst = skb_dst(skb);
 305        struct dst_entry *dst = NULL;
 306        struct seg6_lwt *slwt;
 307        int err;
 308
 309        err = seg6_do_srh(skb);
 310        if (unlikely(err)) {
 311                kfree_skb(skb);
 312                return err;
 313        }
 314
 315        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 316
 317        preempt_disable();
 318        dst = dst_cache_get(&slwt->cache);
 319        preempt_enable();
 320
 321        skb_dst_drop(skb);
 322
 323        if (!dst) {
 324                ip6_route_input(skb);
 325                dst = skb_dst(skb);
 326                if (!dst->error) {
 327                        preempt_disable();
 328                        dst_cache_set_ip6(&slwt->cache, dst,
 329                                          &ipv6_hdr(skb)->saddr);
 330                        preempt_enable();
 331                }
 332        } else {
 333                skb_dst_set(skb, dst);
 334        }
 335
 336        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 337        if (unlikely(err))
 338                return err;
 339
 340        return dst_input(skb);
 341}
 342
 343static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 344{
 345        struct dst_entry *orig_dst = skb_dst(skb);
 346        struct dst_entry *dst = NULL;
 347        struct seg6_lwt *slwt;
 348        int err = -EINVAL;
 349
 350        err = seg6_do_srh(skb);
 351        if (unlikely(err))
 352                goto drop;
 353
 354        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 355
 356        preempt_disable();
 357        dst = dst_cache_get(&slwt->cache);
 358        preempt_enable();
 359
 360        if (unlikely(!dst)) {
 361                struct ipv6hdr *hdr = ipv6_hdr(skb);
 362                struct flowi6 fl6;
 363
 364                memset(&fl6, 0, sizeof(fl6));
 365                fl6.daddr = hdr->daddr;
 366                fl6.saddr = hdr->saddr;
 367                fl6.flowlabel = ip6_flowinfo(hdr);
 368                fl6.flowi6_mark = skb->mark;
 369                fl6.flowi6_proto = hdr->nexthdr;
 370
 371                dst = ip6_route_output(net, NULL, &fl6);
 372                if (dst->error) {
 373                        err = dst->error;
 374                        dst_release(dst);
 375                        goto drop;
 376                }
 377
 378                preempt_disable();
 379                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
 380                preempt_enable();
 381        }
 382
 383        skb_dst_drop(skb);
 384        skb_dst_set(skb, dst);
 385
 386        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 387        if (unlikely(err))
 388                goto drop;
 389
 390        return dst_output(net, sk, skb);
 391drop:
 392        kfree_skb(skb);
 393        return err;
 394}
 395
 396static int seg6_build_state(struct net *net, struct nlattr *nla,
 397                            unsigned int family, const void *cfg,
 398                            struct lwtunnel_state **ts,
 399                            struct netlink_ext_ack *extack)
 400{
 401        struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 402        struct seg6_iptunnel_encap *tuninfo;
 403        struct lwtunnel_state *newts;
 404        int tuninfo_len, min_size;
 405        struct seg6_lwt *slwt;
 406        int err;
 407
 408        if (family != AF_INET && family != AF_INET6)
 409                return -EINVAL;
 410
 411        err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
 412                                          seg6_iptunnel_policy, extack);
 413
 414        if (err < 0)
 415                return err;
 416
 417        if (!tb[SEG6_IPTUNNEL_SRH])
 418                return -EINVAL;
 419
 420        tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
 421        tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
 422
 423        /* tuninfo must contain at least the iptunnel encap structure,
 424         * the SRH and one segment
 425         */
 426        min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
 427                   sizeof(struct in6_addr);
 428        if (tuninfo_len < min_size)
 429                return -EINVAL;
 430
 431        switch (tuninfo->mode) {
 432        case SEG6_IPTUN_MODE_INLINE:
 433                if (family != AF_INET6)
 434                        return -EINVAL;
 435
 436                break;
 437        case SEG6_IPTUN_MODE_ENCAP:
 438                break;
 439        case SEG6_IPTUN_MODE_L2ENCAP:
 440                break;
 441        default:
 442                return -EINVAL;
 443        }
 444
 445        /* verify that SRH is consistent */
 446        if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
 447                return -EINVAL;
 448
 449        newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
 450        if (!newts)
 451                return -ENOMEM;
 452
 453        slwt = seg6_lwt_lwtunnel(newts);
 454
 455        err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
 456        if (err) {
 457                kfree(newts);
 458                return err;
 459        }
 460
 461        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 462
 463        newts->type = LWTUNNEL_ENCAP_SEG6;
 464        newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
 465
 466        if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
 467                newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
 468
 469        newts->headroom = seg6_lwt_headroom(tuninfo);
 470
 471        *ts = newts;
 472
 473        return 0;
 474}
 475
 476static void seg6_destroy_state(struct lwtunnel_state *lwt)
 477{
 478        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 479}
 480
 481static int seg6_fill_encap_info(struct sk_buff *skb,
 482                                struct lwtunnel_state *lwtstate)
 483{
 484        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 485
 486        if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
 487                return -EMSGSIZE;
 488
 489        return 0;
 490}
 491
 492static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
 493{
 494        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 495
 496        return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
 497}
 498
 499static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 500{
 501        struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
 502        struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
 503        int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
 504
 505        if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
 506                return 1;
 507
 508        return memcmp(a_hdr, b_hdr, len);
 509}
 510
 511static const struct lwtunnel_encap_ops seg6_iptun_ops = {
 512        .build_state = seg6_build_state,
 513        .destroy_state = seg6_destroy_state,
 514        .output = seg6_output,
 515        .input = seg6_input,
 516        .fill_encap = seg6_fill_encap_info,
 517        .get_encap_size = seg6_encap_nlsize,
 518        .cmp_encap = seg6_encap_cmp,
 519        .owner = THIS_MODULE,
 520};
 521
 522int __init seg6_iptunnel_init(void)
 523{
 524        return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 525}
 526
 527void seg6_iptunnel_exit(void)
 528{
 529        lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 530}
 531