linux/net/ipv6/seg6_iptunnel.c
<<
>>
Prefs
   1/*
   2 *  SR-IPv6 implementation
   3 *
   4 *  Author:
   5 *  David Lebrun <david.lebrun@uclouvain.be>
   6 *
   7 *
   8 *  This program is free software; you can redistribute it and/or
   9 *        modify it under the terms of the GNU General Public License
  10 *        as published by the Free Software Foundation; either version
  11 *        2 of the License, or (at your option) any later version.
  12 */
  13
  14#include <linux/types.h>
  15#include <linux/skbuff.h>
  16#include <linux/net.h>
  17#include <linux/module.h>
  18#include <net/ip.h>
  19#include <net/ip_tunnels.h>
  20#include <net/lwtunnel.h>
  21#include <net/netevent.h>
  22#include <net/netns/generic.h>
  23#include <net/ip6_fib.h>
  24#include <net/route.h>
  25#include <net/seg6.h>
  26#include <linux/seg6.h>
  27#include <linux/seg6_iptunnel.h>
  28#include <net/addrconf.h>
  29#include <net/ip6_route.h>
  30#include <net/dst_cache.h>
  31#ifdef CONFIG_IPV6_SEG6_HMAC
  32#include <net/seg6_hmac.h>
  33#endif
  34
  35struct seg6_lwt {
  36        struct dst_cache cache;
  37        struct seg6_iptunnel_encap tuninfo[0];
  38};
  39
  40static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  41{
  42        return (struct seg6_lwt *)lwt->data;
  43}
  44
  45static inline struct seg6_iptunnel_encap *
  46seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  47{
  48        return seg6_lwt_lwtunnel(lwt)->tuninfo;
  49}
  50
  51static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
  52        [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
  53};
  54
  55static int nla_put_srh(struct sk_buff *skb, int attrtype,
  56                       struct seg6_iptunnel_encap *tuninfo)
  57{
  58        struct seg6_iptunnel_encap *data;
  59        struct nlattr *nla;
  60        int len;
  61
  62        len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  63
  64        nla = nla_reserve(skb, attrtype, len);
  65        if (!nla)
  66                return -EMSGSIZE;
  67
  68        data = nla_data(nla);
  69        memcpy(data, tuninfo, len);
  70
  71        return 0;
  72}
  73
  74static void set_tun_src(struct net *net, struct net_device *dev,
  75                        struct in6_addr *daddr, struct in6_addr *saddr)
  76{
  77        struct seg6_pernet_data *sdata = seg6_pernet(net);
  78        struct in6_addr *tun_src;
  79
  80        rcu_read_lock();
  81
  82        tun_src = rcu_dereference(sdata->tun_src);
  83
  84        if (!ipv6_addr_any(tun_src)) {
  85                memcpy(saddr, tun_src, sizeof(struct in6_addr));
  86        } else {
  87                ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
  88                                   saddr);
  89        }
  90
  91        rcu_read_unlock();
  92}
  93
  94/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
  95int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
  96{
  97        struct dst_entry *dst = skb_dst(skb);
  98        struct net *net = dev_net(dst->dev);
  99        struct ipv6hdr *hdr, *inner_hdr;
 100        struct ipv6_sr_hdr *isrh;
 101        int hdrlen, tot_len, err;
 102
 103        hdrlen = (osrh->hdrlen + 1) << 3;
 104        tot_len = hdrlen + sizeof(*hdr);
 105
 106        err = skb_cow_head(skb, tot_len + skb->mac_len);
 107        if (unlikely(err))
 108                return err;
 109
 110        inner_hdr = ipv6_hdr(skb);
 111
 112        skb_push(skb, tot_len);
 113        skb_reset_network_header(skb);
 114        skb_mac_header_rebuild(skb);
 115        hdr = ipv6_hdr(skb);
 116
 117        /* inherit tc, flowlabel and hlim
 118         * hlim will be decremented in ip6_forward() afterwards and
 119         * decapsulation will overwrite inner hlim with outer hlim
 120         */
 121
 122        if (skb->protocol == htons(ETH_P_IPV6)) {
 123                ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
 124                             ip6_flowlabel(inner_hdr));
 125                hdr->hop_limit = inner_hdr->hop_limit;
 126        } else {
 127                ip6_flow_hdr(hdr, 0, 0);
 128                hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
 129        }
 130
 131        hdr->nexthdr = NEXTHDR_ROUTING;
 132
 133        isrh = (void *)hdr + sizeof(*hdr);
 134        memcpy(isrh, osrh, hdrlen);
 135
 136        isrh->nexthdr = proto;
 137
 138        hdr->daddr = isrh->segments[isrh->first_segment];
 139        set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
 140
 141#ifdef CONFIG_IPV6_SEG6_HMAC
 142        if (sr_has_hmac(isrh)) {
 143                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 144                if (unlikely(err))
 145                        return err;
 146        }
 147#endif
 148
 149        skb_postpush_rcsum(skb, hdr, tot_len);
 150
 151        return 0;
 152}
 153EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
 154
 155/* insert an SRH within an IPv6 packet, just after the IPv6 header */
 156int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 157{
 158        struct ipv6hdr *hdr, *oldhdr;
 159        struct ipv6_sr_hdr *isrh;
 160        int hdrlen, err;
 161
 162        hdrlen = (osrh->hdrlen + 1) << 3;
 163
 164        err = skb_cow_head(skb, hdrlen + skb->mac_len);
 165        if (unlikely(err))
 166                return err;
 167
 168        oldhdr = ipv6_hdr(skb);
 169
 170        skb_pull(skb, sizeof(struct ipv6hdr));
 171        skb_postpull_rcsum(skb, skb_network_header(skb),
 172                           sizeof(struct ipv6hdr));
 173
 174        skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
 175        skb_reset_network_header(skb);
 176        skb_mac_header_rebuild(skb);
 177
 178        hdr = ipv6_hdr(skb);
 179
 180        memmove(hdr, oldhdr, sizeof(*hdr));
 181
 182        isrh = (void *)hdr + sizeof(*hdr);
 183        memcpy(isrh, osrh, hdrlen);
 184
 185        isrh->nexthdr = hdr->nexthdr;
 186        hdr->nexthdr = NEXTHDR_ROUTING;
 187
 188        isrh->segments[0] = hdr->daddr;
 189        hdr->daddr = isrh->segments[isrh->first_segment];
 190
 191#ifdef CONFIG_IPV6_SEG6_HMAC
 192        if (sr_has_hmac(isrh)) {
 193                struct net *net = dev_net(skb_dst(skb)->dev);
 194
 195                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 196                if (unlikely(err))
 197                        return err;
 198        }
 199#endif
 200
 201        skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
 202
 203        return 0;
 204}
 205EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
 206
 207static int seg6_do_srh(struct sk_buff *skb)
 208{
 209        struct dst_entry *dst = skb_dst(skb);
 210        struct seg6_iptunnel_encap *tinfo;
 211        int proto, err = 0;
 212
 213        tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 214
 215        switch (tinfo->mode) {
 216        case SEG6_IPTUN_MODE_INLINE:
 217                if (skb->protocol != htons(ETH_P_IPV6))
 218                        return -EINVAL;
 219
 220                err = seg6_do_srh_inline(skb, tinfo->srh);
 221                if (err)
 222                        return err;
 223                break;
 224        case SEG6_IPTUN_MODE_ENCAP:
 225                err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
 226                if (err)
 227                        return err;
 228
 229                if (skb->protocol == htons(ETH_P_IPV6))
 230                        proto = IPPROTO_IPV6;
 231                else if (skb->protocol == htons(ETH_P_IP))
 232                        proto = IPPROTO_IPIP;
 233                else
 234                        return -EINVAL;
 235
 236                err = seg6_do_srh_encap(skb, tinfo->srh, proto);
 237                if (err)
 238                        return err;
 239
 240                skb_set_inner_transport_header(skb, skb_transport_offset(skb));
 241                skb_set_inner_protocol(skb, skb->protocol);
 242                skb->protocol = htons(ETH_P_IPV6);
 243                break;
 244        case SEG6_IPTUN_MODE_L2ENCAP:
 245                if (!skb_mac_header_was_set(skb))
 246                        return -EINVAL;
 247
 248                if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
 249                        return -ENOMEM;
 250
 251                skb_mac_header_rebuild(skb);
 252                skb_push(skb, skb->mac_len);
 253
 254                err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
 255                if (err)
 256                        return err;
 257
 258                skb->protocol = htons(ETH_P_IPV6);
 259                break;
 260        }
 261
 262        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 263        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 264
 265        return 0;
 266}
 267
 268static int seg6_input(struct sk_buff *skb)
 269{
 270        struct dst_entry *orig_dst = skb_dst(skb);
 271        struct dst_entry *dst = NULL;
 272        struct seg6_lwt *slwt;
 273        int err;
 274
 275        err = seg6_do_srh(skb);
 276        if (unlikely(err)) {
 277                kfree_skb(skb);
 278                return err;
 279        }
 280
 281        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 282
 283        preempt_disable();
 284        dst = dst_cache_get(&slwt->cache);
 285        preempt_enable();
 286
 287        skb_dst_drop(skb);
 288
 289        if (!dst) {
 290                ip6_route_input(skb);
 291                dst = skb_dst(skb);
 292                if (!dst->error) {
 293                        preempt_disable();
 294                        dst_cache_set_ip6(&slwt->cache, dst,
 295                                          &ipv6_hdr(skb)->saddr);
 296                        preempt_enable();
 297                }
 298        } else {
 299                skb_dst_set(skb, dst);
 300        }
 301
 302        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 303        if (unlikely(err))
 304                return err;
 305
 306        return dst_input(skb);
 307}
 308
 309static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 310{
 311        struct dst_entry *orig_dst = skb_dst(skb);
 312        struct dst_entry *dst = NULL;
 313        struct seg6_lwt *slwt;
 314        int err = -EINVAL;
 315
 316        err = seg6_do_srh(skb);
 317        if (unlikely(err))
 318                goto drop;
 319
 320        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 321
 322        preempt_disable();
 323        dst = dst_cache_get(&slwt->cache);
 324        preempt_enable();
 325
 326        if (unlikely(!dst)) {
 327                struct ipv6hdr *hdr = ipv6_hdr(skb);
 328                struct flowi6 fl6;
 329
 330                fl6.daddr = hdr->daddr;
 331                fl6.saddr = hdr->saddr;
 332                fl6.flowlabel = ip6_flowinfo(hdr);
 333                fl6.flowi6_mark = skb->mark;
 334                fl6.flowi6_proto = hdr->nexthdr;
 335
 336                dst = ip6_route_output(net, NULL, &fl6);
 337                if (dst->error) {
 338                        err = dst->error;
 339                        dst_release(dst);
 340                        goto drop;
 341                }
 342
 343                preempt_disable();
 344                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
 345                preempt_enable();
 346        }
 347
 348        skb_dst_drop(skb);
 349        skb_dst_set(skb, dst);
 350
 351        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 352        if (unlikely(err))
 353                goto drop;
 354
 355        return dst_output(net, sk, skb);
 356drop:
 357        kfree_skb(skb);
 358        return err;
 359}
 360
 361static int seg6_build_state(struct nlattr *nla,
 362                            unsigned int family, const void *cfg,
 363                            struct lwtunnel_state **ts,
 364                            struct netlink_ext_ack *extack)
 365{
 366        struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 367        struct seg6_iptunnel_encap *tuninfo;
 368        struct lwtunnel_state *newts;
 369        int tuninfo_len, min_size;
 370        struct seg6_lwt *slwt;
 371        int err;
 372
 373        if (family != AF_INET && family != AF_INET6)
 374                return -EINVAL;
 375
 376        err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
 377                               seg6_iptunnel_policy, extack);
 378
 379        if (err < 0)
 380                return err;
 381
 382        if (!tb[SEG6_IPTUNNEL_SRH])
 383                return -EINVAL;
 384
 385        tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
 386        tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
 387
 388        /* tuninfo must contain at least the iptunnel encap structure,
 389         * the SRH and one segment
 390         */
 391        min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
 392                   sizeof(struct in6_addr);
 393        if (tuninfo_len < min_size)
 394                return -EINVAL;
 395
 396        switch (tuninfo->mode) {
 397        case SEG6_IPTUN_MODE_INLINE:
 398                if (family != AF_INET6)
 399                        return -EINVAL;
 400
 401                break;
 402        case SEG6_IPTUN_MODE_ENCAP:
 403                break;
 404        case SEG6_IPTUN_MODE_L2ENCAP:
 405                break;
 406        default:
 407                return -EINVAL;
 408        }
 409
 410        /* verify that SRH is consistent */
 411        if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
 412                return -EINVAL;
 413
 414        newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
 415        if (!newts)
 416                return -ENOMEM;
 417
 418        slwt = seg6_lwt_lwtunnel(newts);
 419
 420        err = dst_cache_init(&slwt->cache, GFP_ATOMIC);
 421        if (err) {
 422                kfree(newts);
 423                return err;
 424        }
 425
 426        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 427
 428        newts->type = LWTUNNEL_ENCAP_SEG6;
 429        newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
 430
 431        if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
 432                newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
 433
 434        newts->headroom = seg6_lwt_headroom(tuninfo);
 435
 436        *ts = newts;
 437
 438        return 0;
 439}
 440
 441static void seg6_destroy_state(struct lwtunnel_state *lwt)
 442{
 443        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 444}
 445
 446static int seg6_fill_encap_info(struct sk_buff *skb,
 447                                struct lwtunnel_state *lwtstate)
 448{
 449        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 450
 451        if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
 452                return -EMSGSIZE;
 453
 454        return 0;
 455}
 456
 457static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
 458{
 459        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 460
 461        return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
 462}
 463
 464static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 465{
 466        struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
 467        struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
 468        int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
 469
 470        if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
 471                return 1;
 472
 473        return memcmp(a_hdr, b_hdr, len);
 474}
 475
 476static const struct lwtunnel_encap_ops seg6_iptun_ops = {
 477        .build_state = seg6_build_state,
 478        .destroy_state = seg6_destroy_state,
 479        .output = seg6_output,
 480        .input = seg6_input,
 481        .fill_encap = seg6_fill_encap_info,
 482        .get_encap_size = seg6_encap_nlsize,
 483        .cmp_encap = seg6_encap_cmp,
 484        .owner = THIS_MODULE,
 485};
 486
 487int __init seg6_iptunnel_init(void)
 488{
 489        return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 490}
 491
 492void seg6_iptunnel_exit(void)
 493{
 494        lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 495}
 496