linux/net/ipv6/seg6_iptunnel.c
<<
>>
Prefs
   1/*
   2 *  SR-IPv6 implementation
   3 *
   4 *  Author:
   5 *  David Lebrun <david.lebrun@uclouvain.be>
   6 *
   7 *
   8 *  This program is free software; you can redistribute it and/or
   9 *        modify it under the terms of the GNU General Public License
  10 *        as published by the Free Software Foundation; either version
  11 *        2 of the License, or (at your option) any later version.
  12 */
  13
  14#include <linux/types.h>
  15#include <linux/skbuff.h>
  16#include <linux/net.h>
  17#include <linux/module.h>
  18#include <net/ip.h>
  19#include <net/lwtunnel.h>
  20#include <net/netevent.h>
  21#include <net/netns/generic.h>
  22#include <net/ip6_fib.h>
  23#include <net/route.h>
  24#include <net/seg6.h>
  25#include <linux/seg6.h>
  26#include <linux/seg6_iptunnel.h>
  27#include <net/addrconf.h>
  28#include <net/ip6_route.h>
  29#include <net/dst_cache.h>
  30#ifdef CONFIG_IPV6_SEG6_HMAC
  31#include <net/seg6_hmac.h>
  32#endif
  33
  34struct seg6_lwt {
  35        struct dst_cache cache;
  36        struct seg6_iptunnel_encap tuninfo[0];
  37};
  38
  39static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  40{
  41        return (struct seg6_lwt *)lwt->data;
  42}
  43
  44static inline struct seg6_iptunnel_encap *
  45seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  46{
  47        return seg6_lwt_lwtunnel(lwt)->tuninfo;
  48}
  49
  50static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
  51        [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
  52};
  53
  54static int nla_put_srh(struct sk_buff *skb, int attrtype,
  55                       struct seg6_iptunnel_encap *tuninfo)
  56{
  57        struct seg6_iptunnel_encap *data;
  58        struct nlattr *nla;
  59        int len;
  60
  61        len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  62
  63        nla = nla_reserve(skb, attrtype, len);
  64        if (!nla)
  65                return -EMSGSIZE;
  66
  67        data = nla_data(nla);
  68        memcpy(data, tuninfo, len);
  69
  70        return 0;
  71}
  72
  73static void set_tun_src(struct net *net, struct net_device *dev,
  74                        struct in6_addr *daddr, struct in6_addr *saddr)
  75{
  76        struct seg6_pernet_data *sdata = seg6_pernet(net);
  77        struct in6_addr *tun_src;
  78
  79        rcu_read_lock();
  80
  81        tun_src = rcu_dereference(sdata->tun_src);
  82
  83        if (!ipv6_addr_any(tun_src)) {
  84                memcpy(saddr, tun_src, sizeof(struct in6_addr));
  85        } else {
  86                ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
  87                                   saddr);
  88        }
  89
  90        rcu_read_unlock();
  91}
  92
  93/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
  94static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
  95{
  96        struct net *net = dev_net(skb_dst(skb)->dev);
  97        struct ipv6hdr *hdr, *inner_hdr;
  98        struct ipv6_sr_hdr *isrh;
  99        int hdrlen, tot_len, err;
 100
 101        hdrlen = (osrh->hdrlen + 1) << 3;
 102        tot_len = hdrlen + sizeof(*hdr);
 103
 104        err = skb_cow_head(skb, tot_len);
 105        if (unlikely(err))
 106                return err;
 107
 108        inner_hdr = ipv6_hdr(skb);
 109
 110        skb_push(skb, tot_len);
 111        skb_reset_network_header(skb);
 112        skb_mac_header_rebuild(skb);
 113        hdr = ipv6_hdr(skb);
 114
 115        /* inherit tc, flowlabel and hlim
 116         * hlim will be decremented in ip6_forward() afterwards and
 117         * decapsulation will overwrite inner hlim with outer hlim
 118         */
 119        ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
 120                     ip6_flowlabel(inner_hdr));
 121        hdr->hop_limit = inner_hdr->hop_limit;
 122        hdr->nexthdr = NEXTHDR_ROUTING;
 123
 124        isrh = (void *)hdr + sizeof(*hdr);
 125        memcpy(isrh, osrh, hdrlen);
 126
 127        isrh->nexthdr = NEXTHDR_IPV6;
 128
 129        hdr->daddr = isrh->segments[isrh->first_segment];
 130        set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
 131
 132#ifdef CONFIG_IPV6_SEG6_HMAC
 133        if (sr_has_hmac(isrh)) {
 134                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 135                if (unlikely(err))
 136                        return err;
 137        }
 138#endif
 139
 140        skb_postpush_rcsum(skb, hdr, tot_len);
 141
 142        return 0;
 143}
 144
 145/* insert an SRH within an IPv6 packet, just after the IPv6 header */
 146#ifdef CONFIG_IPV6_SEG6_INLINE
 147static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
 148{
 149        struct ipv6hdr *hdr, *oldhdr;
 150        struct ipv6_sr_hdr *isrh;
 151        int hdrlen, err;
 152
 153        hdrlen = (osrh->hdrlen + 1) << 3;
 154
 155        err = skb_cow_head(skb, hdrlen);
 156        if (unlikely(err))
 157                return err;
 158
 159        oldhdr = ipv6_hdr(skb);
 160
 161        skb_pull(skb, sizeof(struct ipv6hdr));
 162        skb_postpull_rcsum(skb, skb_network_header(skb),
 163                           sizeof(struct ipv6hdr));
 164
 165        skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
 166        skb_reset_network_header(skb);
 167        skb_mac_header_rebuild(skb);
 168
 169        hdr = ipv6_hdr(skb);
 170
 171        memmove(hdr, oldhdr, sizeof(*hdr));
 172
 173        isrh = (void *)hdr + sizeof(*hdr);
 174        memcpy(isrh, osrh, hdrlen);
 175
 176        isrh->nexthdr = hdr->nexthdr;
 177        hdr->nexthdr = NEXTHDR_ROUTING;
 178
 179        isrh->segments[0] = hdr->daddr;
 180        hdr->daddr = isrh->segments[isrh->first_segment];
 181
 182#ifdef CONFIG_IPV6_SEG6_HMAC
 183        if (sr_has_hmac(isrh)) {
 184                struct net *net = dev_net(skb_dst(skb)->dev);
 185
 186                err = seg6_push_hmac(net, &hdr->saddr, isrh);
 187                if (unlikely(err))
 188                        return err;
 189        }
 190#endif
 191
 192        skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
 193
 194        return 0;
 195}
 196#endif
 197
 198static int seg6_do_srh(struct sk_buff *skb)
 199{
 200        struct dst_entry *dst = skb_dst(skb);
 201        struct seg6_iptunnel_encap *tinfo;
 202        int err = 0;
 203
 204        tinfo = seg6_encap_lwtunnel(dst->lwtstate);
 205
 206        if (likely(!skb->encapsulation)) {
 207                skb_reset_inner_headers(skb);
 208                skb->encapsulation = 1;
 209        }
 210
 211        switch (tinfo->mode) {
 212#ifdef CONFIG_IPV6_SEG6_INLINE
 213        case SEG6_IPTUN_MODE_INLINE:
 214                err = seg6_do_srh_inline(skb, tinfo->srh);
 215                skb_reset_inner_headers(skb);
 216                break;
 217#endif
 218        case SEG6_IPTUN_MODE_ENCAP:
 219                err = seg6_do_srh_encap(skb, tinfo->srh);
 220                break;
 221        }
 222
 223        if (err)
 224                return err;
 225
 226        ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 227        skb_set_transport_header(skb, sizeof(struct ipv6hdr));
 228
 229        skb_set_inner_protocol(skb, skb->protocol);
 230
 231        return 0;
 232}
 233
 234static int seg6_input(struct sk_buff *skb)
 235{
 236        struct dst_entry *orig_dst = skb_dst(skb);
 237        struct dst_entry *dst = NULL;
 238        struct seg6_lwt *slwt;
 239        int err;
 240
 241        err = seg6_do_srh(skb);
 242        if (unlikely(err)) {
 243                kfree_skb(skb);
 244                return err;
 245        }
 246
 247        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 248
 249        preempt_disable();
 250        dst = dst_cache_get(&slwt->cache);
 251        preempt_enable();
 252
 253        skb_dst_drop(skb);
 254
 255        if (!dst) {
 256                ip6_route_input(skb);
 257                dst = skb_dst(skb);
 258                if (!dst->error) {
 259                        preempt_disable();
 260                        dst_cache_set_ip6(&slwt->cache, dst,
 261                                          &ipv6_hdr(skb)->saddr);
 262                        preempt_enable();
 263                }
 264        } else {
 265                skb_dst_set(skb, dst);
 266        }
 267
 268        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 269        if (unlikely(err))
 270                return err;
 271
 272        return dst_input(skb);
 273}
 274
 275static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 276{
 277        struct dst_entry *orig_dst = skb_dst(skb);
 278        struct dst_entry *dst = NULL;
 279        struct seg6_lwt *slwt;
 280        int err = -EINVAL;
 281
 282        err = seg6_do_srh(skb);
 283        if (unlikely(err))
 284                goto drop;
 285
 286        slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 287
 288        preempt_disable();
 289        dst = dst_cache_get(&slwt->cache);
 290        preempt_enable();
 291
 292        if (unlikely(!dst)) {
 293                struct ipv6hdr *hdr = ipv6_hdr(skb);
 294                struct flowi6 fl6;
 295
 296                fl6.daddr = hdr->daddr;
 297                fl6.saddr = hdr->saddr;
 298                fl6.flowlabel = ip6_flowinfo(hdr);
 299                fl6.flowi6_mark = skb->mark;
 300                fl6.flowi6_proto = hdr->nexthdr;
 301
 302                dst = ip6_route_output(net, NULL, &fl6);
 303                if (dst->error) {
 304                        err = dst->error;
 305                        dst_release(dst);
 306                        goto drop;
 307                }
 308
 309                preempt_disable();
 310                dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
 311                preempt_enable();
 312        }
 313
 314        skb_dst_drop(skb);
 315        skb_dst_set(skb, dst);
 316
 317        err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 318        if (unlikely(err))
 319                goto drop;
 320
 321        return dst_output(net, sk, skb);
 322drop:
 323        kfree_skb(skb);
 324        return err;
 325}
 326
 327static int seg6_build_state(struct nlattr *nla,
 328                            unsigned int family, const void *cfg,
 329                            struct lwtunnel_state **ts,
 330                            struct netlink_ext_ack *extack)
 331{
 332        struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
 333        struct seg6_iptunnel_encap *tuninfo;
 334        struct lwtunnel_state *newts;
 335        int tuninfo_len, min_size;
 336        struct seg6_lwt *slwt;
 337        int err;
 338
 339        err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
 340                               seg6_iptunnel_policy, extack);
 341
 342        if (err < 0)
 343                return err;
 344
 345        if (!tb[SEG6_IPTUNNEL_SRH])
 346                return -EINVAL;
 347
 348        tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
 349        tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
 350
 351        /* tuninfo must contain at least the iptunnel encap structure,
 352         * the SRH and one segment
 353         */
 354        min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
 355                   sizeof(struct in6_addr);
 356        if (tuninfo_len < min_size)
 357                return -EINVAL;
 358
 359        switch (tuninfo->mode) {
 360#ifdef CONFIG_IPV6_SEG6_INLINE
 361        case SEG6_IPTUN_MODE_INLINE:
 362                break;
 363#endif
 364        case SEG6_IPTUN_MODE_ENCAP:
 365                break;
 366        default:
 367                return -EINVAL;
 368        }
 369
 370        /* verify that SRH is consistent */
 371        if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
 372                return -EINVAL;
 373
 374        newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
 375        if (!newts)
 376                return -ENOMEM;
 377
 378        slwt = seg6_lwt_lwtunnel(newts);
 379
 380        err = dst_cache_init(&slwt->cache, GFP_KERNEL);
 381        if (err) {
 382                kfree(newts);
 383                return err;
 384        }
 385
 386        memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
 387
 388        newts->type = LWTUNNEL_ENCAP_SEG6;
 389        newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
 390                        LWTUNNEL_STATE_INPUT_REDIRECT;
 391        newts->headroom = seg6_lwt_headroom(tuninfo);
 392
 393        *ts = newts;
 394
 395        return 0;
 396}
 397
 398static void seg6_destroy_state(struct lwtunnel_state *lwt)
 399{
 400        dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
 401}
 402
 403static int seg6_fill_encap_info(struct sk_buff *skb,
 404                                struct lwtunnel_state *lwtstate)
 405{
 406        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 407
 408        if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
 409                return -EMSGSIZE;
 410
 411        return 0;
 412}
 413
 414static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
 415{
 416        struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
 417
 418        return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
 419}
 420
 421static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 422{
 423        struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
 424        struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
 425        int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
 426
 427        if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
 428                return 1;
 429
 430        return memcmp(a_hdr, b_hdr, len);
 431}
 432
 433static const struct lwtunnel_encap_ops seg6_iptun_ops = {
 434        .build_state = seg6_build_state,
 435        .destroy_state = seg6_destroy_state,
 436        .output = seg6_output,
 437        .input = seg6_input,
 438        .fill_encap = seg6_fill_encap_info,
 439        .get_encap_size = seg6_encap_nlsize,
 440        .cmp_encap = seg6_encap_cmp,
 441        .owner = THIS_MODULE,
 442};
 443
 444int __init seg6_iptunnel_init(void)
 445{
 446        return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 447}
 448
 449void seg6_iptunnel_exit(void)
 450{
 451        lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
 452}
 453