linux/net/core/lwtunnel.c
<<
>>
Prefs
   1/*
   2 * lwtunnel     Infrastructure for light weight tunnels like mpls
   3 *
   4 * Authors:     Roopa Prabhu, <roopa@cumulusnetworks.com>
   5 *
   6 *              This program is free software; you can redistribute it and/or
   7 *              modify it under the terms of the GNU General Public License
   8 *              as published by the Free Software Foundation; either version
   9 *              2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#include <linux/capability.h>
  14#include <linux/module.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/slab.h>
  18#include <linux/uaccess.h>
  19#include <linux/skbuff.h>
  20#include <linux/netdevice.h>
  21#include <linux/lwtunnel.h>
  22#include <linux/in.h>
  23#include <linux/init.h>
  24#include <linux/err.h>
  25
  26#include <net/lwtunnel.h>
  27#include <net/rtnetlink.h>
  28#include <net/ip6_fib.h>
  29#include <net/nexthop.h>
  30
  31#ifdef CONFIG_MODULES
  32
  33static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
  34{
  35        /* Only lwt encaps implemented without using an interface for
  36         * the encap need to return a string here.
  37         */
  38        switch (encap_type) {
  39        case LWTUNNEL_ENCAP_MPLS:
  40                return "MPLS";
  41        case LWTUNNEL_ENCAP_ILA:
  42                return "ILA";
  43        case LWTUNNEL_ENCAP_SEG6:
  44                return "SEG6";
  45        case LWTUNNEL_ENCAP_BPF:
  46                return "BPF";
  47        case LWTUNNEL_ENCAP_IP6:
  48        case LWTUNNEL_ENCAP_IP:
  49        case LWTUNNEL_ENCAP_NONE:
  50        case __LWTUNNEL_ENCAP_MAX:
  51                /* should not have got here */
  52                WARN_ON(1);
  53                break;
  54        }
  55        return NULL;
  56}
  57
  58#endif /* CONFIG_MODULES */
  59
  60struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
  61{
  62        struct lwtunnel_state *lws;
  63
  64        lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
  65
  66        return lws;
  67}
  68EXPORT_SYMBOL(lwtunnel_state_alloc);
  69
  70static const struct lwtunnel_encap_ops __rcu *
  71                lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
  72
  73int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
  74                           unsigned int num)
  75{
  76        if (num > LWTUNNEL_ENCAP_MAX)
  77                return -ERANGE;
  78
  79        return !cmpxchg((const struct lwtunnel_encap_ops **)
  80                        &lwtun_encaps[num],
  81                        NULL, ops) ? 0 : -1;
  82}
  83EXPORT_SYMBOL(lwtunnel_encap_add_ops);
  84
  85int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
  86                           unsigned int encap_type)
  87{
  88        int ret;
  89
  90        if (encap_type == LWTUNNEL_ENCAP_NONE ||
  91            encap_type > LWTUNNEL_ENCAP_MAX)
  92                return -ERANGE;
  93
  94        ret = (cmpxchg((const struct lwtunnel_encap_ops **)
  95                       &lwtun_encaps[encap_type],
  96                       ops, NULL) == ops) ? 0 : -1;
  97
  98        synchronize_net();
  99
 100        return ret;
 101}
 102EXPORT_SYMBOL(lwtunnel_encap_del_ops);
 103
 104int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
 105                         struct nlattr *encap, unsigned int family,
 106                         const void *cfg, struct lwtunnel_state **lws)
 107{
 108        const struct lwtunnel_encap_ops *ops;
 109        int ret = -EINVAL;
 110
 111        if (encap_type == LWTUNNEL_ENCAP_NONE ||
 112            encap_type > LWTUNNEL_ENCAP_MAX)
 113                return ret;
 114
 115        ret = -EOPNOTSUPP;
 116        rcu_read_lock();
 117        ops = rcu_dereference(lwtun_encaps[encap_type]);
 118        if (likely(ops && ops->build_state && try_module_get(ops->owner))) {
 119                ret = ops->build_state(dev, encap, family, cfg, lws);
 120                if (ret)
 121                        module_put(ops->owner);
 122        }
 123        rcu_read_unlock();
 124
 125        return ret;
 126}
 127EXPORT_SYMBOL(lwtunnel_build_state);
 128
 129int lwtunnel_valid_encap_type(u16 encap_type)
 130{
 131        const struct lwtunnel_encap_ops *ops;
 132        int ret = -EINVAL;
 133
 134        if (encap_type == LWTUNNEL_ENCAP_NONE ||
 135            encap_type > LWTUNNEL_ENCAP_MAX)
 136                return ret;
 137
 138        rcu_read_lock();
 139        ops = rcu_dereference(lwtun_encaps[encap_type]);
 140        rcu_read_unlock();
 141#ifdef CONFIG_MODULES
 142        if (!ops) {
 143                const char *encap_type_str = lwtunnel_encap_str(encap_type);
 144
 145                if (encap_type_str) {
 146                        __rtnl_unlock();
 147                        request_module("rtnl-lwt-%s", encap_type_str);
 148                        rtnl_lock();
 149
 150                        rcu_read_lock();
 151                        ops = rcu_dereference(lwtun_encaps[encap_type]);
 152                        rcu_read_unlock();
 153                }
 154        }
 155#endif
 156        return ops ? 0 : -EOPNOTSUPP;
 157}
 158EXPORT_SYMBOL(lwtunnel_valid_encap_type);
 159
 160int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining)
 161{
 162        struct rtnexthop *rtnh = (struct rtnexthop *)attr;
 163        struct nlattr *nla_entype;
 164        struct nlattr *attrs;
 165        struct nlattr *nla;
 166        u16 encap_type;
 167        int attrlen;
 168
 169        while (rtnh_ok(rtnh, remaining)) {
 170                attrlen = rtnh_attrlen(rtnh);
 171                if (attrlen > 0) {
 172                        attrs = rtnh_attrs(rtnh);
 173                        nla = nla_find(attrs, attrlen, RTA_ENCAP);
 174                        nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
 175
 176                        if (nla_entype) {
 177                                encap_type = nla_get_u16(nla_entype);
 178
 179                                if (lwtunnel_valid_encap_type(encap_type) != 0)
 180                                        return -EOPNOTSUPP;
 181                        }
 182                }
 183                rtnh = rtnh_next(rtnh, &remaining);
 184        }
 185
 186        return 0;
 187}
 188EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr);
 189
 190void lwtstate_free(struct lwtunnel_state *lws)
 191{
 192        const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
 193
 194        if (ops->destroy_state) {
 195                ops->destroy_state(lws);
 196                kfree_rcu(lws, rcu);
 197        } else {
 198                kfree(lws);
 199        }
 200        module_put(ops->owner);
 201}
 202EXPORT_SYMBOL(lwtstate_free);
 203
 204int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
 205{
 206        const struct lwtunnel_encap_ops *ops;
 207        struct nlattr *nest;
 208        int ret = -EINVAL;
 209
 210        if (!lwtstate)
 211                return 0;
 212
 213        if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
 214            lwtstate->type > LWTUNNEL_ENCAP_MAX)
 215                return 0;
 216
 217        ret = -EOPNOTSUPP;
 218        nest = nla_nest_start(skb, RTA_ENCAP);
 219        rcu_read_lock();
 220        ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 221        if (likely(ops && ops->fill_encap))
 222                ret = ops->fill_encap(skb, lwtstate);
 223        rcu_read_unlock();
 224
 225        if (ret)
 226                goto nla_put_failure;
 227        nla_nest_end(skb, nest);
 228        ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
 229        if (ret)
 230                goto nla_put_failure;
 231
 232        return 0;
 233
 234nla_put_failure:
 235        nla_nest_cancel(skb, nest);
 236
 237        return (ret == -EOPNOTSUPP ? 0 : ret);
 238}
 239EXPORT_SYMBOL(lwtunnel_fill_encap);
 240
 241int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
 242{
 243        const struct lwtunnel_encap_ops *ops;
 244        int ret = 0;
 245
 246        if (!lwtstate)
 247                return 0;
 248
 249        if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
 250            lwtstate->type > LWTUNNEL_ENCAP_MAX)
 251                return 0;
 252
 253        rcu_read_lock();
 254        ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 255        if (likely(ops && ops->get_encap_size))
 256                ret = nla_total_size(ops->get_encap_size(lwtstate));
 257        rcu_read_unlock();
 258
 259        return ret;
 260}
 261EXPORT_SYMBOL(lwtunnel_get_encap_size);
 262
 263int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
 264{
 265        const struct lwtunnel_encap_ops *ops;
 266        int ret = 0;
 267
 268        if (!a && !b)
 269                return 0;
 270
 271        if (!a || !b)
 272                return 1;
 273
 274        if (a->type != b->type)
 275                return 1;
 276
 277        if (a->type == LWTUNNEL_ENCAP_NONE ||
 278            a->type > LWTUNNEL_ENCAP_MAX)
 279                return 0;
 280
 281        rcu_read_lock();
 282        ops = rcu_dereference(lwtun_encaps[a->type]);
 283        if (likely(ops && ops->cmp_encap))
 284                ret = ops->cmp_encap(a, b);
 285        rcu_read_unlock();
 286
 287        return ret;
 288}
 289EXPORT_SYMBOL(lwtunnel_cmp_encap);
 290
 291int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 292{
 293        struct dst_entry *dst = skb_dst(skb);
 294        const struct lwtunnel_encap_ops *ops;
 295        struct lwtunnel_state *lwtstate;
 296        int ret = -EINVAL;
 297
 298        if (!dst)
 299                goto drop;
 300        lwtstate = dst->lwtstate;
 301
 302        if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
 303            lwtstate->type > LWTUNNEL_ENCAP_MAX)
 304                return 0;
 305
 306        ret = -EOPNOTSUPP;
 307        rcu_read_lock();
 308        ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 309        if (likely(ops && ops->output))
 310                ret = ops->output(net, sk, skb);
 311        rcu_read_unlock();
 312
 313        if (ret == -EOPNOTSUPP)
 314                goto drop;
 315
 316        return ret;
 317
 318drop:
 319        kfree_skb(skb);
 320
 321        return ret;
 322}
 323EXPORT_SYMBOL(lwtunnel_output);
 324
 325int lwtunnel_xmit(struct sk_buff *skb)
 326{
 327        struct dst_entry *dst = skb_dst(skb);
 328        const struct lwtunnel_encap_ops *ops;
 329        struct lwtunnel_state *lwtstate;
 330        int ret = -EINVAL;
 331
 332        if (!dst)
 333                goto drop;
 334
 335        lwtstate = dst->lwtstate;
 336
 337        if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
 338            lwtstate->type > LWTUNNEL_ENCAP_MAX)
 339                return 0;
 340
 341        ret = -EOPNOTSUPP;
 342        rcu_read_lock();
 343        ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 344        if (likely(ops && ops->xmit))
 345                ret = ops->xmit(skb);
 346        rcu_read_unlock();
 347
 348        if (ret == -EOPNOTSUPP)
 349                goto drop;
 350
 351        return ret;
 352
 353drop:
 354        kfree_skb(skb);
 355
 356        return ret;
 357}
 358EXPORT_SYMBOL(lwtunnel_xmit);
 359
 360int lwtunnel_input(struct sk_buff *skb)
 361{
 362        struct dst_entry *dst = skb_dst(skb);
 363        const struct lwtunnel_encap_ops *ops;
 364        struct lwtunnel_state *lwtstate;
 365        int ret = -EINVAL;
 366
 367        if (!dst)
 368                goto drop;
 369        lwtstate = dst->lwtstate;
 370
 371        if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
 372            lwtstate->type > LWTUNNEL_ENCAP_MAX)
 373                return 0;
 374
 375        ret = -EOPNOTSUPP;
 376        rcu_read_lock();
 377        ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
 378        if (likely(ops && ops->input))
 379                ret = ops->input(skb);
 380        rcu_read_unlock();
 381
 382        if (ret == -EOPNOTSUPP)
 383                goto drop;
 384
 385        return ret;
 386
 387drop:
 388        kfree_skb(skb);
 389
 390        return ret;
 391}
 392EXPORT_SYMBOL(lwtunnel_input);
 393