linux/net/ipv6/addrlabel.c
<<
>>
Prefs
   1/*
   2 * IPv6 Address Label subsystem
   3 * for the IPv6 "Default" Source Address Selection
   4 *
   5 * Copyright (C)2007 USAGI/WIDE Project
   6 */
   7/*
   8 * Author:
   9 *      YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
  10 */
  11
  12#include <linux/kernel.h>
  13#include <linux/list.h>
  14#include <linux/rcupdate.h>
  15#include <linux/in6.h>
  16#include <linux/slab.h>
  17#include <net/addrconf.h>
  18#include <linux/if_addrlabel.h>
  19#include <linux/netlink.h>
  20#include <linux/rtnetlink.h>
  21
  22#if 0
  23#define ADDRLABEL(x...) printk(x)
  24#else
  25#define ADDRLABEL(x...) do { ; } while(0)
  26#endif
  27
  28/*
  29 * Policy Table
  30 */
/* One rule of the address label policy table. */
struct ip6addrlbl_entry
{
	possible_net_t lbl_net;		/* network namespace owning the rule */
	struct in6_addr prefix;		/* prefix, stored through ipv6_addr_prefix() */
	int prefixlen;			/* prefix length in bits */
	int ifindex;			/* interface the rule is bound to; 0 matches any */
	int addrtype;			/* required address type; 0 matches any */
	u32 label;			/* label handed out when the rule matches */
	struct hlist_node list;		/* linkage in ip6addrlbl_table.head */
	atomic_t refcnt;		/* set to 1 on allocation; see hold/put helpers */
	struct rcu_head rcu;		/* used by call_rcu() when the entry is released */
};
  43
/* The single policy table shared by all network namespaces in this file. */
static struct ip6addrlbl_table
{
	struct hlist_head head;		/* list of struct ip6addrlbl_entry */
	spinlock_t lock;		/* taken around list insertions and removals */
	u32 seq;			/* incremented after each successful add/replace */
} ip6addrlbl_table;
  50
  51static inline
  52struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
  53{
  54        return read_pnet(&lbl->lbl_net);
  55}
  56
  57/*
  58 * Default policy table (RFC6724 + extensions)
  59 *
  60 * prefix               addr_type       label
  61 * -------------------------------------------------------------------------
  62 * ::1/128              LOOPBACK        0
  63 * ::/0                 N/A             1
  64 * 2002::/16            N/A             2
  65 * ::/96                COMPATv4        3
  66 * ::ffff:0:0/96        V4MAPPED        4
  67 * fc00::/7             N/A             5               ULA (RFC 4193)
  68 * 2001::/32            N/A             6               Teredo (RFC 4380)
  69 * 2001:10::/28         N/A             7               ORCHID (RFC 4843)
  70 * fec0::/10            N/A             11              Site-local
  71 *                                                      (deprecated by RFC3879)
  72 * 3ffe::/16            N/A             12              6bone
  73 *
  74 * Note: 0xffffffff is used if we do not have any policies.
  75 * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
  76 */
  77
/*
 * Label returned by ipv6_addr_label() when no policy entry matches; the
 * netlink add/delete handler rejects it as a user-supplied label.
 */
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
  79
/*
 * Default rules installed into every network namespace by
 * ip6addrlbl_net_init().  All of them are added with ifindex 0.
 */
static const __net_initconst struct ip6addrlbl_init_table
{
	const struct in6_addr *prefix;
	int prefixlen;
	u32 label;
} ip6addrlbl_init_table[] = {
	{	/* ::/0 (prefixlen left at its implicit 0) */
		.prefix = &in6addr_any,
		.label = 1,
	},{	/* fc00::/7 */
		.prefix = &(struct in6_addr){{{ 0xfc }}},
		.prefixlen = 7,
		.label = 5,
	},{	/* fec0::/10 */
		.prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
		.prefixlen = 10,
		.label = 11,
	},{	/* 2002::/16 */
		.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
		.prefixlen = 16,
		.label = 2,
	},{	/* 3ffe::/16 */
		.prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
		.prefixlen = 16,
		.label = 12,
	},{	/* 2001::/32 */
		.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
		.prefixlen = 32,
		.label = 6,
	},{	/* 2001:10::/28 */
		.prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}},
		.prefixlen = 28,
		.label = 7,
	},{	/* ::ffff:0:0/96 */
		.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
		.prefixlen = 96,
		.label = 4,
	},{	/* ::/96 */
		.prefix = &in6addr_any,
		.prefixlen = 96,
		.label = 3,
	},{	/* ::1/128 */
		.prefix = &in6addr_loopback,
		.prefixlen = 128,
		.label = 0,
	}
};
 127
 128/* Object management */
 129static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
 130{
 131        kfree(p);
 132}
 133
 134static void ip6addrlbl_free_rcu(struct rcu_head *h)
 135{
 136        ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
 137}
 138
 139static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
 140{
 141        return atomic_inc_not_zero(&p->refcnt);
 142}
 143
 144static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
 145{
 146        if (atomic_dec_and_test(&p->refcnt))
 147                call_rcu(&p->rcu, ip6addrlbl_free_rcu);
 148}
 149
 150/* Find label */
 151static bool __ip6addrlbl_match(struct net *net,
 152                               const struct ip6addrlbl_entry *p,
 153                               const struct in6_addr *addr,
 154                               int addrtype, int ifindex)
 155{
 156        if (!net_eq(ip6addrlbl_net(p), net))
 157                return false;
 158        if (p->ifindex && p->ifindex != ifindex)
 159                return false;
 160        if (p->addrtype && p->addrtype != addrtype)
 161                return false;
 162        if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
 163                return false;
 164        return true;
 165}
 166
 167static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
 168                                                  const struct in6_addr *addr,
 169                                                  int type, int ifindex)
 170{
 171        struct ip6addrlbl_entry *p;
 172        hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
 173                if (__ip6addrlbl_match(net, p, addr, type, ifindex))
 174                        return p;
 175        }
 176        return NULL;
 177}
 178
 179u32 ipv6_addr_label(struct net *net,
 180                    const struct in6_addr *addr, int type, int ifindex)
 181{
 182        u32 label;
 183        struct ip6addrlbl_entry *p;
 184
 185        type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
 186
 187        rcu_read_lock();
 188        p = __ipv6_addr_label(net, addr, type, ifindex);
 189        label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
 190        rcu_read_unlock();
 191
 192        ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n",
 193                  __func__, addr, type, ifindex, label);
 194
 195        return label;
 196}
 197
 198/* allocate one entry */
 199static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
 200                                                 const struct in6_addr *prefix,
 201                                                 int prefixlen, int ifindex,
 202                                                 u32 label)
 203{
 204        struct ip6addrlbl_entry *newp;
 205        int addrtype;
 206
 207        ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n",
 208                  __func__, prefix, prefixlen, ifindex, (unsigned int)label);
 209
 210        addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
 211
 212        switch (addrtype) {
 213        case IPV6_ADDR_MAPPED:
 214                if (prefixlen > 96)
 215                        return ERR_PTR(-EINVAL);
 216                if (prefixlen < 96)
 217                        addrtype = 0;
 218                break;
 219        case IPV6_ADDR_COMPATv4:
 220                if (prefixlen != 96)
 221                        addrtype = 0;
 222                break;
 223        case IPV6_ADDR_LOOPBACK:
 224                if (prefixlen != 128)
 225                        addrtype = 0;
 226                break;
 227        }
 228
 229        newp = kmalloc(sizeof(*newp), GFP_KERNEL);
 230        if (!newp)
 231                return ERR_PTR(-ENOMEM);
 232
 233        ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
 234        newp->prefixlen = prefixlen;
 235        newp->ifindex = ifindex;
 236        newp->addrtype = addrtype;
 237        newp->label = label;
 238        INIT_HLIST_NODE(&newp->list);
 239        write_pnet(&newp->lbl_net, net);
 240        atomic_set(&newp->refcnt, 1);
 241        return newp;
 242}
 243
 244/* add a label */
 245static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
 246{
 247        struct hlist_node *n;
 248        struct ip6addrlbl_entry *last = NULL, *p = NULL;
 249        int ret = 0;
 250
 251        ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
 252                  replace);
 253
 254        hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
 255                if (p->prefixlen == newp->prefixlen &&
 256                    net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
 257                    p->ifindex == newp->ifindex &&
 258                    ipv6_addr_equal(&p->prefix, &newp->prefix)) {
 259                        if (!replace) {
 260                                ret = -EEXIST;
 261                                goto out;
 262                        }
 263                        hlist_replace_rcu(&p->list, &newp->list);
 264                        ip6addrlbl_put(p);
 265                        goto out;
 266                } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
 267                           (p->prefixlen < newp->prefixlen)) {
 268                        hlist_add_before_rcu(&newp->list, &p->list);
 269                        goto out;
 270                }
 271                last = p;
 272        }
 273        if (last)
 274                hlist_add_behind_rcu(&newp->list, &last->list);
 275        else
 276                hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
 277out:
 278        if (!ret)
 279                ip6addrlbl_table.seq++;
 280        return ret;
 281}
 282
 283/* add a label */
 284static int ip6addrlbl_add(struct net *net,
 285                          const struct in6_addr *prefix, int prefixlen,
 286                          int ifindex, u32 label, int replace)
 287{
 288        struct ip6addrlbl_entry *newp;
 289        int ret = 0;
 290
 291        ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
 292                  __func__, prefix, prefixlen, ifindex, (unsigned int)label,
 293                  replace);
 294
 295        newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
 296        if (IS_ERR(newp))
 297                return PTR_ERR(newp);
 298        spin_lock(&ip6addrlbl_table.lock);
 299        ret = __ip6addrlbl_add(newp, replace);
 300        spin_unlock(&ip6addrlbl_table.lock);
 301        if (ret)
 302                ip6addrlbl_free(newp);
 303        return ret;
 304}
 305
 306/* remove a label */
 307static int __ip6addrlbl_del(struct net *net,
 308                            const struct in6_addr *prefix, int prefixlen,
 309                            int ifindex)
 310{
 311        struct ip6addrlbl_entry *p = NULL;
 312        struct hlist_node *n;
 313        int ret = -ESRCH;
 314
 315        ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 316                  __func__, prefix, prefixlen, ifindex);
 317
 318        hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
 319                if (p->prefixlen == prefixlen &&
 320                    net_eq(ip6addrlbl_net(p), net) &&
 321                    p->ifindex == ifindex &&
 322                    ipv6_addr_equal(&p->prefix, prefix)) {
 323                        hlist_del_rcu(&p->list);
 324                        ip6addrlbl_put(p);
 325                        ret = 0;
 326                        break;
 327                }
 328        }
 329        return ret;
 330}
 331
 332static int ip6addrlbl_del(struct net *net,
 333                          const struct in6_addr *prefix, int prefixlen,
 334                          int ifindex)
 335{
 336        struct in6_addr prefix_buf;
 337        int ret;
 338
 339        ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
 340                  __func__, prefix, prefixlen, ifindex);
 341
 342        ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
 343        spin_lock(&ip6addrlbl_table.lock);
 344        ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
 345        spin_unlock(&ip6addrlbl_table.lock);
 346        return ret;
 347}
 348
 349/* add default label */
 350static int __net_init ip6addrlbl_net_init(struct net *net)
 351{
 352        int err = 0;
 353        int i;
 354
 355        ADDRLABEL(KERN_DEBUG "%s\n", __func__);
 356
 357        for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
 358                int ret = ip6addrlbl_add(net,
 359                                         ip6addrlbl_init_table[i].prefix,
 360                                         ip6addrlbl_init_table[i].prefixlen,
 361                                         0,
 362                                         ip6addrlbl_init_table[i].label, 0);
 363                /* XXX: should we free all rules when we catch an error? */
 364                if (ret && (!err || err != -ENOMEM))
 365                        err = ret;
 366        }
 367        return err;
 368}
 369
 370static void __net_exit ip6addrlbl_net_exit(struct net *net)
 371{
 372        struct ip6addrlbl_entry *p = NULL;
 373        struct hlist_node *n;
 374
 375        /* Remove all labels belonging to the exiting net */
 376        spin_lock(&ip6addrlbl_table.lock);
 377        hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
 378                if (net_eq(ip6addrlbl_net(p), net)) {
 379                        hlist_del_rcu(&p->list);
 380                        ip6addrlbl_put(p);
 381                }
 382        }
 383        spin_unlock(&ip6addrlbl_table.lock);
 384}
 385
/*
 * Per-namespace hooks: .init installs the default rules for a namespace,
 * .exit removes all rules belonging to it.
 */
static struct pernet_operations ipv6_addr_label_ops = {
	.init = ip6addrlbl_net_init,
	.exit = ip6addrlbl_net_exit,
};
 390
 391int __init ipv6_addr_label_init(void)
 392{
 393        spin_lock_init(&ip6addrlbl_table.lock);
 394
 395        return register_pernet_subsys(&ipv6_addr_label_ops);
 396}
 397
 398void ipv6_addr_label_cleanup(void)
 399{
 400        unregister_pernet_subsys(&ipv6_addr_label_ops);
 401}
 402
/*
 * Attribute policy handed to nlmsg_parse() by the netlink handlers below:
 * IFAL_ADDRESS is declared with the size of an IPv6 address, IFAL_LABEL
 * with the size of a u32.
 */
static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
	[IFAL_ADDRESS]		= { .len = sizeof(struct in6_addr), },
	[IFAL_LABEL]		= { .len = sizeof(u32), },
};
 407
 408static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
 409{
 410        struct net *net = sock_net(skb->sk);
 411        struct ifaddrlblmsg *ifal;
 412        struct nlattr *tb[IFAL_MAX+1];
 413        struct in6_addr *pfx;
 414        u32 label;
 415        int err = 0;
 416
 417        err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 418        if (err < 0)
 419                return err;
 420
 421        ifal = nlmsg_data(nlh);
 422
 423        if (ifal->ifal_family != AF_INET6 ||
 424            ifal->ifal_prefixlen > 128)
 425                return -EINVAL;
 426
 427        if (!tb[IFAL_ADDRESS])
 428                return -EINVAL;
 429        pfx = nla_data(tb[IFAL_ADDRESS]);
 430
 431        if (!tb[IFAL_LABEL])
 432                return -EINVAL;
 433        label = nla_get_u32(tb[IFAL_LABEL]);
 434        if (label == IPV6_ADDR_LABEL_DEFAULT)
 435                return -EINVAL;
 436
 437        switch(nlh->nlmsg_type) {
 438        case RTM_NEWADDRLABEL:
 439                if (ifal->ifal_index &&
 440                    !__dev_get_by_index(net, ifal->ifal_index))
 441                        return -EINVAL;
 442
 443                err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
 444                                     ifal->ifal_index, label,
 445                                     nlh->nlmsg_flags & NLM_F_REPLACE);
 446                break;
 447        case RTM_DELADDRLABEL:
 448                err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
 449                                     ifal->ifal_index);
 450                break;
 451        default:
 452                err = -EOPNOTSUPP;
 453        }
 454        return err;
 455}
 456
 457static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
 458                              int prefixlen, int ifindex, u32 lseq)
 459{
 460        struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
 461        ifal->ifal_family = AF_INET6;
 462        ifal->ifal_prefixlen = prefixlen;
 463        ifal->ifal_flags = 0;
 464        ifal->ifal_index = ifindex;
 465        ifal->ifal_seq = lseq;
 466};
 467
 468static int ip6addrlbl_fill(struct sk_buff *skb,
 469                           struct ip6addrlbl_entry *p,
 470                           u32 lseq,
 471                           u32 portid, u32 seq, int event,
 472                           unsigned int flags)
 473{
 474        struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
 475                                         sizeof(struct ifaddrlblmsg), flags);
 476        if (!nlh)
 477                return -EMSGSIZE;
 478
 479        ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
 480
 481        if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 ||
 482            nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
 483                nlmsg_cancel(skb, nlh);
 484                return -EMSGSIZE;
 485        }
 486
 487        nlmsg_end(skb, nlh);
 488        return 0;
 489}
 490
 491static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 492{
 493        struct net *net = sock_net(skb->sk);
 494        struct ip6addrlbl_entry *p;
 495        int idx = 0, s_idx = cb->args[0];
 496        int err;
 497
 498        rcu_read_lock();
 499        hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
 500                if (idx >= s_idx &&
 501                    net_eq(ip6addrlbl_net(p), net)) {
 502                        err = ip6addrlbl_fill(skb, p,
 503                                              ip6addrlbl_table.seq,
 504                                              NETLINK_CB(cb->skb).portid,
 505                                              cb->nlh->nlmsg_seq,
 506                                              RTM_NEWADDRLABEL,
 507                                              NLM_F_MULTI);
 508                        if (err < 0)
 509                                break;
 510                }
 511                idx++;
 512        }
 513        rcu_read_unlock();
 514        cb->args[0] = idx;
 515        return skb->len;
 516}
 517
 518static inline int ip6addrlbl_msgsize(void)
 519{
 520        return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
 521                + nla_total_size(16)    /* IFAL_ADDRESS */
 522                + nla_total_size(4);    /* IFAL_LABEL */
 523}
 524
 525static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 526{
 527        struct net *net = sock_net(in_skb->sk);
 528        struct ifaddrlblmsg *ifal;
 529        struct nlattr *tb[IFAL_MAX+1];
 530        struct in6_addr *addr;
 531        u32 lseq;
 532        int err = 0;
 533        struct ip6addrlbl_entry *p;
 534        struct sk_buff *skb;
 535
 536        err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
 537        if (err < 0)
 538                return err;
 539
 540        ifal = nlmsg_data(nlh);
 541
 542        if (ifal->ifal_family != AF_INET6 ||
 543            ifal->ifal_prefixlen != 128)
 544                return -EINVAL;
 545
 546        if (ifal->ifal_index &&
 547            !__dev_get_by_index(net, ifal->ifal_index))
 548                return -EINVAL;
 549
 550        if (!tb[IFAL_ADDRESS])
 551                return -EINVAL;
 552        addr = nla_data(tb[IFAL_ADDRESS]);
 553
 554        rcu_read_lock();
 555        p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
 556        if (p && !ip6addrlbl_hold(p))
 557                p = NULL;
 558        lseq = ip6addrlbl_table.seq;
 559        rcu_read_unlock();
 560
 561        if (!p) {
 562                err = -ESRCH;
 563                goto out;
 564        }
 565
 566        skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
 567        if (!skb) {
 568                ip6addrlbl_put(p);
 569                return -ENOBUFS;
 570        }
 571
 572        err = ip6addrlbl_fill(skb, p, lseq,
 573                              NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 574                              RTM_NEWADDRLABEL, 0);
 575
 576        ip6addrlbl_put(p);
 577
 578        if (err < 0) {
 579                WARN_ON(err == -EMSGSIZE);
 580                kfree_skb(skb);
 581                goto out;
 582        }
 583
 584        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 585out:
 586        return err;
 587}
 588
 589void __init ipv6_addr_label_rtnl_register(void)
 590{
 591        __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
 592                        NULL, NULL);
 593        __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
 594                        NULL, NULL);
 595        __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
 596                        ip6addrlbl_dump, NULL);
 597}
 598
 599