linux/net/ipv6/anycast.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Anycast support for IPv6
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      David L Stevens (dlstevens@us.ibm.com)
   8 *
   9 *      based heavily on net/ipv6/mcast.c
  10 */
  11
  12#include <linux/capability.h>
  13#include <linux/module.h>
  14#include <linux/errno.h>
  15#include <linux/types.h>
  16#include <linux/random.h>
  17#include <linux/string.h>
  18#include <linux/socket.h>
  19#include <linux/sockios.h>
  20#include <linux/net.h>
  21#include <linux/in6.h>
  22#include <linux/netdevice.h>
  23#include <linux/if_arp.h>
  24#include <linux/route.h>
  25#include <linux/init.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/slab.h>
  29
  30#include <net/net_namespace.h>
  31#include <net/sock.h>
  32#include <net/snmp.h>
  33
  34#include <net/ipv6.h>
  35#include <net/protocol.h>
  36#include <net/if_inet6.h>
  37#include <net/ndisc.h>
  38#include <net/addrconf.h>
  39#include <net/ip6_route.h>
  40
  41#include <net/checksum.h>
  42
/* log2 of the number of buckets in the anycast address hash table */
#define IN6_ADDR_HSIZE_SHIFT    8
/* number of buckets in the anycast address hash table */
#define IN6_ADDR_HSIZE          BIT(IN6_ADDR_HSIZE_SHIFT)
/*      anycast address hash table
 *
 *      Buckets are selected with inet6_acaddr_hash(); entries are linked
 *      through the aca_addr_lst member of struct ifacaddr6.
 */
static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
/* held while adding to or removing from inet6_acaddr_lst */
static DEFINE_SPINLOCK(acaddr_hash_lock);

/* forward declaration: the socket drop/close paths call this before its definition */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
  51
  52static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
  53{
  54        u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
  55
  56        return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
  57}
  58
  59/*
  60 *      socket join an anycast group
  61 */
  62
  63int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
  64{
  65        struct ipv6_pinfo *np = inet6_sk(sk);
  66        struct net_device *dev = NULL;
  67        struct inet6_dev *idev;
  68        struct ipv6_ac_socklist *pac;
  69        struct net *net = sock_net(sk);
  70        int     ishost = !net->ipv6.devconf_all->forwarding;
  71        int     err = 0;
  72
  73        ASSERT_RTNL();
  74
  75        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  76                return -EPERM;
  77        if (ipv6_addr_is_multicast(addr))
  78                return -EINVAL;
  79
  80        if (ifindex)
  81                dev = __dev_get_by_index(net, ifindex);
  82
  83        if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
  84                return -EINVAL;
  85
  86        pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
  87        if (!pac)
  88                return -ENOMEM;
  89        pac->acl_next = NULL;
  90        pac->acl_addr = *addr;
  91
  92        if (ifindex == 0) {
  93                struct rt6_info *rt;
  94
  95                rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
  96                if (rt) {
  97                        dev = rt->dst.dev;
  98                        ip6_rt_put(rt);
  99                } else if (ishost) {
 100                        err = -EADDRNOTAVAIL;
 101                        goto error;
 102                } else {
 103                        /* router, no matching interface: just pick one */
 104                        dev = __dev_get_by_flags(net, IFF_UP,
 105                                                 IFF_UP | IFF_LOOPBACK);
 106                }
 107        }
 108
 109        if (!dev) {
 110                err = -ENODEV;
 111                goto error;
 112        }
 113
 114        idev = __in6_dev_get(dev);
 115        if (!idev) {
 116                if (ifindex)
 117                        err = -ENODEV;
 118                else
 119                        err = -EADDRNOTAVAIL;
 120                goto error;
 121        }
 122        /* reset ishost, now that we have a specific device */
 123        ishost = !idev->cnf.forwarding;
 124
 125        pac->acl_ifindex = dev->ifindex;
 126
 127        /* XXX
 128         * For hosts, allow link-local or matching prefix anycasts.
 129         * This obviates the need for propagating anycast routes while
 130         * still allowing some non-router anycast participation.
 131         */
 132        if (!ipv6_chk_prefix(addr, dev)) {
 133                if (ishost)
 134                        err = -EADDRNOTAVAIL;
 135                if (err)
 136                        goto error;
 137        }
 138
 139        err = __ipv6_dev_ac_inc(idev, addr);
 140        if (!err) {
 141                pac->acl_next = np->ipv6_ac_list;
 142                np->ipv6_ac_list = pac;
 143                pac = NULL;
 144        }
 145
 146error:
 147        if (pac)
 148                sock_kfree_s(sk, pac, sizeof(*pac));
 149        return err;
 150}
 151
 152/*
 153 *      socket leave an anycast group
 154 */
 155int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 156{
 157        struct ipv6_pinfo *np = inet6_sk(sk);
 158        struct net_device *dev;
 159        struct ipv6_ac_socklist *pac, *prev_pac;
 160        struct net *net = sock_net(sk);
 161
 162        ASSERT_RTNL();
 163
 164        prev_pac = NULL;
 165        for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
 166                if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
 167                     ipv6_addr_equal(&pac->acl_addr, addr))
 168                        break;
 169                prev_pac = pac;
 170        }
 171        if (!pac)
 172                return -ENOENT;
 173        if (prev_pac)
 174                prev_pac->acl_next = pac->acl_next;
 175        else
 176                np->ipv6_ac_list = pac->acl_next;
 177
 178        dev = __dev_get_by_index(net, pac->acl_ifindex);
 179        if (dev)
 180                ipv6_dev_ac_dec(dev, &pac->acl_addr);
 181
 182        sock_kfree_s(sk, pac, sizeof(*pac));
 183        return 0;
 184}
 185
 186void __ipv6_sock_ac_close(struct sock *sk)
 187{
 188        struct ipv6_pinfo *np = inet6_sk(sk);
 189        struct net_device *dev = NULL;
 190        struct ipv6_ac_socklist *pac;
 191        struct net *net = sock_net(sk);
 192        int     prev_index;
 193
 194        ASSERT_RTNL();
 195        pac = np->ipv6_ac_list;
 196        np->ipv6_ac_list = NULL;
 197
 198        prev_index = 0;
 199        while (pac) {
 200                struct ipv6_ac_socklist *next = pac->acl_next;
 201
 202                if (pac->acl_ifindex != prev_index) {
 203                        dev = __dev_get_by_index(net, pac->acl_ifindex);
 204                        prev_index = pac->acl_ifindex;
 205                }
 206                if (dev)
 207                        ipv6_dev_ac_dec(dev, &pac->acl_addr);
 208                sock_kfree_s(sk, pac, sizeof(*pac));
 209                pac = next;
 210        }
 211}
 212
 213void ipv6_sock_ac_close(struct sock *sk)
 214{
 215        struct ipv6_pinfo *np = inet6_sk(sk);
 216
 217        if (!np->ipv6_ac_list)
 218                return;
 219        rtnl_lock();
 220        __ipv6_sock_ac_close(sk);
 221        rtnl_unlock();
 222}
 223
 224static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
 225{
 226        unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
 227
 228        spin_lock(&acaddr_hash_lock);
 229        hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
 230        spin_unlock(&acaddr_hash_lock);
 231}
 232
 233static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
 234{
 235        spin_lock(&acaddr_hash_lock);
 236        hlist_del_init_rcu(&aca->aca_addr_lst);
 237        spin_unlock(&acaddr_hash_lock);
 238}
 239
 240static void aca_get(struct ifacaddr6 *aca)
 241{
 242        refcount_inc(&aca->aca_refcnt);
 243}
 244
 245static void aca_free_rcu(struct rcu_head *h)
 246{
 247        struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
 248
 249        fib6_info_release(aca->aca_rt);
 250        kfree(aca);
 251}
 252
 253static void aca_put(struct ifacaddr6 *ac)
 254{
 255        if (refcount_dec_and_test(&ac->aca_refcnt)) {
 256                call_rcu(&ac->rcu, aca_free_rcu);
 257        }
 258}
 259
 260static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 261                                   const struct in6_addr *addr)
 262{
 263        struct ifacaddr6 *aca;
 264
 265        aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
 266        if (!aca)
 267                return NULL;
 268
 269        aca->aca_addr = *addr;
 270        fib6_info_hold(f6i);
 271        aca->aca_rt = f6i;
 272        INIT_HLIST_NODE(&aca->aca_addr_lst);
 273        aca->aca_users = 1;
 274        /* aca_tstamp should be updated upon changes */
 275        aca->aca_cstamp = aca->aca_tstamp = jiffies;
 276        refcount_set(&aca->aca_refcnt, 1);
 277
 278        return aca;
 279}
 280
 281/*
 282 *      device anycast group inc (add if not found)
 283 */
 284int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 285{
 286        struct ifacaddr6 *aca;
 287        struct fib6_info *f6i;
 288        struct net *net;
 289        int err;
 290
 291        ASSERT_RTNL();
 292
 293        write_lock_bh(&idev->lock);
 294        if (idev->dead) {
 295                err = -ENODEV;
 296                goto out;
 297        }
 298
 299        for (aca = idev->ac_list; aca; aca = aca->aca_next) {
 300                if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 301                        aca->aca_users++;
 302                        err = 0;
 303                        goto out;
 304                }
 305        }
 306
 307        net = dev_net(idev->dev);
 308        f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
 309        if (IS_ERR(f6i)) {
 310                err = PTR_ERR(f6i);
 311                goto out;
 312        }
 313        aca = aca_alloc(f6i, addr);
 314        if (!aca) {
 315                fib6_info_release(f6i);
 316                err = -ENOMEM;
 317                goto out;
 318        }
 319
 320        aca->aca_next = idev->ac_list;
 321        idev->ac_list = aca;
 322
 323        /* Hold this for addrconf_join_solict() below before we unlock,
 324         * it is already exposed via idev->ac_list.
 325         */
 326        aca_get(aca);
 327        write_unlock_bh(&idev->lock);
 328
 329        ipv6_add_acaddr_hash(net, aca);
 330
 331        ip6_ins_rt(net, f6i);
 332
 333        addrconf_join_solict(idev->dev, &aca->aca_addr);
 334
 335        aca_put(aca);
 336        return 0;
 337out:
 338        write_unlock_bh(&idev->lock);
 339        return err;
 340}
 341
 342/*
 343 *      device anycast group decrement
 344 */
 345int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 346{
 347        struct ifacaddr6 *aca, *prev_aca;
 348
 349        ASSERT_RTNL();
 350
 351        write_lock_bh(&idev->lock);
 352        prev_aca = NULL;
 353        for (aca = idev->ac_list; aca; aca = aca->aca_next) {
 354                if (ipv6_addr_equal(&aca->aca_addr, addr))
 355                        break;
 356                prev_aca = aca;
 357        }
 358        if (!aca) {
 359                write_unlock_bh(&idev->lock);
 360                return -ENOENT;
 361        }
 362        if (--aca->aca_users > 0) {
 363                write_unlock_bh(&idev->lock);
 364                return 0;
 365        }
 366        if (prev_aca)
 367                prev_aca->aca_next = aca->aca_next;
 368        else
 369                idev->ac_list = aca->aca_next;
 370        write_unlock_bh(&idev->lock);
 371        ipv6_del_acaddr_hash(aca);
 372        addrconf_leave_solict(idev, &aca->aca_addr);
 373
 374        ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
 375
 376        aca_put(aca);
 377        return 0;
 378}
 379
 380/* called with rtnl_lock() */
 381static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
 382{
 383        struct inet6_dev *idev = __in6_dev_get(dev);
 384
 385        if (!idev)
 386                return -ENODEV;
 387        return __ipv6_dev_ac_dec(idev, addr);
 388}
 389
 390void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 391{
 392        struct ifacaddr6 *aca;
 393
 394        write_lock_bh(&idev->lock);
 395        while ((aca = idev->ac_list) != NULL) {
 396                idev->ac_list = aca->aca_next;
 397                write_unlock_bh(&idev->lock);
 398
 399                ipv6_del_acaddr_hash(aca);
 400
 401                addrconf_leave_solict(idev, &aca->aca_addr);
 402
 403                ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
 404
 405                aca_put(aca);
 406
 407                write_lock_bh(&idev->lock);
 408        }
 409        write_unlock_bh(&idev->lock);
 410}
 411
 412/*
 413 *      check if the interface has this anycast address
 414 *      called with rcu_read_lock()
 415 */
 416static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
 417{
 418        struct inet6_dev *idev;
 419        struct ifacaddr6 *aca;
 420
 421        idev = __in6_dev_get(dev);
 422        if (idev) {
 423                read_lock_bh(&idev->lock);
 424                for (aca = idev->ac_list; aca; aca = aca->aca_next)
 425                        if (ipv6_addr_equal(&aca->aca_addr, addr))
 426                                break;
 427                read_unlock_bh(&idev->lock);
 428                return aca != NULL;
 429        }
 430        return false;
 431}
 432
 433/*
 434 *      check if given interface (or any, if dev==0) has this anycast address
 435 */
 436bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 437                         const struct in6_addr *addr)
 438{
 439        struct net_device *nh_dev;
 440        struct ifacaddr6 *aca;
 441        bool found = false;
 442
 443        rcu_read_lock();
 444        if (dev)
 445                found = ipv6_chk_acast_dev(dev, addr);
 446        else {
 447                unsigned int hash = inet6_acaddr_hash(net, addr);
 448
 449                hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
 450                                         aca_addr_lst) {
 451                        nh_dev = fib6_info_nh_dev(aca->aca_rt);
 452                        if (!nh_dev || !net_eq(dev_net(nh_dev), net))
 453                                continue;
 454                        if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 455                                found = true;
 456                                break;
 457                        }
 458                }
 459        }
 460        rcu_read_unlock();
 461        return found;
 462}
 463
 464/*      check if this anycast address is link-local on given interface or
 465 *      is global
 466 */
 467bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 468                             const struct in6_addr *addr)
 469{
 470        return ipv6_chk_acast_addr(net,
 471                                   (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
 472                                    dev : NULL),
 473                                   addr);
 474}
 475
 476#ifdef CONFIG_PROC_FS
/* Iterator state kept in seq->private for the "anycast6" proc file. */
struct ac6_iter_state {
        struct seq_net_private p;
        /* device the iterator is currently positioned on */
        struct net_device *dev;
        /* inet6_dev whose lock the iterator read-holds when it returns an entry, else NULL */
        struct inet6_dev *idev;
};

/* view seq->private as the iterator state */
#define ac6_seq_private(seq)    ((struct ac6_iter_state *)(seq)->private)
 484
 485static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 486{
 487        struct ifacaddr6 *im = NULL;
 488        struct ac6_iter_state *state = ac6_seq_private(seq);
 489        struct net *net = seq_file_net(seq);
 490
 491        state->idev = NULL;
 492        for_each_netdev_rcu(net, state->dev) {
 493                struct inet6_dev *idev;
 494                idev = __in6_dev_get(state->dev);
 495                if (!idev)
 496                        continue;
 497                read_lock_bh(&idev->lock);
 498                im = idev->ac_list;
 499                if (im) {
 500                        state->idev = idev;
 501                        break;
 502                }
 503                read_unlock_bh(&idev->lock);
 504        }
 505        return im;
 506}
 507
 508static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
 509{
 510        struct ac6_iter_state *state = ac6_seq_private(seq);
 511
 512        im = im->aca_next;
 513        while (!im) {
 514                if (likely(state->idev != NULL))
 515                        read_unlock_bh(&state->idev->lock);
 516
 517                state->dev = next_net_device_rcu(state->dev);
 518                if (!state->dev) {
 519                        state->idev = NULL;
 520                        break;
 521                }
 522                state->idev = __in6_dev_get(state->dev);
 523                if (!state->idev)
 524                        continue;
 525                read_lock_bh(&state->idev->lock);
 526                im = state->idev->ac_list;
 527        }
 528        return im;
 529}
 530
 531static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 532{
 533        struct ifacaddr6 *im = ac6_get_first(seq);
 534        if (im)
 535                while (pos && (im = ac6_get_next(seq, im)) != NULL)
 536                        --pos;
 537        return pos ? NULL : im;
 538}
 539
 540static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
 541        __acquires(RCU)
 542{
 543        rcu_read_lock();
 544        return ac6_get_idx(seq, *pos);
 545}
 546
 547static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 548{
 549        struct ifacaddr6 *im = ac6_get_next(seq, v);
 550
 551        ++*pos;
 552        return im;
 553}
 554
 555static void ac6_seq_stop(struct seq_file *seq, void *v)
 556        __releases(RCU)
 557{
 558        struct ac6_iter_state *state = ac6_seq_private(seq);
 559
 560        if (likely(state->idev != NULL)) {
 561                read_unlock_bh(&state->idev->lock);
 562                state->idev = NULL;
 563        }
 564        rcu_read_unlock();
 565}
 566
 567static int ac6_seq_show(struct seq_file *seq, void *v)
 568{
 569        struct ifacaddr6 *im = (struct ifacaddr6 *)v;
 570        struct ac6_iter_state *state = ac6_seq_private(seq);
 571
 572        seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
 573                   state->dev->ifindex, state->dev->name,
 574                   &im->aca_addr, im->aca_users);
 575        return 0;
 576}
 577
/* seq_file callbacks registered for the "anycast6" proc entry */
static const struct seq_operations ac6_seq_ops = {
        .start  =       ac6_seq_start,
        .next   =       ac6_seq_next,
        .stop   =       ac6_seq_stop,
        .show   =       ac6_seq_show,
};
 584
 585int __net_init ac6_proc_init(struct net *net)
 586{
 587        if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
 588                        sizeof(struct ac6_iter_state)))
 589                return -ENOMEM;
 590
 591        return 0;
 592}
 593
 594void ac6_proc_exit(struct net *net)
 595{
 596        remove_proc_entry("anycast6", net->proc_net);
 597}
 598#endif
 599
 600/*      Init / cleanup code
 601 */
 602int __init ipv6_anycast_init(void)
 603{
 604        int i;
 605
 606        for (i = 0; i < IN6_ADDR_HSIZE; i++)
 607                INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
 608        return 0;
 609}
 610
 611void ipv6_anycast_cleanup(void)
 612{
 613        int i;
 614
 615        spin_lock(&acaddr_hash_lock);
 616        for (i = 0; i < IN6_ADDR_HSIZE; i++)
 617                WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
 618        spin_unlock(&acaddr_hash_lock);
 619}
 620