linux/net/ipv6/anycast.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      Anycast support for IPv6
   4 *      Linux INET6 implementation
   5 *
   6 *      Authors:
   7 *      David L Stevens (dlstevens@us.ibm.com)
   8 *
   9 *      based heavily on net/ipv6/mcast.c
  10 */
  11
  12#include <linux/capability.h>
  13#include <linux/module.h>
  14#include <linux/errno.h>
  15#include <linux/types.h>
  16#include <linux/random.h>
  17#include <linux/string.h>
  18#include <linux/socket.h>
  19#include <linux/sockios.h>
  20#include <linux/net.h>
  21#include <linux/in6.h>
  22#include <linux/netdevice.h>
  23#include <linux/if_arp.h>
  24#include <linux/route.h>
  25#include <linux/init.h>
  26#include <linux/proc_fs.h>
  27#include <linux/seq_file.h>
  28#include <linux/slab.h>
  29
  30#include <net/net_namespace.h>
  31#include <net/sock.h>
  32#include <net/snmp.h>
  33
  34#include <net/ipv6.h>
  35#include <net/protocol.h>
  36#include <net/if_inet6.h>
  37#include <net/ndisc.h>
  38#include <net/addrconf.h>
  39#include <net/ip6_route.h>
  40
  41#include <net/checksum.h>
  42
  43#define IN6_ADDR_HSIZE_SHIFT    8
  44#define IN6_ADDR_HSIZE          BIT(IN6_ADDR_HSIZE_SHIFT)
  45/*      anycast address hash table
  46 */
  47static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
  48static DEFINE_SPINLOCK(acaddr_hash_lock);
  49
  50static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
  51
  52static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
  53{
  54        u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
  55
  56        return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
  57}
  58
  59/*
  60 *      socket join an anycast group
  61 */
  62
  63int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
  64{
  65        struct ipv6_pinfo *np = inet6_sk(sk);
  66        struct net_device *dev = NULL;
  67        struct inet6_dev *idev;
  68        struct ipv6_ac_socklist *pac;
  69        struct net *net = sock_net(sk);
  70        int     ishost = !net->ipv6.devconf_all->forwarding;
  71        int     err = 0;
  72
  73        ASSERT_RTNL();
  74
  75        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  76                return -EPERM;
  77        if (ipv6_addr_is_multicast(addr))
  78                return -EINVAL;
  79
  80        if (ifindex)
  81                dev = __dev_get_by_index(net, ifindex);
  82
  83        if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
  84                return -EINVAL;
  85
  86        pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
  87        if (!pac)
  88                return -ENOMEM;
  89        pac->acl_next = NULL;
  90        pac->acl_addr = *addr;
  91
  92        if (ifindex == 0) {
  93                struct rt6_info *rt;
  94
  95                rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
  96                if (rt) {
  97                        dev = rt->dst.dev;
  98                        ip6_rt_put(rt);
  99                } else if (ishost) {
 100                        err = -EADDRNOTAVAIL;
 101                        goto error;
 102                } else {
 103                        /* router, no matching interface: just pick one */
 104                        dev = __dev_get_by_flags(net, IFF_UP,
 105                                                 IFF_UP | IFF_LOOPBACK);
 106                }
 107        }
 108
 109        if (!dev) {
 110                err = -ENODEV;
 111                goto error;
 112        }
 113
 114        idev = __in6_dev_get(dev);
 115        if (!idev) {
 116                if (ifindex)
 117                        err = -ENODEV;
 118                else
 119                        err = -EADDRNOTAVAIL;
 120                goto error;
 121        }
 122        /* reset ishost, now that we have a specific device */
 123        ishost = !idev->cnf.forwarding;
 124
 125        pac->acl_ifindex = dev->ifindex;
 126
 127        /* XXX
 128         * For hosts, allow link-local or matching prefix anycasts.
 129         * This obviates the need for propagating anycast routes while
 130         * still allowing some non-router anycast participation.
 131         */
 132        if (!ipv6_chk_prefix(addr, dev)) {
 133                if (ishost)
 134                        err = -EADDRNOTAVAIL;
 135                if (err)
 136                        goto error;
 137        }
 138
 139        err = __ipv6_dev_ac_inc(idev, addr);
 140        if (!err) {
 141                pac->acl_next = np->ipv6_ac_list;
 142                np->ipv6_ac_list = pac;
 143                pac = NULL;
 144        }
 145
 146error:
 147        if (pac)
 148                sock_kfree_s(sk, pac, sizeof(*pac));
 149        return err;
 150}
 151
 152/*
 153 *      socket leave an anycast group
 154 */
 155int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 156{
 157        struct ipv6_pinfo *np = inet6_sk(sk);
 158        struct net_device *dev;
 159        struct ipv6_ac_socklist *pac, *prev_pac;
 160        struct net *net = sock_net(sk);
 161
 162        ASSERT_RTNL();
 163
 164        prev_pac = NULL;
 165        for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
 166                if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
 167                     ipv6_addr_equal(&pac->acl_addr, addr))
 168                        break;
 169                prev_pac = pac;
 170        }
 171        if (!pac)
 172                return -ENOENT;
 173        if (prev_pac)
 174                prev_pac->acl_next = pac->acl_next;
 175        else
 176                np->ipv6_ac_list = pac->acl_next;
 177
 178        dev = __dev_get_by_index(net, pac->acl_ifindex);
 179        if (dev)
 180                ipv6_dev_ac_dec(dev, &pac->acl_addr);
 181
 182        sock_kfree_s(sk, pac, sizeof(*pac));
 183        return 0;
 184}
 185
 186void ipv6_sock_ac_close(struct sock *sk)
 187{
 188        struct ipv6_pinfo *np = inet6_sk(sk);
 189        struct net_device *dev = NULL;
 190        struct ipv6_ac_socklist *pac;
 191        struct net *net = sock_net(sk);
 192        int     prev_index;
 193
 194        if (!np->ipv6_ac_list)
 195                return;
 196
 197        rtnl_lock();
 198        pac = np->ipv6_ac_list;
 199        np->ipv6_ac_list = NULL;
 200
 201        prev_index = 0;
 202        while (pac) {
 203                struct ipv6_ac_socklist *next = pac->acl_next;
 204
 205                if (pac->acl_ifindex != prev_index) {
 206                        dev = __dev_get_by_index(net, pac->acl_ifindex);
 207                        prev_index = pac->acl_ifindex;
 208                }
 209                if (dev)
 210                        ipv6_dev_ac_dec(dev, &pac->acl_addr);
 211                sock_kfree_s(sk, pac, sizeof(*pac));
 212                pac = next;
 213        }
 214        rtnl_unlock();
 215}
 216
 217static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
 218{
 219        unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
 220
 221        spin_lock(&acaddr_hash_lock);
 222        hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
 223        spin_unlock(&acaddr_hash_lock);
 224}
 225
 226static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
 227{
 228        spin_lock(&acaddr_hash_lock);
 229        hlist_del_init_rcu(&aca->aca_addr_lst);
 230        spin_unlock(&acaddr_hash_lock);
 231}
 232
 233static void aca_get(struct ifacaddr6 *aca)
 234{
 235        refcount_inc(&aca->aca_refcnt);
 236}
 237
 238static void aca_free_rcu(struct rcu_head *h)
 239{
 240        struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
 241
 242        fib6_info_release(aca->aca_rt);
 243        kfree(aca);
 244}
 245
 246static void aca_put(struct ifacaddr6 *ac)
 247{
 248        if (refcount_dec_and_test(&ac->aca_refcnt)) {
 249                call_rcu(&ac->rcu, aca_free_rcu);
 250        }
 251}
 252
 253static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 254                                   const struct in6_addr *addr)
 255{
 256        struct ifacaddr6 *aca;
 257
 258        aca = kzalloc(sizeof(*aca), GFP_ATOMIC);
 259        if (!aca)
 260                return NULL;
 261
 262        aca->aca_addr = *addr;
 263        fib6_info_hold(f6i);
 264        aca->aca_rt = f6i;
 265        INIT_HLIST_NODE(&aca->aca_addr_lst);
 266        aca->aca_users = 1;
 267        /* aca_tstamp should be updated upon changes */
 268        aca->aca_cstamp = aca->aca_tstamp = jiffies;
 269        refcount_set(&aca->aca_refcnt, 1);
 270
 271        return aca;
 272}
 273
 274/*
 275 *      device anycast group inc (add if not found)
 276 */
 277int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 278{
 279        struct ifacaddr6 *aca;
 280        struct fib6_info *f6i;
 281        struct net *net;
 282        int err;
 283
 284        ASSERT_RTNL();
 285
 286        write_lock_bh(&idev->lock);
 287        if (idev->dead) {
 288                err = -ENODEV;
 289                goto out;
 290        }
 291
 292        for (aca = idev->ac_list; aca; aca = aca->aca_next) {
 293                if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 294                        aca->aca_users++;
 295                        err = 0;
 296                        goto out;
 297                }
 298        }
 299
 300        net = dev_net(idev->dev);
 301        f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC);
 302        if (IS_ERR(f6i)) {
 303                err = PTR_ERR(f6i);
 304                goto out;
 305        }
 306        aca = aca_alloc(f6i, addr);
 307        if (!aca) {
 308                fib6_info_release(f6i);
 309                err = -ENOMEM;
 310                goto out;
 311        }
 312
 313        aca->aca_next = idev->ac_list;
 314        idev->ac_list = aca;
 315
 316        /* Hold this for addrconf_join_solict() below before we unlock,
 317         * it is already exposed via idev->ac_list.
 318         */
 319        aca_get(aca);
 320        write_unlock_bh(&idev->lock);
 321
 322        ipv6_add_acaddr_hash(net, aca);
 323
 324        ip6_ins_rt(net, f6i);
 325
 326        addrconf_join_solict(idev->dev, &aca->aca_addr);
 327
 328        aca_put(aca);
 329        return 0;
 330out:
 331        write_unlock_bh(&idev->lock);
 332        return err;
 333}
 334
 335/*
 336 *      device anycast group decrement
 337 */
 338int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 339{
 340        struct ifacaddr6 *aca, *prev_aca;
 341
 342        ASSERT_RTNL();
 343
 344        write_lock_bh(&idev->lock);
 345        prev_aca = NULL;
 346        for (aca = idev->ac_list; aca; aca = aca->aca_next) {
 347                if (ipv6_addr_equal(&aca->aca_addr, addr))
 348                        break;
 349                prev_aca = aca;
 350        }
 351        if (!aca) {
 352                write_unlock_bh(&idev->lock);
 353                return -ENOENT;
 354        }
 355        if (--aca->aca_users > 0) {
 356                write_unlock_bh(&idev->lock);
 357                return 0;
 358        }
 359        if (prev_aca)
 360                prev_aca->aca_next = aca->aca_next;
 361        else
 362                idev->ac_list = aca->aca_next;
 363        write_unlock_bh(&idev->lock);
 364        ipv6_del_acaddr_hash(aca);
 365        addrconf_leave_solict(idev, &aca->aca_addr);
 366
 367        ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 368
 369        aca_put(aca);
 370        return 0;
 371}
 372
 373/* called with rtnl_lock() */
 374static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
 375{
 376        struct inet6_dev *idev = __in6_dev_get(dev);
 377
 378        if (!idev)
 379                return -ENODEV;
 380        return __ipv6_dev_ac_dec(idev, addr);
 381}
 382
 383void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 384{
 385        struct ifacaddr6 *aca;
 386
 387        write_lock_bh(&idev->lock);
 388        while ((aca = idev->ac_list) != NULL) {
 389                idev->ac_list = aca->aca_next;
 390                write_unlock_bh(&idev->lock);
 391
 392                ipv6_del_acaddr_hash(aca);
 393
 394                addrconf_leave_solict(idev, &aca->aca_addr);
 395
 396                ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 397
 398                aca_put(aca);
 399
 400                write_lock_bh(&idev->lock);
 401        }
 402        write_unlock_bh(&idev->lock);
 403}
 404
 405/*
 406 *      check if the interface has this anycast address
 407 *      called with rcu_read_lock()
 408 */
 409static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
 410{
 411        struct inet6_dev *idev;
 412        struct ifacaddr6 *aca;
 413
 414        idev = __in6_dev_get(dev);
 415        if (idev) {
 416                read_lock_bh(&idev->lock);
 417                for (aca = idev->ac_list; aca; aca = aca->aca_next)
 418                        if (ipv6_addr_equal(&aca->aca_addr, addr))
 419                                break;
 420                read_unlock_bh(&idev->lock);
 421                return aca != NULL;
 422        }
 423        return false;
 424}
 425
 426/*
 427 *      check if given interface (or any, if dev==0) has this anycast address
 428 */
 429bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 430                         const struct in6_addr *addr)
 431{
 432        struct net_device *nh_dev;
 433        struct ifacaddr6 *aca;
 434        bool found = false;
 435
 436        rcu_read_lock();
 437        if (dev)
 438                found = ipv6_chk_acast_dev(dev, addr);
 439        else {
 440                unsigned int hash = inet6_acaddr_hash(net, addr);
 441
 442                hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
 443                                         aca_addr_lst) {
 444                        nh_dev = fib6_info_nh_dev(aca->aca_rt);
 445                        if (!nh_dev || !net_eq(dev_net(nh_dev), net))
 446                                continue;
 447                        if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 448                                found = true;
 449                                break;
 450                        }
 451                }
 452        }
 453        rcu_read_unlock();
 454        return found;
 455}
 456
 457/*      check if this anycast address is link-local on given interface or
 458 *      is global
 459 */
 460bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 461                             const struct in6_addr *addr)
 462{
 463        return ipv6_chk_acast_addr(net,
 464                                   (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ?
 465                                    dev : NULL),
 466                                   addr);
 467}
 468
 469#ifdef CONFIG_PROC_FS
 470struct ac6_iter_state {
 471        struct seq_net_private p;
 472        struct net_device *dev;
 473        struct inet6_dev *idev;
 474};
 475
 476#define ac6_seq_private(seq)    ((struct ac6_iter_state *)(seq)->private)
 477
 478static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
 479{
 480        struct ifacaddr6 *im = NULL;
 481        struct ac6_iter_state *state = ac6_seq_private(seq);
 482        struct net *net = seq_file_net(seq);
 483
 484        state->idev = NULL;
 485        for_each_netdev_rcu(net, state->dev) {
 486                struct inet6_dev *idev;
 487                idev = __in6_dev_get(state->dev);
 488                if (!idev)
 489                        continue;
 490                read_lock_bh(&idev->lock);
 491                im = idev->ac_list;
 492                if (im) {
 493                        state->idev = idev;
 494                        break;
 495                }
 496                read_unlock_bh(&idev->lock);
 497        }
 498        return im;
 499}
 500
 501static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
 502{
 503        struct ac6_iter_state *state = ac6_seq_private(seq);
 504
 505        im = im->aca_next;
 506        while (!im) {
 507                if (likely(state->idev != NULL))
 508                        read_unlock_bh(&state->idev->lock);
 509
 510                state->dev = next_net_device_rcu(state->dev);
 511                if (!state->dev) {
 512                        state->idev = NULL;
 513                        break;
 514                }
 515                state->idev = __in6_dev_get(state->dev);
 516                if (!state->idev)
 517                        continue;
 518                read_lock_bh(&state->idev->lock);
 519                im = state->idev->ac_list;
 520        }
 521        return im;
 522}
 523
 524static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
 525{
 526        struct ifacaddr6 *im = ac6_get_first(seq);
 527        if (im)
 528                while (pos && (im = ac6_get_next(seq, im)) != NULL)
 529                        --pos;
 530        return pos ? NULL : im;
 531}
 532
 533static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
 534        __acquires(RCU)
 535{
 536        rcu_read_lock();
 537        return ac6_get_idx(seq, *pos);
 538}
 539
 540static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 541{
 542        struct ifacaddr6 *im = ac6_get_next(seq, v);
 543
 544        ++*pos;
 545        return im;
 546}
 547
 548static void ac6_seq_stop(struct seq_file *seq, void *v)
 549        __releases(RCU)
 550{
 551        struct ac6_iter_state *state = ac6_seq_private(seq);
 552
 553        if (likely(state->idev != NULL)) {
 554                read_unlock_bh(&state->idev->lock);
 555                state->idev = NULL;
 556        }
 557        rcu_read_unlock();
 558}
 559
 560static int ac6_seq_show(struct seq_file *seq, void *v)
 561{
 562        struct ifacaddr6 *im = (struct ifacaddr6 *)v;
 563        struct ac6_iter_state *state = ac6_seq_private(seq);
 564
 565        seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
 566                   state->dev->ifindex, state->dev->name,
 567                   &im->aca_addr, im->aca_users);
 568        return 0;
 569}
 570
 571static const struct seq_operations ac6_seq_ops = {
 572        .start  =       ac6_seq_start,
 573        .next   =       ac6_seq_next,
 574        .stop   =       ac6_seq_stop,
 575        .show   =       ac6_seq_show,
 576};
 577
 578int __net_init ac6_proc_init(struct net *net)
 579{
 580        if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
 581                        sizeof(struct ac6_iter_state)))
 582                return -ENOMEM;
 583
 584        return 0;
 585}
 586
 587void ac6_proc_exit(struct net *net)
 588{
 589        remove_proc_entry("anycast6", net->proc_net);
 590}
 591#endif
 592
 593/*      Init / cleanup code
 594 */
 595int __init ipv6_anycast_init(void)
 596{
 597        int i;
 598
 599        for (i = 0; i < IN6_ADDR_HSIZE; i++)
 600                INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
 601        return 0;
 602}
 603
 604void ipv6_anycast_cleanup(void)
 605{
 606        int i;
 607
 608        spin_lock(&acaddr_hash_lock);
 609        for (i = 0; i < IN6_ADDR_HSIZE; i++)
 610                WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
 611        spin_unlock(&acaddr_hash_lock);
 612}
 613