linux/net/ipv4/nexthop.c
<<
>>
Prefs
// SPDX-License-Identifier: GPL-2.0
/* Generic nexthop implementation
 *
 * Copyright (c) 2017-19 Cumulus Networks
 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
 */
   7
   8#include <linux/nexthop.h>
   9#include <linux/rtnetlink.h>
  10#include <linux/slab.h>
  11#include <net/arp.h>
  12#include <net/ipv6_stubs.h>
  13#include <net/lwtunnel.h>
  14#include <net/ndisc.h>
  15#include <net/nexthop.h>
  16#include <net/route.h>
  17#include <net/sock.h>
  18
  19static void remove_nexthop(struct net *net, struct nexthop *nh,
  20                           struct nl_info *nlinfo);
  21
  22#define NH_DEV_HASHBITS  8
  23#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
  24
  25static const struct nla_policy rtm_nh_policy_new[] = {
  26        [NHA_ID]                = { .type = NLA_U32 },
  27        [NHA_GROUP]             = { .type = NLA_BINARY },
  28        [NHA_GROUP_TYPE]        = { .type = NLA_U16 },
  29        [NHA_BLACKHOLE]         = { .type = NLA_FLAG },
  30        [NHA_OIF]               = { .type = NLA_U32 },
  31        [NHA_GATEWAY]           = { .type = NLA_BINARY },
  32        [NHA_ENCAP_TYPE]        = { .type = NLA_U16 },
  33        [NHA_ENCAP]             = { .type = NLA_NESTED },
  34        [NHA_FDB]               = { .type = NLA_FLAG },
  35};
  36
  37static const struct nla_policy rtm_nh_policy_get[] = {
  38        [NHA_ID]                = { .type = NLA_U32 },
  39};
  40
  41static const struct nla_policy rtm_nh_policy_dump[] = {
  42        [NHA_OIF]               = { .type = NLA_U32 },
  43        [NHA_GROUPS]            = { .type = NLA_FLAG },
  44        [NHA_MASTER]            = { .type = NLA_U32 },
  45        [NHA_FDB]               = { .type = NLA_FLAG },
  46};
  47
  48static bool nexthop_notifiers_is_empty(struct net *net)
  49{
  50        return !net->nexthop.notifier_chain.head;
  51}
  52
  53static void
  54__nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
  55                               const struct nexthop *nh)
  56{
  57        struct nh_info *nhi = rtnl_dereference(nh->nh_info);
  58
  59        nh_info->dev = nhi->fib_nhc.nhc_dev;
  60        nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
  61        if (nh_info->gw_family == AF_INET)
  62                nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
  63        else if (nh_info->gw_family == AF_INET6)
  64                nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
  65
  66        nh_info->is_reject = nhi->reject_nh;
  67        nh_info->is_fdb = nhi->fdb_nh;
  68        nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
  69}
  70
  71static int nh_notifier_single_info_init(struct nh_notifier_info *info,
  72                                        const struct nexthop *nh)
  73{
  74        info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
  75        info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
  76        if (!info->nh)
  77                return -ENOMEM;
  78
  79        __nh_notifier_single_info_init(info->nh, nh);
  80
  81        return 0;
  82}
  83
  84static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
  85{
  86        kfree(info->nh);
  87}
  88
  89static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
  90                                    struct nh_group *nhg)
  91{
  92        u16 num_nh = nhg->num_nh;
  93        int i;
  94
  95        info->type = NH_NOTIFIER_INFO_TYPE_GRP;
  96        info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
  97                               GFP_KERNEL);
  98        if (!info->nh_grp)
  99                return -ENOMEM;
 100
 101        info->nh_grp->num_nh = num_nh;
 102        info->nh_grp->is_fdb = nhg->fdb_nh;
 103
 104        for (i = 0; i < num_nh; i++) {
 105                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 106
 107                info->nh_grp->nh_entries[i].id = nhge->nh->id;
 108                info->nh_grp->nh_entries[i].weight = nhge->weight;
 109                __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
 110                                               nhge->nh);
 111        }
 112
 113        return 0;
 114}
 115
 116static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
 117                                     const struct nexthop *nh)
 118{
 119        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 120
 121        if (nhg->mpath)
 122                return nh_notifier_mp_info_init(info, nhg);
 123        return -EINVAL;
 124}
 125
 126static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
 127                                      const struct nexthop *nh)
 128{
 129        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 130
 131        if (nhg->mpath)
 132                kfree(info->nh_grp);
 133}
 134
 135static int nh_notifier_info_init(struct nh_notifier_info *info,
 136                                 const struct nexthop *nh)
 137{
 138        info->id = nh->id;
 139
 140        if (nh->is_group)
 141                return nh_notifier_grp_info_init(info, nh);
 142        else
 143                return nh_notifier_single_info_init(info, nh);
 144}
 145
 146static void nh_notifier_info_fini(struct nh_notifier_info *info,
 147                                  const struct nexthop *nh)
 148{
 149        if (nh->is_group)
 150                nh_notifier_grp_info_fini(info, nh);
 151        else
 152                nh_notifier_single_info_fini(info);
 153}
 154
 155static int call_nexthop_notifiers(struct net *net,
 156                                  enum nexthop_event_type event_type,
 157                                  struct nexthop *nh,
 158                                  struct netlink_ext_ack *extack)
 159{
 160        struct nh_notifier_info info = {
 161                .net = net,
 162                .extack = extack,
 163        };
 164        int err;
 165
 166        ASSERT_RTNL();
 167
 168        if (nexthop_notifiers_is_empty(net))
 169                return 0;
 170
 171        err = nh_notifier_info_init(&info, nh);
 172        if (err) {
 173                NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
 174                return err;
 175        }
 176
 177        err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
 178                                           event_type, &info);
 179        nh_notifier_info_fini(&info, nh);
 180
 181        return notifier_to_errno(err);
 182}
 183
 184static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
 185                                 enum nexthop_event_type event_type,
 186                                 struct nexthop *nh,
 187                                 struct netlink_ext_ack *extack)
 188{
 189        struct nh_notifier_info info = {
 190                .net = net,
 191                .extack = extack,
 192        };
 193        int err;
 194
 195        err = nh_notifier_info_init(&info, nh);
 196        if (err)
 197                return err;
 198
 199        err = nb->notifier_call(nb, event_type, &info);
 200        nh_notifier_info_fini(&info, nh);
 201
 202        return notifier_to_errno(err);
 203}
 204
 205static unsigned int nh_dev_hashfn(unsigned int val)
 206{
 207        unsigned int mask = NH_DEV_HASHSIZE - 1;
 208
 209        return (val ^
 210                (val >> NH_DEV_HASHBITS) ^
 211                (val >> (NH_DEV_HASHBITS * 2))) & mask;
 212}
 213
 214static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
 215{
 216        struct net_device *dev = nhi->fib_nhc.nhc_dev;
 217        struct hlist_head *head;
 218        unsigned int hash;
 219
 220        WARN_ON(!dev);
 221
 222        hash = nh_dev_hashfn(dev->ifindex);
 223        head = &net->nexthop.devhash[hash];
 224        hlist_add_head(&nhi->dev_hash, head);
 225}
 226
 227static void nexthop_free_group(struct nexthop *nh)
 228{
 229        struct nh_group *nhg;
 230        int i;
 231
 232        nhg = rcu_dereference_raw(nh->nh_grp);
 233        for (i = 0; i < nhg->num_nh; ++i) {
 234                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 235
 236                WARN_ON(!list_empty(&nhge->nh_list));
 237                nexthop_put(nhge->nh);
 238        }
 239
 240        WARN_ON(nhg->spare == nhg);
 241
 242        kfree(nhg->spare);
 243        kfree(nhg);
 244}
 245
 246static void nexthop_free_single(struct nexthop *nh)
 247{
 248        struct nh_info *nhi;
 249
 250        nhi = rcu_dereference_raw(nh->nh_info);
 251        switch (nhi->family) {
 252        case AF_INET:
 253                fib_nh_release(nh->net, &nhi->fib_nh);
 254                break;
 255        case AF_INET6:
 256                ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
 257                break;
 258        }
 259        kfree(nhi);
 260}
 261
 262void nexthop_free_rcu(struct rcu_head *head)
 263{
 264        struct nexthop *nh = container_of(head, struct nexthop, rcu);
 265
 266        if (nh->is_group)
 267                nexthop_free_group(nh);
 268        else
 269                nexthop_free_single(nh);
 270
 271        kfree(nh);
 272}
 273EXPORT_SYMBOL_GPL(nexthop_free_rcu);
 274
 275static struct nexthop *nexthop_alloc(void)
 276{
 277        struct nexthop *nh;
 278
 279        nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
 280        if (nh) {
 281                INIT_LIST_HEAD(&nh->fi_list);
 282                INIT_LIST_HEAD(&nh->f6i_list);
 283                INIT_LIST_HEAD(&nh->grp_list);
 284                INIT_LIST_HEAD(&nh->fdb_list);
 285        }
 286        return nh;
 287}
 288
 289static struct nh_group *nexthop_grp_alloc(u16 num_nh)
 290{
 291        struct nh_group *nhg;
 292
 293        nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
 294        if (nhg)
 295                nhg->num_nh = num_nh;
 296
 297        return nhg;
 298}
 299
 300static void nh_base_seq_inc(struct net *net)
 301{
 302        while (++net->nexthop.seq == 0)
 303                ;
 304}
 305
 306/* no reference taken; rcu lock or rtnl must be held */
 307struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
 308{
 309        struct rb_node **pp, *parent = NULL, *next;
 310
 311        pp = &net->nexthop.rb_root.rb_node;
 312        while (1) {
 313                struct nexthop *nh;
 314
 315                next = rcu_dereference_raw(*pp);
 316                if (!next)
 317                        break;
 318                parent = next;
 319
 320                nh = rb_entry(parent, struct nexthop, rb_node);
 321                if (id < nh->id)
 322                        pp = &next->rb_left;
 323                else if (id > nh->id)
 324                        pp = &next->rb_right;
 325                else
 326                        return nh;
 327        }
 328        return NULL;
 329}
 330EXPORT_SYMBOL_GPL(nexthop_find_by_id);
 331
 332/* used for auto id allocation; called with rtnl held */
 333static u32 nh_find_unused_id(struct net *net)
 334{
 335        u32 id_start = net->nexthop.last_id_allocated;
 336
 337        while (1) {
 338                net->nexthop.last_id_allocated++;
 339                if (net->nexthop.last_id_allocated == id_start)
 340                        break;
 341
 342                if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
 343                        return net->nexthop.last_id_allocated;
 344        }
 345        return 0;
 346}
 347
 348static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
 349{
 350        struct nexthop_grp *p;
 351        size_t len = nhg->num_nh * sizeof(*p);
 352        struct nlattr *nla;
 353        u16 group_type = 0;
 354        int i;
 355
 356        if (nhg->mpath)
 357                group_type = NEXTHOP_GRP_TYPE_MPATH;
 358
 359        if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
 360                goto nla_put_failure;
 361
 362        nla = nla_reserve(skb, NHA_GROUP, len);
 363        if (!nla)
 364                goto nla_put_failure;
 365
 366        p = nla_data(nla);
 367        for (i = 0; i < nhg->num_nh; ++i) {
 368                p->id = nhg->nh_entries[i].nh->id;
 369                p->weight = nhg->nh_entries[i].weight - 1;
 370                p += 1;
 371        }
 372
 373        return 0;
 374
 375nla_put_failure:
 376        return -EMSGSIZE;
 377}
 378
 379static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
 380                        int event, u32 portid, u32 seq, unsigned int nlflags)
 381{
 382        struct fib6_nh *fib6_nh;
 383        struct fib_nh *fib_nh;
 384        struct nlmsghdr *nlh;
 385        struct nh_info *nhi;
 386        struct nhmsg *nhm;
 387
 388        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
 389        if (!nlh)
 390                return -EMSGSIZE;
 391
 392        nhm = nlmsg_data(nlh);
 393        nhm->nh_family = AF_UNSPEC;
 394        nhm->nh_flags = nh->nh_flags;
 395        nhm->nh_protocol = nh->protocol;
 396        nhm->nh_scope = 0;
 397        nhm->resvd = 0;
 398
 399        if (nla_put_u32(skb, NHA_ID, nh->id))
 400                goto nla_put_failure;
 401
 402        if (nh->is_group) {
 403                struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 404
 405                if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
 406                        goto nla_put_failure;
 407                if (nla_put_nh_group(skb, nhg))
 408                        goto nla_put_failure;
 409                goto out;
 410        }
 411
 412        nhi = rtnl_dereference(nh->nh_info);
 413        nhm->nh_family = nhi->family;
 414        if (nhi->reject_nh) {
 415                if (nla_put_flag(skb, NHA_BLACKHOLE))
 416                        goto nla_put_failure;
 417                goto out;
 418        } else if (nhi->fdb_nh) {
 419                if (nla_put_flag(skb, NHA_FDB))
 420                        goto nla_put_failure;
 421        } else {
 422                const struct net_device *dev;
 423
 424                dev = nhi->fib_nhc.nhc_dev;
 425                if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
 426                        goto nla_put_failure;
 427        }
 428
 429        nhm->nh_scope = nhi->fib_nhc.nhc_scope;
 430        switch (nhi->family) {
 431        case AF_INET:
 432                fib_nh = &nhi->fib_nh;
 433                if (fib_nh->fib_nh_gw_family &&
 434                    nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
 435                        goto nla_put_failure;
 436                break;
 437
 438        case AF_INET6:
 439                fib6_nh = &nhi->fib6_nh;
 440                if (fib6_nh->fib_nh_gw_family &&
 441                    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
 442                        goto nla_put_failure;
 443                break;
 444        }
 445
 446        if (nhi->fib_nhc.nhc_lwtstate &&
 447            lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
 448                                NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
 449                goto nla_put_failure;
 450
 451out:
 452        nlmsg_end(skb, nlh);
 453        return 0;
 454
 455nla_put_failure:
 456        nlmsg_cancel(skb, nlh);
 457        return -EMSGSIZE;
 458}
 459
 460static size_t nh_nlmsg_size_grp(struct nexthop *nh)
 461{
 462        struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 463        size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
 464
 465        return nla_total_size(sz) +
 466               nla_total_size(2);  /* NHA_GROUP_TYPE */
 467}
 468
 469static size_t nh_nlmsg_size_single(struct nexthop *nh)
 470{
 471        struct nh_info *nhi = rtnl_dereference(nh->nh_info);
 472        size_t sz;
 473
 474        /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
 475         * are mutually exclusive
 476         */
 477        sz = nla_total_size(4);  /* NHA_OIF */
 478
 479        switch (nhi->family) {
 480        case AF_INET:
 481                if (nhi->fib_nh.fib_nh_gw_family)
 482                        sz += nla_total_size(4);  /* NHA_GATEWAY */
 483                break;
 484
 485        case AF_INET6:
 486                /* NHA_GATEWAY */
 487                if (nhi->fib6_nh.fib_nh_gw_family)
 488                        sz += nla_total_size(sizeof(const struct in6_addr));
 489                break;
 490        }
 491
 492        if (nhi->fib_nhc.nhc_lwtstate) {
 493                sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
 494                sz += nla_total_size(2);  /* NHA_ENCAP_TYPE */
 495        }
 496
 497        return sz;
 498}
 499
 500static size_t nh_nlmsg_size(struct nexthop *nh)
 501{
 502        size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
 503
 504        sz += nla_total_size(4); /* NHA_ID */
 505
 506        if (nh->is_group)
 507                sz += nh_nlmsg_size_grp(nh);
 508        else
 509                sz += nh_nlmsg_size_single(nh);
 510
 511        return sz;
 512}
 513
 514static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
 515{
 516        unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
 517        u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
 518        struct sk_buff *skb;
 519        int err = -ENOBUFS;
 520
 521        skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
 522        if (!skb)
 523                goto errout;
 524
 525        err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
 526        if (err < 0) {
 527                /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
 528                WARN_ON(err == -EMSGSIZE);
 529                kfree_skb(skb);
 530                goto errout;
 531        }
 532
 533        rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
 534                    info->nlh, gfp_any());
 535        return;
 536errout:
 537        if (err < 0)
 538                rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
 539}
 540
 541static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 542                           bool *is_fdb, struct netlink_ext_ack *extack)
 543{
 544        if (nh->is_group) {
 545                struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 546
 547                /* nested multipath (group within a group) is not
 548                 * supported
 549                 */
 550                if (nhg->mpath) {
 551                        NL_SET_ERR_MSG(extack,
 552                                       "Multipath group can not be a nexthop within a group");
 553                        return false;
 554                }
 555                *is_fdb = nhg->fdb_nh;
 556        } else {
 557                struct nh_info *nhi = rtnl_dereference(nh->nh_info);
 558
 559                if (nhi->reject_nh && npaths > 1) {
 560                        NL_SET_ERR_MSG(extack,
 561                                       "Blackhole nexthop can not be used in a group with more than 1 path");
 562                        return false;
 563                }
 564                *is_fdb = nhi->fdb_nh;
 565        }
 566
 567        return true;
 568}
 569
 570static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
 571                                   struct netlink_ext_ack *extack)
 572{
 573        struct nh_info *nhi;
 574
 575        nhi = rtnl_dereference(nh->nh_info);
 576
 577        if (!nhi->fdb_nh) {
 578                NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
 579                return -EINVAL;
 580        }
 581
 582        if (*nh_family == AF_UNSPEC) {
 583                *nh_family = nhi->family;
 584        } else if (*nh_family != nhi->family) {
 585                NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
 586                return -EINVAL;
 587        }
 588
 589        return 0;
 590}
 591
 592static int nh_check_attr_group(struct net *net,
 593                               struct nlattr *tb[], size_t tb_size,
 594                               struct netlink_ext_ack *extack)
 595{
 596        unsigned int len = nla_len(tb[NHA_GROUP]);
 597        u8 nh_family = AF_UNSPEC;
 598        struct nexthop_grp *nhg;
 599        unsigned int i, j;
 600        u8 nhg_fdb = 0;
 601
 602        if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
 603                NL_SET_ERR_MSG(extack,
 604                               "Invalid length for nexthop group attribute");
 605                return -EINVAL;
 606        }
 607
 608        /* convert len to number of nexthop ids */
 609        len /= sizeof(*nhg);
 610
 611        nhg = nla_data(tb[NHA_GROUP]);
 612        for (i = 0; i < len; ++i) {
 613                if (nhg[i].resvd1 || nhg[i].resvd2) {
 614                        NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
 615                        return -EINVAL;
 616                }
 617                if (nhg[i].weight > 254) {
 618                        NL_SET_ERR_MSG(extack, "Invalid value for weight");
 619                        return -EINVAL;
 620                }
 621                for (j = i + 1; j < len; ++j) {
 622                        if (nhg[i].id == nhg[j].id) {
 623                                NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
 624                                return -EINVAL;
 625                        }
 626                }
 627        }
 628
 629        if (tb[NHA_FDB])
 630                nhg_fdb = 1;
 631        nhg = nla_data(tb[NHA_GROUP]);
 632        for (i = 0; i < len; ++i) {
 633                struct nexthop *nh;
 634                bool is_fdb_nh;
 635
 636                nh = nexthop_find_by_id(net, nhg[i].id);
 637                if (!nh) {
 638                        NL_SET_ERR_MSG(extack, "Invalid nexthop id");
 639                        return -EINVAL;
 640                }
 641                if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
 642                        return -EINVAL;
 643
 644                if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
 645                        return -EINVAL;
 646
 647                if (!nhg_fdb && is_fdb_nh) {
 648                        NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
 649                        return -EINVAL;
 650                }
 651        }
 652        for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
 653                if (!tb[i])
 654                        continue;
 655                if (i == NHA_FDB)
 656                        continue;
 657                NL_SET_ERR_MSG(extack,
 658                               "No other attributes can be set in nexthop groups");
 659                return -EINVAL;
 660        }
 661
 662        return 0;
 663}
 664
 665static bool ipv6_good_nh(const struct fib6_nh *nh)
 666{
 667        int state = NUD_REACHABLE;
 668        struct neighbour *n;
 669
 670        rcu_read_lock_bh();
 671
 672        n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
 673        if (n)
 674                state = n->nud_state;
 675
 676        rcu_read_unlock_bh();
 677
 678        return !!(state & NUD_VALID);
 679}
 680
 681static bool ipv4_good_nh(const struct fib_nh *nh)
 682{
 683        int state = NUD_REACHABLE;
 684        struct neighbour *n;
 685
 686        rcu_read_lock_bh();
 687
 688        n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
 689                                      (__force u32)nh->fib_nh_gw4);
 690        if (n)
 691                state = n->nud_state;
 692
 693        rcu_read_unlock_bh();
 694
 695        return !!(state & NUD_VALID);
 696}
 697
 698static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
 699{
 700        struct nexthop *rc = NULL;
 701        int i;
 702
 703        for (i = 0; i < nhg->num_nh; ++i) {
 704                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 705                struct nh_info *nhi;
 706
 707                if (hash > atomic_read(&nhge->mpath.upper_bound))
 708                        continue;
 709
 710                nhi = rcu_dereference(nhge->nh->nh_info);
 711                if (nhi->fdb_nh)
 712                        return nhge->nh;
 713
 714                /* nexthops always check if it is good and does
 715                 * not rely on a sysctl for this behavior
 716                 */
 717                switch (nhi->family) {
 718                case AF_INET:
 719                        if (ipv4_good_nh(&nhi->fib_nh))
 720                                return nhge->nh;
 721                        break;
 722                case AF_INET6:
 723                        if (ipv6_good_nh(&nhi->fib6_nh))
 724                                return nhge->nh;
 725                        break;
 726                }
 727
 728                if (!rc)
 729                        rc = nhge->nh;
 730        }
 731
 732        return rc;
 733}
 734
 735struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 736{
 737        struct nh_group *nhg;
 738
 739        if (!nh->is_group)
 740                return nh;
 741
 742        nhg = rcu_dereference(nh->nh_grp);
 743        if (nhg->mpath)
 744                return nexthop_select_path_mp(nhg, hash);
 745
 746        /* Unreachable. */
 747        return NULL;
 748}
 749EXPORT_SYMBOL_GPL(nexthop_select_path);
 750
 751int nexthop_for_each_fib6_nh(struct nexthop *nh,
 752                             int (*cb)(struct fib6_nh *nh, void *arg),
 753                             void *arg)
 754{
 755        struct nh_info *nhi;
 756        int err;
 757
 758        if (nh->is_group) {
 759                struct nh_group *nhg;
 760                int i;
 761
 762                nhg = rcu_dereference_rtnl(nh->nh_grp);
 763                for (i = 0; i < nhg->num_nh; i++) {
 764                        struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 765
 766                        nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
 767                        err = cb(&nhi->fib6_nh, arg);
 768                        if (err)
 769                                return err;
 770                }
 771        } else {
 772                nhi = rcu_dereference_rtnl(nh->nh_info);
 773                err = cb(&nhi->fib6_nh, arg);
 774                if (err)
 775                        return err;
 776        }
 777
 778        return 0;
 779}
 780EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
 781
 782static int check_src_addr(const struct in6_addr *saddr,
 783                          struct netlink_ext_ack *extack)
 784{
 785        if (!ipv6_addr_any(saddr)) {
 786                NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
 787                return -EINVAL;
 788        }
 789        return 0;
 790}
 791
 792int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
 793                       struct netlink_ext_ack *extack)
 794{
 795        struct nh_info *nhi;
 796        bool is_fdb_nh;
 797
 798        /* fib6_src is unique to a fib6_info and limits the ability to cache
 799         * routes in fib6_nh within a nexthop that is potentially shared
 800         * across multiple fib entries. If the config wants to use source
 801         * routing it can not use nexthop objects. mlxsw also does not allow
 802         * fib6_src on routes.
 803         */
 804        if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
 805                return -EINVAL;
 806
 807        if (nh->is_group) {
 808                struct nh_group *nhg;
 809
 810                nhg = rtnl_dereference(nh->nh_grp);
 811                if (nhg->has_v4)
 812                        goto no_v4_nh;
 813                is_fdb_nh = nhg->fdb_nh;
 814        } else {
 815                nhi = rtnl_dereference(nh->nh_info);
 816                if (nhi->family == AF_INET)
 817                        goto no_v4_nh;
 818                is_fdb_nh = nhi->fdb_nh;
 819        }
 820
 821        if (is_fdb_nh) {
 822                NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
 823                return -EINVAL;
 824        }
 825
 826        return 0;
 827no_v4_nh:
 828        NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
 829        return -EINVAL;
 830}
 831EXPORT_SYMBOL_GPL(fib6_check_nexthop);
 832
 833/* if existing nexthop has ipv6 routes linked to it, need
 834 * to verify this new spec works with ipv6
 835 */
 836static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
 837                              struct netlink_ext_ack *extack)
 838{
 839        struct fib6_info *f6i;
 840
 841        if (list_empty(&old->f6i_list))
 842                return 0;
 843
 844        list_for_each_entry(f6i, &old->f6i_list, nh_list) {
 845                if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
 846                        return -EINVAL;
 847        }
 848
 849        return fib6_check_nexthop(new, NULL, extack);
 850}
 851
 852static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
 853                               struct netlink_ext_ack *extack)
 854{
 855        if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
 856                NL_SET_ERR_MSG(extack,
 857                               "Route with host scope can not have a gateway");
 858                return -EINVAL;
 859        }
 860
 861        if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
 862                NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
 863                return -EINVAL;
 864        }
 865
 866        return 0;
 867}
 868
 869/* Invoked by fib add code to verify nexthop by id is ok with
 870 * config for prefix; parts of fib_check_nh not done when nexthop
 871 * object is used.
 872 */
 873int fib_check_nexthop(struct nexthop *nh, u8 scope,
 874                      struct netlink_ext_ack *extack)
 875{
 876        struct nh_info *nhi;
 877        int err = 0;
 878
 879        if (nh->is_group) {
 880                struct nh_group *nhg;
 881
 882                nhg = rtnl_dereference(nh->nh_grp);
 883                if (nhg->fdb_nh) {
 884                        NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
 885                        err = -EINVAL;
 886                        goto out;
 887                }
 888
 889                if (scope == RT_SCOPE_HOST) {
 890                        NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
 891                        err = -EINVAL;
 892                        goto out;
 893                }
 894
 895                /* all nexthops in a group have the same scope */
 896                nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
 897                err = nexthop_check_scope(nhi, scope, extack);
 898        } else {
 899                nhi = rtnl_dereference(nh->nh_info);
 900                if (nhi->fdb_nh) {
 901                        NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
 902                        err = -EINVAL;
 903                        goto out;
 904                }
 905                err = nexthop_check_scope(nhi, scope, extack);
 906        }
 907
 908out:
 909        return err;
 910}
 911
 912static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
 913                             struct netlink_ext_ack *extack)
 914{
 915        struct fib_info *fi;
 916
 917        list_for_each_entry(fi, &old->fi_list, nh_list) {
 918                int err;
 919
 920                err = fib_check_nexthop(new, fi->fib_scope, extack);
 921                if (err)
 922                        return err;
 923        }
 924        return 0;
 925}
 926
 927static void nh_group_rebalance(struct nh_group *nhg)
 928{
 929        int total = 0;
 930        int w = 0;
 931        int i;
 932
 933        for (i = 0; i < nhg->num_nh; ++i)
 934                total += nhg->nh_entries[i].weight;
 935
 936        for (i = 0; i < nhg->num_nh; ++i) {
 937                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
 938                int upper_bound;
 939
 940                w += nhge->weight;
 941                upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
 942                atomic_set(&nhge->mpath.upper_bound, upper_bound);
 943        }
 944}
 945
 946static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
 947                                struct nl_info *nlinfo)
 948{
 949        struct nh_grp_entry *nhges, *new_nhges;
 950        struct nexthop *nhp = nhge->nh_parent;
 951        struct netlink_ext_ack extack;
 952        struct nexthop *nh = nhge->nh;
 953        struct nh_group *nhg, *newg;
 954        int i, j, err;
 955
 956        WARN_ON(!nh);
 957
 958        nhg = rtnl_dereference(nhp->nh_grp);
 959        newg = nhg->spare;
 960
 961        /* last entry, keep it visible and remove the parent */
 962        if (nhg->num_nh == 1) {
 963                remove_nexthop(net, nhp, nlinfo);
 964                return;
 965        }
 966
 967        newg->has_v4 = false;
 968        newg->mpath = nhg->mpath;
 969        newg->fdb_nh = nhg->fdb_nh;
 970        newg->num_nh = nhg->num_nh;
 971
 972        /* copy old entries to new except the one getting removed */
 973        nhges = nhg->nh_entries;
 974        new_nhges = newg->nh_entries;
 975        for (i = 0, j = 0; i < nhg->num_nh; ++i) {
 976                struct nh_info *nhi;
 977
 978                /* current nexthop getting removed */
 979                if (nhg->nh_entries[i].nh == nh) {
 980                        newg->num_nh--;
 981                        continue;
 982                }
 983
 984                nhi = rtnl_dereference(nhges[i].nh->nh_info);
 985                if (nhi->family == AF_INET)
 986                        newg->has_v4 = true;
 987
 988                list_del(&nhges[i].nh_list);
 989                new_nhges[j].nh_parent = nhges[i].nh_parent;
 990                new_nhges[j].nh = nhges[i].nh;
 991                new_nhges[j].weight = nhges[i].weight;
 992                list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
 993                j++;
 994        }
 995
 996        nh_group_rebalance(newg);
 997        rcu_assign_pointer(nhp->nh_grp, newg);
 998
 999        list_del(&nhge->nh_list);
1000        nexthop_put(nhge->nh);
1001
1002        err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
1003        if (err)
1004                pr_err("%s\n", extack._msg);
1005
1006        if (nlinfo)
1007                nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
1008}
1009
1010static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
1011                                       struct nl_info *nlinfo)
1012{
1013        struct nh_grp_entry *nhge, *tmp;
1014
1015        list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
1016                remove_nh_grp_entry(net, nhge, nlinfo);
1017
1018        /* make sure all see the newly published array before releasing rtnl */
1019        synchronize_net();
1020}
1021
1022static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
1023{
1024        struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
1025        int i, num_nh = nhg->num_nh;
1026
1027        for (i = 0; i < num_nh; ++i) {
1028                struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1029
1030                if (WARN_ON(!nhge->nh))
1031                        continue;
1032
1033                list_del_init(&nhge->nh_list);
1034        }
1035}
1036
1037/* not called for nexthop replace */
1038static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
1039{
1040        struct fib6_info *f6i, *tmp;
1041        bool do_flush = false;
1042        struct fib_info *fi;
1043
1044        list_for_each_entry(fi, &nh->fi_list, nh_list) {
1045                fi->fib_flags |= RTNH_F_DEAD;
1046                do_flush = true;
1047        }
1048        if (do_flush)
1049                fib_flush(net);
1050
1051        /* ip6_del_rt removes the entry from this list hence the _safe */
1052        list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
1053                /* __ip6_del_rt does a release, so do a hold here */
1054                fib6_info_hold(f6i);
1055                ipv6_stub->ip6_del_rt(net, f6i,
1056                                      !net->ipv4.sysctl_nexthop_compat_mode);
1057        }
1058}
1059
1060static void __remove_nexthop(struct net *net, struct nexthop *nh,
1061                             struct nl_info *nlinfo)
1062{
1063        __remove_nexthop_fib(net, nh);
1064
1065        if (nh->is_group) {
1066                remove_nexthop_group(nh, nlinfo);
1067        } else {
1068                struct nh_info *nhi;
1069
1070                nhi = rtnl_dereference(nh->nh_info);
1071                if (nhi->fib_nhc.nhc_dev)
1072                        hlist_del(&nhi->dev_hash);
1073
1074                remove_nexthop_from_groups(net, nh, nlinfo);
1075        }
1076}
1077
1078static void remove_nexthop(struct net *net, struct nexthop *nh,
1079                           struct nl_info *nlinfo)
1080{
1081        call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
1082
1083        /* remove from the tree */
1084        rb_erase(&nh->rb_node, &net->nexthop.rb_root);
1085
1086        if (nlinfo)
1087                nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
1088
1089        __remove_nexthop(net, nh, nlinfo);
1090        nh_base_seq_inc(net);
1091
1092        nexthop_put(nh);
1093}
1094
1095/* if any FIB entries reference this nexthop, any dst entries
1096 * need to be regenerated
1097 */
1098static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
1099{
1100        struct fib6_info *f6i;
1101
1102        if (!list_empty(&nh->fi_list))
1103                rt_cache_flush(net);
1104
1105        list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1106                ipv6_stub->fib6_update_sernum(net, f6i);
1107}
1108
1109static int replace_nexthop_grp(struct net *net, struct nexthop *old,
1110                               struct nexthop *new,
1111                               struct netlink_ext_ack *extack)
1112{
1113        struct nh_group *oldg, *newg;
1114        int i, err;
1115
1116        if (!new->is_group) {
1117                NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
1118                return -EINVAL;
1119        }
1120
1121        err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1122        if (err)
1123                return err;
1124
1125        oldg = rtnl_dereference(old->nh_grp);
1126        newg = rtnl_dereference(new->nh_grp);
1127
1128        /* update parents - used by nexthop code for cleanup */
1129        for (i = 0; i < newg->num_nh; i++)
1130                newg->nh_entries[i].nh_parent = old;
1131
1132        rcu_assign_pointer(old->nh_grp, newg);
1133
1134        for (i = 0; i < oldg->num_nh; i++)
1135                oldg->nh_entries[i].nh_parent = new;
1136
1137        rcu_assign_pointer(new->nh_grp, oldg);
1138
1139        return 0;
1140}
1141
1142static void nh_group_v4_update(struct nh_group *nhg)
1143{
1144        struct nh_grp_entry *nhges;
1145        bool has_v4 = false;
1146        int i;
1147
1148        nhges = nhg->nh_entries;
1149        for (i = 0; i < nhg->num_nh; i++) {
1150                struct nh_info *nhi;
1151
1152                nhi = rtnl_dereference(nhges[i].nh->nh_info);
1153                if (nhi->family == AF_INET)
1154                        has_v4 = true;
1155        }
1156        nhg->has_v4 = has_v4;
1157}
1158
1159static int replace_nexthop_single(struct net *net, struct nexthop *old,
1160                                  struct nexthop *new,
1161                                  struct netlink_ext_ack *extack)
1162{
1163        u8 old_protocol, old_nh_flags;
1164        struct nh_info *oldi, *newi;
1165        struct nh_grp_entry *nhge;
1166        int err;
1167
1168        if (new->is_group) {
1169                NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
1170                return -EINVAL;
1171        }
1172
1173        err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1174        if (err)
1175                return err;
1176
1177        /* Hardware flags were set on 'old' as 'new' is not in the red-black
1178         * tree. Therefore, inherit the flags from 'old' to 'new'.
1179         */
1180        new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
1181
1182        oldi = rtnl_dereference(old->nh_info);
1183        newi = rtnl_dereference(new->nh_info);
1184
1185        newi->nh_parent = old;
1186        oldi->nh_parent = new;
1187
1188        old_protocol = old->protocol;
1189        old_nh_flags = old->nh_flags;
1190
1191        old->protocol = new->protocol;
1192        old->nh_flags = new->nh_flags;
1193
1194        rcu_assign_pointer(old->nh_info, newi);
1195        rcu_assign_pointer(new->nh_info, oldi);
1196
1197        /* Send a replace notification for all the groups using the nexthop. */
1198        list_for_each_entry(nhge, &old->grp_list, nh_list) {
1199                struct nexthop *nhp = nhge->nh_parent;
1200
1201                err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
1202                                             extack);
1203                if (err)
1204                        goto err_notify;
1205        }
1206
1207        /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
1208         * update IPv4 indication in all the groups using the nexthop.
1209         */
1210        if (oldi->family == AF_INET && newi->family == AF_INET6) {
1211                list_for_each_entry(nhge, &old->grp_list, nh_list) {
1212                        struct nexthop *nhp = nhge->nh_parent;
1213                        struct nh_group *nhg;
1214
1215                        nhg = rtnl_dereference(nhp->nh_grp);
1216                        nh_group_v4_update(nhg);
1217                }
1218        }
1219
1220        return 0;
1221
1222err_notify:
1223        rcu_assign_pointer(new->nh_info, newi);
1224        rcu_assign_pointer(old->nh_info, oldi);
1225        old->nh_flags = old_nh_flags;
1226        old->protocol = old_protocol;
1227        oldi->nh_parent = old;
1228        newi->nh_parent = new;
1229        list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
1230                struct nexthop *nhp = nhge->nh_parent;
1231
1232                call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
1233        }
1234        call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
1235        return err;
1236}
1237
1238static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
1239                                     struct nl_info *info)
1240{
1241        struct fib6_info *f6i;
1242
1243        if (!list_empty(&nh->fi_list)) {
1244                struct fib_info *fi;
1245
1246                /* expectation is a few fib_info per nexthop and then
1247                 * a lot of routes per fib_info. So mark the fib_info
1248                 * and then walk the fib tables once
1249                 */
1250                list_for_each_entry(fi, &nh->fi_list, nh_list)
1251                        fi->nh_updated = true;
1252
1253                fib_info_notify_update(net, info);
1254
1255                list_for_each_entry(fi, &nh->fi_list, nh_list)
1256                        fi->nh_updated = false;
1257        }
1258
1259        list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1260                ipv6_stub->fib6_rt_update(net, f6i, info);
1261}
1262
1263/* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
1264 * linked to this nexthop and for all groups that the nexthop
1265 * is a member of
1266 */
1267static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
1268                                   struct nl_info *info)
1269{
1270        struct nh_grp_entry *nhge;
1271
1272        __nexthop_replace_notify(net, nh, info);
1273
1274        list_for_each_entry(nhge, &nh->grp_list, nh_list)
1275                __nexthop_replace_notify(net, nhge->nh_parent, info);
1276}
1277
1278static int replace_nexthop(struct net *net, struct nexthop *old,
1279                           struct nexthop *new, struct netlink_ext_ack *extack)
1280{
1281        bool new_is_reject = false;
1282        struct nh_grp_entry *nhge;
1283        int err;
1284
1285        /* check that existing FIB entries are ok with the
1286         * new nexthop definition
1287         */
1288        err = fib_check_nh_list(old, new, extack);
1289        if (err)
1290                return err;
1291
1292        err = fib6_check_nh_list(old, new, extack);
1293        if (err)
1294                return err;
1295
1296        if (!new->is_group) {
1297                struct nh_info *nhi = rtnl_dereference(new->nh_info);
1298
1299                new_is_reject = nhi->reject_nh;
1300        }
1301
1302        list_for_each_entry(nhge, &old->grp_list, nh_list) {
1303                /* if new nexthop is a blackhole, any groups using this
1304                 * nexthop cannot have more than 1 path
1305                 */
1306                if (new_is_reject &&
1307                    nexthop_num_path(nhge->nh_parent) > 1) {
1308                        NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
1309                        return -EINVAL;
1310                }
1311
1312                err = fib_check_nh_list(nhge->nh_parent, new, extack);
1313                if (err)
1314                        return err;
1315
1316                err = fib6_check_nh_list(nhge->nh_parent, new, extack);
1317                if (err)
1318                        return err;
1319        }
1320
1321        if (old->is_group)
1322                err = replace_nexthop_grp(net, old, new, extack);
1323        else
1324                err = replace_nexthop_single(net, old, new, extack);
1325
1326        if (!err) {
1327                nh_rt_cache_flush(net, old);
1328
1329                __remove_nexthop(net, new, NULL);
1330                nexthop_put(new);
1331        }
1332
1333        return err;
1334}
1335
1336/* called with rtnl_lock held */
1337static int insert_nexthop(struct net *net, struct nexthop *new_nh,
1338                          struct nh_config *cfg, struct netlink_ext_ack *extack)
1339{
1340        struct rb_node **pp, *parent = NULL, *next;
1341        struct rb_root *root = &net->nexthop.rb_root;
1342        bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
1343        bool create = !!(cfg->nlflags & NLM_F_CREATE);
1344        u32 new_id = new_nh->id;
1345        int replace_notify = 0;
1346        int rc = -EEXIST;
1347
1348        pp = &root->rb_node;
1349        while (1) {
1350                struct nexthop *nh;
1351
1352                next = *pp;
1353                if (!next)
1354                        break;
1355
1356                parent = next;
1357
1358                nh = rb_entry(parent, struct nexthop, rb_node);
1359                if (new_id < nh->id) {
1360                        pp = &next->rb_left;
1361                } else if (new_id > nh->id) {
1362                        pp = &next->rb_right;
1363                } else if (replace) {
1364                        rc = replace_nexthop(net, nh, new_nh, extack);
1365                        if (!rc) {
1366                                new_nh = nh; /* send notification with old nh */
1367                                replace_notify = 1;
1368                        }
1369                        goto out;
1370                } else {
1371                        /* id already exists and not a replace */
1372                        goto out;
1373                }
1374        }
1375
1376        if (replace && !create) {
1377                NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
1378                rc = -ENOENT;
1379                goto out;
1380        }
1381
1382        rb_link_node_rcu(&new_nh->rb_node, parent, pp);
1383        rb_insert_color(&new_nh->rb_node, root);
1384
1385        rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
1386        if (rc)
1387                rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
1388
1389out:
1390        if (!rc) {
1391                nh_base_seq_inc(net);
1392                nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
1393                if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
1394                        nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
1395        }
1396
1397        return rc;
1398}
1399
1400/* rtnl */
1401/* remove all nexthops tied to a device being deleted */
1402static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
1403{
1404        unsigned int hash = nh_dev_hashfn(dev->ifindex);
1405        struct net *net = dev_net(dev);
1406        struct hlist_head *head = &net->nexthop.devhash[hash];
1407        struct hlist_node *n;
1408        struct nh_info *nhi;
1409
1410        hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
1411                if (nhi->fib_nhc.nhc_dev != dev)
1412                        continue;
1413
1414                if (nhi->reject_nh &&
1415                    (event == NETDEV_DOWN || event == NETDEV_CHANGE))
1416                        continue;
1417
1418                remove_nexthop(net, nhi->nh_parent, NULL);
1419        }
1420}
1421
1422/* rtnl; called when net namespace is deleted */
1423static void flush_all_nexthops(struct net *net)
1424{
1425        struct rb_root *root = &net->nexthop.rb_root;
1426        struct rb_node *node;
1427        struct nexthop *nh;
1428
1429        while ((node = rb_first(root))) {
1430                nh = rb_entry(node, struct nexthop, rb_node);
1431                remove_nexthop(net, nh, NULL);
1432                cond_resched();
1433        }
1434}
1435
1436static struct nexthop *nexthop_create_group(struct net *net,
1437                                            struct nh_config *cfg)
1438{
1439        struct nlattr *grps_attr = cfg->nh_grp;
1440        struct nexthop_grp *entry = nla_data(grps_attr);
1441        u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
1442        struct nh_group *nhg;
1443        struct nexthop *nh;
1444        int i;
1445
1446        if (WARN_ON(!num_nh))
1447                return ERR_PTR(-EINVAL);
1448
1449        nh = nexthop_alloc();
1450        if (!nh)
1451                return ERR_PTR(-ENOMEM);
1452
1453        nh->is_group = 1;
1454
1455        nhg = nexthop_grp_alloc(num_nh);
1456        if (!nhg) {
1457                kfree(nh);
1458                return ERR_PTR(-ENOMEM);
1459        }
1460
1461        /* spare group used for removals */
1462        nhg->spare = nexthop_grp_alloc(num_nh);
1463        if (!nhg->spare) {
1464                kfree(nhg);
1465                kfree(nh);
1466                return ERR_PTR(-ENOMEM);
1467        }
1468        nhg->spare->spare = nhg;
1469
1470        for (i = 0; i < nhg->num_nh; ++i) {
1471                struct nexthop *nhe;
1472                struct nh_info *nhi;
1473
1474                nhe = nexthop_find_by_id(net, entry[i].id);
1475                if (!nexthop_get(nhe))
1476                        goto out_no_nh;
1477
1478                nhi = rtnl_dereference(nhe->nh_info);
1479                if (nhi->family == AF_INET)
1480                        nhg->has_v4 = true;
1481
1482                nhg->nh_entries[i].nh = nhe;
1483                nhg->nh_entries[i].weight = entry[i].weight + 1;
1484                list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
1485                nhg->nh_entries[i].nh_parent = nh;
1486        }
1487
1488        if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
1489                nhg->mpath = 1;
1490
1491        WARN_ON_ONCE(nhg->mpath != 1);
1492
1493        if (nhg->mpath)
1494                nh_group_rebalance(nhg);
1495
1496        if (cfg->nh_fdb)
1497                nhg->fdb_nh = 1;
1498
1499        rcu_assign_pointer(nh->nh_grp, nhg);
1500
1501        return nh;
1502
1503out_no_nh:
1504        for (i--; i >= 0; --i) {
1505                list_del(&nhg->nh_entries[i].nh_list);
1506                nexthop_put(nhg->nh_entries[i].nh);
1507        }
1508
1509        kfree(nhg->spare);
1510        kfree(nhg);
1511        kfree(nh);
1512
1513        return ERR_PTR(-ENOENT);
1514}
1515
1516static int nh_create_ipv4(struct net *net, struct nexthop *nh,
1517                          struct nh_info *nhi, struct nh_config *cfg,
1518                          struct netlink_ext_ack *extack)
1519{
1520        struct fib_nh *fib_nh = &nhi->fib_nh;
1521        struct fib_config fib_cfg = {
1522                .fc_oif   = cfg->nh_ifindex,
1523                .fc_gw4   = cfg->gw.ipv4,
1524                .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
1525                .fc_flags = cfg->nh_flags,
1526                .fc_encap = cfg->nh_encap,
1527                .fc_encap_type = cfg->nh_encap_type,
1528        };
1529        u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
1530        int err;
1531
1532        err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
1533        if (err) {
1534                fib_nh_release(net, fib_nh);
1535                goto out;
1536        }
1537
1538        if (nhi->fdb_nh)
1539                goto out;
1540
1541        /* sets nh_dev if successful */
1542        err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
1543        if (!err) {
1544                nh->nh_flags = fib_nh->fib_nh_flags;
1545                fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
1546                                          fib_nh->fib_nh_scope);
1547        } else {
1548                fib_nh_release(net, fib_nh);
1549        }
1550out:
1551        return err;
1552}
1553
1554static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
1555                          struct nh_info *nhi, struct nh_config *cfg,
1556                          struct netlink_ext_ack *extack)
1557{
1558        struct fib6_nh *fib6_nh = &nhi->fib6_nh;
1559        struct fib6_config fib6_cfg = {
1560                .fc_table = l3mdev_fib_table(cfg->dev),
1561                .fc_ifindex = cfg->nh_ifindex,
1562                .fc_gateway = cfg->gw.ipv6,
1563                .fc_flags = cfg->nh_flags,
1564                .fc_encap = cfg->nh_encap,
1565                .fc_encap_type = cfg->nh_encap_type,
1566                .fc_is_fdb = cfg->nh_fdb,
1567        };
1568        int err;
1569
1570        if (!ipv6_addr_any(&cfg->gw.ipv6))
1571                fib6_cfg.fc_flags |= RTF_GATEWAY;
1572
1573        /* sets nh_dev if successful */
1574        err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
1575                                      extack);
1576        if (err)
1577                ipv6_stub->fib6_nh_release(fib6_nh);
1578        else
1579                nh->nh_flags = fib6_nh->fib_nh_flags;
1580
1581        return err;
1582}
1583
1584static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
1585                                      struct netlink_ext_ack *extack)
1586{
1587        struct nh_info *nhi;
1588        struct nexthop *nh;
1589        int err = 0;
1590
1591        nh = nexthop_alloc();
1592        if (!nh)
1593                return ERR_PTR(-ENOMEM);
1594
1595        nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
1596        if (!nhi) {
1597                kfree(nh);
1598                return ERR_PTR(-ENOMEM);
1599        }
1600
1601        nh->nh_flags = cfg->nh_flags;
1602        nh->net = net;
1603
1604        nhi->nh_parent = nh;
1605        nhi->family = cfg->nh_family;
1606        nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
1607
1608        if (cfg->nh_fdb)
1609                nhi->fdb_nh = 1;
1610
1611        if (cfg->nh_blackhole) {
1612                nhi->reject_nh = 1;
1613                cfg->nh_ifindex = net->loopback_dev->ifindex;
1614        }
1615
1616        switch (cfg->nh_family) {
1617        case AF_INET:
1618                err = nh_create_ipv4(net, nh, nhi, cfg, extack);
1619                break;
1620        case AF_INET6:
1621                err = nh_create_ipv6(net, nh, nhi, cfg, extack);
1622                break;
1623        }
1624
1625        if (err) {
1626                kfree(nhi);
1627                kfree(nh);
1628                return ERR_PTR(err);
1629        }
1630
1631        /* add the entry to the device based hash */
1632        if (!nhi->fdb_nh)
1633                nexthop_devhash_add(net, nhi);
1634
1635        rcu_assign_pointer(nh->nh_info, nhi);
1636
1637        return nh;
1638}
1639
1640/* called with rtnl lock held */
1641static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
1642                                   struct netlink_ext_ack *extack)
1643{
1644        struct nexthop *nh;
1645        int err;
1646
1647        if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
1648                NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
1649                return ERR_PTR(-EINVAL);
1650        }
1651
1652        if (!cfg->nh_id) {
1653                cfg->nh_id = nh_find_unused_id(net);
1654                if (!cfg->nh_id) {
1655                        NL_SET_ERR_MSG(extack, "No unused id");
1656                        return ERR_PTR(-EINVAL);
1657                }
1658        }
1659
1660        if (cfg->nh_grp)
1661                nh = nexthop_create_group(net, cfg);
1662        else
1663                nh = nexthop_create(net, cfg, extack);
1664
1665        if (IS_ERR(nh))
1666                return nh;
1667
1668        refcount_set(&nh->refcnt, 1);
1669        nh->id = cfg->nh_id;
1670        nh->protocol = cfg->nh_protocol;
1671        nh->net = net;
1672
1673        err = insert_nexthop(net, nh, cfg, extack);
1674        if (err) {
1675                __remove_nexthop(net, nh, NULL);
1676                nexthop_put(nh);
1677                nh = ERR_PTR(err);
1678        }
1679
1680        return nh;
1681}
1682
1683static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
1684                            struct nlmsghdr *nlh, struct nh_config *cfg,
1685                            struct netlink_ext_ack *extack)
1686{
1687        struct nhmsg *nhm = nlmsg_data(nlh);
1688        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
1689        int err;
1690
1691        err = nlmsg_parse(nlh, sizeof(*nhm), tb,
1692                          ARRAY_SIZE(rtm_nh_policy_new) - 1,
1693                          rtm_nh_policy_new, extack);
1694        if (err < 0)
1695                return err;
1696
1697        err = -EINVAL;
1698        if (nhm->resvd || nhm->nh_scope) {
1699                NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
1700                goto out;
1701        }
1702        if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
1703                NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
1704                goto out;
1705        }
1706
1707        switch (nhm->nh_family) {
1708        case AF_INET:
1709        case AF_INET6:
1710                break;
1711        case AF_UNSPEC:
1712                if (tb[NHA_GROUP])
1713                        break;
1714                fallthrough;
1715        default:
1716                NL_SET_ERR_MSG(extack, "Invalid address family");
1717                goto out;
1718        }
1719
1720        memset(cfg, 0, sizeof(*cfg));
1721        cfg->nlflags = nlh->nlmsg_flags;
1722        cfg->nlinfo.portid = NETLINK_CB(skb).portid;
1723        cfg->nlinfo.nlh = nlh;
1724        cfg->nlinfo.nl_net = net;
1725
1726        cfg->nh_family = nhm->nh_family;
1727        cfg->nh_protocol = nhm->nh_protocol;
1728        cfg->nh_flags = nhm->nh_flags;
1729
1730        if (tb[NHA_ID])
1731                cfg->nh_id = nla_get_u32(tb[NHA_ID]);
1732
1733        if (tb[NHA_FDB]) {
1734                if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
1735                    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
1736                        NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
1737                        goto out;
1738                }
1739                if (nhm->nh_flags) {
1740                        NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
1741                        goto out;
1742                }
1743                cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
1744        }
1745
1746        if (tb[NHA_GROUP]) {
1747                if (nhm->nh_family != AF_UNSPEC) {
1748                        NL_SET_ERR_MSG(extack, "Invalid family for group");
1749                        goto out;
1750                }
1751                cfg->nh_grp = tb[NHA_GROUP];
1752
1753                cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
1754                if (tb[NHA_GROUP_TYPE])
1755                        cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
1756
1757                if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
1758                        NL_SET_ERR_MSG(extack, "Invalid group type");
1759                        goto out;
1760                }
1761                err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
1762
1763                /* no other attributes should be set */
1764                goto out;
1765        }
1766
1767        if (tb[NHA_BLACKHOLE]) {
1768                if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
1769                    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
1770                        NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
1771                        goto out;
1772                }
1773
1774                cfg->nh_blackhole = 1;
1775                err = 0;
1776                goto out;
1777        }
1778
1779        if (!cfg->nh_fdb && !tb[NHA_OIF]) {
1780                NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
1781                goto out;
1782        }
1783
1784        if (!cfg->nh_fdb && tb[NHA_OIF]) {
1785                cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1786                if (cfg->nh_ifindex)
1787                        cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
1788
1789                if (!cfg->dev) {
1790                        NL_SET_ERR_MSG(extack, "Invalid device index");
1791                        goto out;
1792                } else if (!(cfg->dev->flags & IFF_UP)) {
1793                        NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1794                        err = -ENETDOWN;
1795                        goto out;
1796                } else if (!netif_carrier_ok(cfg->dev)) {
1797                        NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1798                        err = -ENETDOWN;
1799                        goto out;
1800                }
1801        }
1802
1803        err = -EINVAL;
1804        if (tb[NHA_GATEWAY]) {
1805                struct nlattr *gwa = tb[NHA_GATEWAY];
1806
1807                switch (cfg->nh_family) {
1808                case AF_INET:
1809                        if (nla_len(gwa) != sizeof(u32)) {
1810                                NL_SET_ERR_MSG(extack, "Invalid gateway");
1811                                goto out;
1812                        }
1813                        cfg->gw.ipv4 = nla_get_be32(gwa);
1814                        break;
1815                case AF_INET6:
1816                        if (nla_len(gwa) != sizeof(struct in6_addr)) {
1817                                NL_SET_ERR_MSG(extack, "Invalid gateway");
1818                                goto out;
1819                        }
1820                        cfg->gw.ipv6 = nla_get_in6_addr(gwa);
1821                        break;
1822                default:
1823                        NL_SET_ERR_MSG(extack,
1824                                       "Unknown address family for gateway");
1825                        goto out;
1826                }
1827        } else {
1828                /* device only nexthop (no gateway) */
1829                if (cfg->nh_flags & RTNH_F_ONLINK) {
1830                        NL_SET_ERR_MSG(extack,
1831                                       "ONLINK flag can not be set for nexthop without a gateway");
1832                        goto out;
1833                }
1834        }
1835
1836        if (tb[NHA_ENCAP]) {
1837                cfg->nh_encap = tb[NHA_ENCAP];
1838
1839                if (!tb[NHA_ENCAP_TYPE]) {
1840                        NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
1841                        goto out;
1842                }
1843
1844                cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
1845                err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
1846                if (err < 0)
1847                        goto out;
1848
1849        } else if (tb[NHA_ENCAP_TYPE]) {
1850                NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
1851                goto out;
1852        }
1853
1854
1855        err = 0;
1856out:
1857        return err;
1858}
1859
1860/* rtnl */
1861static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1862                           struct netlink_ext_ack *extack)
1863{
1864        struct net *net = sock_net(skb->sk);
1865        struct nh_config cfg;
1866        struct nexthop *nh;
1867        int err;
1868
1869        err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
1870        if (!err) {
1871                nh = nexthop_add(net, &cfg, extack);
1872                if (IS_ERR(nh))
1873                        err = PTR_ERR(nh);
1874        }
1875
1876        return err;
1877}
1878
1879static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
1880                                  struct nlattr **tb, u32 *id,
1881                                  struct netlink_ext_ack *extack)
1882{
1883        struct nhmsg *nhm = nlmsg_data(nlh);
1884
1885        if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
1886                NL_SET_ERR_MSG(extack, "Invalid values in header");
1887                return -EINVAL;
1888        }
1889
1890        if (!tb[NHA_ID]) {
1891                NL_SET_ERR_MSG(extack, "Nexthop id is missing");
1892                return -EINVAL;
1893        }
1894
1895        *id = nla_get_u32(tb[NHA_ID]);
1896        if (!(*id)) {
1897                NL_SET_ERR_MSG(extack, "Invalid nexthop id");
1898                return -EINVAL;
1899        }
1900
1901        return 0;
1902}
1903
1904static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
1905                                struct netlink_ext_ack *extack)
1906{
1907        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
1908        int err;
1909
1910        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
1911                          ARRAY_SIZE(rtm_nh_policy_get) - 1,
1912                          rtm_nh_policy_get, extack);
1913        if (err < 0)
1914                return err;
1915
1916        return __nh_valid_get_del_req(nlh, tb, id, extack);
1917}
1918
1919/* rtnl */
1920static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1921                           struct netlink_ext_ack *extack)
1922{
1923        struct net *net = sock_net(skb->sk);
1924        struct nl_info nlinfo = {
1925                .nlh = nlh,
1926                .nl_net = net,
1927                .portid = NETLINK_CB(skb).portid,
1928        };
1929        struct nexthop *nh;
1930        int err;
1931        u32 id;
1932
1933        err = nh_valid_get_del_req(nlh, &id, extack);
1934        if (err)
1935                return err;
1936
1937        nh = nexthop_find_by_id(net, id);
1938        if (!nh)
1939                return -ENOENT;
1940
1941        remove_nexthop(net, nh, &nlinfo);
1942
1943        return 0;
1944}
1945
1946/* rtnl */
1947static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
1948                           struct netlink_ext_ack *extack)
1949{
1950        struct net *net = sock_net(in_skb->sk);
1951        struct sk_buff *skb = NULL;
1952        struct nexthop *nh;
1953        int err;
1954        u32 id;
1955
1956        err = nh_valid_get_del_req(nlh, &id, extack);
1957        if (err)
1958                return err;
1959
1960        err = -ENOBUFS;
1961        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1962        if (!skb)
1963                goto out;
1964
1965        err = -ENOENT;
1966        nh = nexthop_find_by_id(net, id);
1967        if (!nh)
1968                goto errout_free;
1969
1970        err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
1971                           nlh->nlmsg_seq, 0);
1972        if (err < 0) {
1973                WARN_ON(err == -EMSGSIZE);
1974                goto errout_free;
1975        }
1976
1977        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1978out:
1979        return err;
1980errout_free:
1981        kfree_skb(skb);
1982        goto out;
1983}
1984
/**
 * struct nh_dump_filter - selection criteria for a nexthop dump
 * @dev_idx:      ifindex taken from NHA_OIF; 0 means no device filter
 * @master_idx:   ifindex taken from NHA_MASTER; 0 means no master filter
 * @group_filter: true when NHA_GROUPS was present in the request
 * @fdb_filter:   true when NHA_FDB was present in the request
 */
struct nh_dump_filter {
	int dev_idx;
	int master_idx;
	bool group_filter;
	bool fdb_filter;
};
1991
1992static bool nh_dump_filtered(struct nexthop *nh,
1993                             struct nh_dump_filter *filter, u8 family)
1994{
1995        const struct net_device *dev;
1996        const struct nh_info *nhi;
1997
1998        if (filter->group_filter && !nh->is_group)
1999                return true;
2000
2001        if (!filter->dev_idx && !filter->master_idx && !family)
2002                return false;
2003
2004        if (nh->is_group)
2005                return true;
2006
2007        nhi = rtnl_dereference(nh->nh_info);
2008        if (family && nhi->family != family)
2009                return true;
2010
2011        dev = nhi->fib_nhc.nhc_dev;
2012        if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
2013                return true;
2014
2015        if (filter->master_idx) {
2016                struct net_device *master;
2017
2018                if (!dev)
2019                        return true;
2020
2021                master = netdev_master_upper_dev_get((struct net_device *)dev);
2022                if (!master || master->ifindex != filter->master_idx)
2023                        return true;
2024        }
2025
2026        return false;
2027}
2028
2029static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
2030                               struct nh_dump_filter *filter,
2031                               struct netlink_ext_ack *extack)
2032{
2033        struct nhmsg *nhm;
2034        u32 idx;
2035
2036        if (tb[NHA_OIF]) {
2037                idx = nla_get_u32(tb[NHA_OIF]);
2038                if (idx > INT_MAX) {
2039                        NL_SET_ERR_MSG(extack, "Invalid device index");
2040                        return -EINVAL;
2041                }
2042                filter->dev_idx = idx;
2043        }
2044        if (tb[NHA_MASTER]) {
2045                idx = nla_get_u32(tb[NHA_MASTER]);
2046                if (idx > INT_MAX) {
2047                        NL_SET_ERR_MSG(extack, "Invalid master device index");
2048                        return -EINVAL;
2049                }
2050                filter->master_idx = idx;
2051        }
2052        filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
2053        filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
2054
2055        nhm = nlmsg_data(nlh);
2056        if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
2057                NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
2058                return -EINVAL;
2059        }
2060
2061        return 0;
2062}
2063
2064static int nh_valid_dump_req(const struct nlmsghdr *nlh,
2065                             struct nh_dump_filter *filter,
2066                             struct netlink_callback *cb)
2067{
2068        struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
2069        int err;
2070
2071        err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
2072                          ARRAY_SIZE(rtm_nh_policy_dump) - 1,
2073                          rtm_nh_policy_dump, cb->extack);
2074        if (err < 0)
2075                return err;
2076
2077        return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
2078}
2079
2080struct rtm_dump_nh_ctx {
2081        u32 idx;
2082};
2083
2084static struct rtm_dump_nh_ctx *
2085rtm_dump_nh_ctx(struct netlink_callback *cb)
2086{
2087        struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
2088
2089        BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
2090        return ctx;
2091}
2092
2093static int rtm_dump_walk_nexthops(struct sk_buff *skb,
2094                                  struct netlink_callback *cb,
2095                                  struct rb_root *root,
2096                                  struct rtm_dump_nh_ctx *ctx,
2097                                  int (*nh_cb)(struct sk_buff *skb,
2098                                               struct netlink_callback *cb,
2099                                               struct nexthop *nh, void *data),
2100                                  void *data)
2101{
2102        struct rb_node *node;
2103        int idx = 0, s_idx;
2104        int err;
2105
2106        s_idx = ctx->idx;
2107        for (node = rb_first(root); node; node = rb_next(node)) {
2108                struct nexthop *nh;
2109
2110                if (idx < s_idx)
2111                        goto cont;
2112
2113                nh = rb_entry(node, struct nexthop, rb_node);
2114                ctx->idx = idx;
2115                err = nh_cb(skb, cb, nh, data);
2116                if (err)
2117                        return err;
2118cont:
2119                idx++;
2120        }
2121
2122        ctx->idx = idx;
2123        return 0;
2124}
2125
2126static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
2127                               struct nexthop *nh, void *data)
2128{
2129        struct nhmsg *nhm = nlmsg_data(cb->nlh);
2130        struct nh_dump_filter *filter = data;
2131
2132        if (nh_dump_filtered(nh, filter, nhm->nh_family))
2133                return 0;
2134
2135        return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
2136                            NETLINK_CB(cb->skb).portid,
2137                            cb->nlh->nlmsg_seq, NLM_F_MULTI);
2138}
2139
2140/* rtnl */
2141static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
2142{
2143        struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
2144        struct net *net = sock_net(skb->sk);
2145        struct rb_root *root = &net->nexthop.rb_root;
2146        struct nh_dump_filter filter = {};
2147        int err;
2148
2149        err = nh_valid_dump_req(cb->nlh, &filter, cb);
2150        if (err < 0)
2151                return err;
2152
2153        err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
2154                                     &rtm_dump_nexthop_cb, &filter);
2155        if (err < 0) {
2156                if (likely(skb->len))
2157                        goto out;
2158                goto out_err;
2159        }
2160
2161out:
2162        err = skb->len;
2163out_err:
2164        cb->seq = net->nexthop.seq;
2165        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2166        return err;
2167}
2168
2169static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
2170{
2171        unsigned int hash = nh_dev_hashfn(dev->ifindex);
2172        struct net *net = dev_net(dev);
2173        struct hlist_head *head = &net->nexthop.devhash[hash];
2174        struct hlist_node *n;
2175        struct nh_info *nhi;
2176
2177        hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2178                if (nhi->fib_nhc.nhc_dev == dev) {
2179                        if (nhi->family == AF_INET)
2180                                fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
2181                                                   orig_mtu);
2182                }
2183        }
2184}
2185
2186/* rtnl */
2187static int nh_netdev_event(struct notifier_block *this,
2188                           unsigned long event, void *ptr)
2189{
2190        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2191        struct netdev_notifier_info_ext *info_ext;
2192
2193        switch (event) {
2194        case NETDEV_DOWN:
2195        case NETDEV_UNREGISTER:
2196                nexthop_flush_dev(dev, event);
2197                break;
2198        case NETDEV_CHANGE:
2199                if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
2200                        nexthop_flush_dev(dev, event);
2201                break;
2202        case NETDEV_CHANGEMTU:
2203                info_ext = ptr;
2204                nexthop_sync_mtu(dev, info_ext->ext.mtu);
2205                rt_cache_flush(dev_net(dev));
2206                break;
2207        }
2208        return NOTIFY_DONE;
2209}
2210
2211static struct notifier_block nh_netdev_notifier = {
2212        .notifier_call = nh_netdev_event,
2213};
2214
2215static int nexthops_dump(struct net *net, struct notifier_block *nb,
2216                         struct netlink_ext_ack *extack)
2217{
2218        struct rb_root *root = &net->nexthop.rb_root;
2219        struct rb_node *node;
2220        int err = 0;
2221
2222        for (node = rb_first(root); node; node = rb_next(node)) {
2223                struct nexthop *nh;
2224
2225                nh = rb_entry(node, struct nexthop, rb_node);
2226                err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
2227                                            extack);
2228                if (err)
2229                        break;
2230        }
2231
2232        return err;
2233}
2234
2235int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
2236                              struct netlink_ext_ack *extack)
2237{
2238        int err;
2239
2240        rtnl_lock();
2241        err = nexthops_dump(net, nb, extack);
2242        if (err)
2243                goto unlock;
2244        err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
2245                                               nb);
2246unlock:
2247        rtnl_unlock();
2248        return err;
2249}
2250EXPORT_SYMBOL(register_nexthop_notifier);
2251
2252int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
2253{
2254        return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
2255                                                  nb);
2256}
2257EXPORT_SYMBOL(unregister_nexthop_notifier);
2258
2259void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
2260{
2261        struct nexthop *nexthop;
2262
2263        rcu_read_lock();
2264
2265        nexthop = nexthop_find_by_id(net, id);
2266        if (!nexthop)
2267                goto out;
2268
2269        nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
2270        if (offload)
2271                nexthop->nh_flags |= RTNH_F_OFFLOAD;
2272        if (trap)
2273                nexthop->nh_flags |= RTNH_F_TRAP;
2274
2275out:
2276        rcu_read_unlock();
2277}
2278EXPORT_SYMBOL(nexthop_set_hw_flags);
2279
2280static void __net_exit nexthop_net_exit(struct net *net)
2281{
2282        rtnl_lock();
2283        flush_all_nexthops(net);
2284        rtnl_unlock();
2285        kfree(net->nexthop.devhash);
2286}
2287
2288static int __net_init nexthop_net_init(struct net *net)
2289{
2290        size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
2291
2292        net->nexthop.rb_root = RB_ROOT;
2293        net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
2294        if (!net->nexthop.devhash)
2295                return -ENOMEM;
2296        BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
2297
2298        return 0;
2299}
2300
2301static struct pernet_operations nexthop_net_ops = {
2302        .init = nexthop_net_init,
2303        .exit = nexthop_net_exit,
2304};
2305
2306static int __init nexthop_init(void)
2307{
2308        register_pernet_subsys(&nexthop_net_ops);
2309
2310        register_netdevice_notifier(&nh_netdev_notifier);
2311
2312        rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2313        rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
2314        rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
2315                      rtm_dump_nexthop, 0);
2316
2317        rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2318        rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2319
2320        rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2321        rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2322
2323        return 0;
2324}
2325subsys_initcall(nexthop_init);
2326