linux/net/ipv4/fib_frontend.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 Forwarding Information Base: FIB frontend.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 *
  10 *              This program is free software; you can redistribute it and/or
  11 *              modify it under the terms of the GNU General Public License
  12 *              as published by the Free Software Foundation; either version
  13 *              2 of the License, or (at your option) any later version.
  14 */
  15
  16#include <linux/module.h>
  17#include <asm/uaccess.h>
  18#include <linux/bitops.h>
  19#include <linux/capability.h>
  20#include <linux/types.h>
  21#include <linux/kernel.h>
  22#include <linux/mm.h>
  23#include <linux/string.h>
  24#include <linux/socket.h>
  25#include <linux/sockios.h>
  26#include <linux/errno.h>
  27#include <linux/in.h>
  28#include <linux/inet.h>
  29#include <linux/inetdevice.h>
  30#include <linux/netdevice.h>
  31#include <linux/if_addr.h>
  32#include <linux/if_arp.h>
  33#include <linux/skbuff.h>
  34#include <linux/cache.h>
  35#include <linux/init.h>
  36#include <linux/list.h>
  37#include <linux/slab.h>
  38
  39#include <net/ip.h>
  40#include <net/protocol.h>
  41#include <net/route.h>
  42#include <net/tcp.h>
  43#include <net/sock.h>
  44#include <net/arp.h>
  45#include <net/ip_fib.h>
  46#include <net/rtnetlink.h>
  47#include <net/xfrm.h>
  48#include <net/l3mdev.h>
  49#include <trace/events/fib.h>
  50
  51#ifndef CONFIG_IP_MULTIPLE_TABLES
  52
  53static int __net_init fib4_rules_init(struct net *net)
  54{
  55        struct fib_table *local_table, *main_table;
  56
  57        main_table  = fib_trie_table(RT_TABLE_MAIN, NULL);
  58        if (!main_table)
  59                return -ENOMEM;
  60
  61        local_table = fib_trie_table(RT_TABLE_LOCAL, main_table);
  62        if (!local_table)
  63                goto fail;
  64
  65        hlist_add_head_rcu(&local_table->tb_hlist,
  66                                &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
  67        hlist_add_head_rcu(&main_table->tb_hlist,
  68                                &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
  69        return 0;
  70
  71fail:
  72        fib_free_table(main_table);
  73        return -ENOMEM;
  74}
  75#else
  76
  77struct fib_table *fib_new_table(struct net *net, u32 id)
  78{
  79        struct fib_table *tb, *alias = NULL;
  80        unsigned int h;
  81
  82        if (id == 0)
  83                id = RT_TABLE_MAIN;
  84        tb = fib_get_table(net, id);
  85        if (tb)
  86                return tb;
  87
  88        if (id == RT_TABLE_LOCAL)
  89                alias = fib_new_table(net, RT_TABLE_MAIN);
  90
  91        tb = fib_trie_table(id, alias);
  92        if (!tb)
  93                return NULL;
  94
  95        switch (id) {
  96        case RT_TABLE_MAIN:
  97                rcu_assign_pointer(net->ipv4.fib_main, tb);
  98                break;
  99        case RT_TABLE_DEFAULT:
 100                rcu_assign_pointer(net->ipv4.fib_default, tb);
 101                break;
 102        default:
 103                break;
 104        }
 105
 106        h = id & (FIB_TABLE_HASHSZ - 1);
 107        hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
 108        return tb;
 109}
 110EXPORT_SYMBOL_GPL(fib_new_table);
 111
 112/* caller must hold either rtnl or rcu read lock */
 113struct fib_table *fib_get_table(struct net *net, u32 id)
 114{
 115        struct fib_table *tb;
 116        struct hlist_head *head;
 117        unsigned int h;
 118
 119        if (id == 0)
 120                id = RT_TABLE_MAIN;
 121        h = id & (FIB_TABLE_HASHSZ - 1);
 122
 123        head = &net->ipv4.fib_table_hash[h];
 124        hlist_for_each_entry_rcu(tb, head, tb_hlist) {
 125                if (tb->tb_id == id)
 126                        return tb;
 127        }
 128        return NULL;
 129}
 130#endif /* CONFIG_IP_MULTIPLE_TABLES */
 131
 132static void fib_replace_table(struct net *net, struct fib_table *old,
 133                              struct fib_table *new)
 134{
 135#ifdef CONFIG_IP_MULTIPLE_TABLES
 136        switch (new->tb_id) {
 137        case RT_TABLE_MAIN:
 138                rcu_assign_pointer(net->ipv4.fib_main, new);
 139                break;
 140        case RT_TABLE_DEFAULT:
 141                rcu_assign_pointer(net->ipv4.fib_default, new);
 142                break;
 143        default:
 144                break;
 145        }
 146
 147#endif
 148        /* replace the old table in the hlist */
 149        hlist_replace_rcu(&old->tb_hlist, &new->tb_hlist);
 150}
 151
 152int fib_unmerge(struct net *net)
 153{
 154        struct fib_table *old, *new, *main_table;
 155
 156        /* attempt to fetch local table if it has been allocated */
 157        old = fib_get_table(net, RT_TABLE_LOCAL);
 158        if (!old)
 159                return 0;
 160
 161        new = fib_trie_unmerge(old);
 162        if (!new)
 163                return -ENOMEM;
 164
 165        /* table is already unmerged */
 166        if (new == old)
 167                return 0;
 168
 169        /* replace merged table with clean table */
 170        fib_replace_table(net, old, new);
 171        fib_free_table(old);
 172
 173        /* attempt to fetch main table if it has been allocated */
 174        main_table = fib_get_table(net, RT_TABLE_MAIN);
 175        if (!main_table)
 176                return 0;
 177
 178        /* flush local entries from main table */
 179        fib_table_flush_external(main_table);
 180
 181        return 0;
 182}
 183
 184static void fib_flush(struct net *net)
 185{
 186        int flushed = 0;
 187        unsigned int h;
 188
 189        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 190                struct hlist_head *head = &net->ipv4.fib_table_hash[h];
 191                struct hlist_node *tmp;
 192                struct fib_table *tb;
 193
 194                hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
 195                        flushed += fib_table_flush(net, tb);
 196        }
 197
 198        if (flushed)
 199                rt_cache_flush(net);
 200}
 201
 202/*
 203 * Find address type as if only "dev" was present in the system. If
 204 * on_dev is NULL then all interfaces are taken into consideration.
 205 */
 206static inline unsigned int __inet_dev_addr_type(struct net *net,
 207                                                const struct net_device *dev,
 208                                                __be32 addr, u32 tb_id)
 209{
 210        struct flowi4           fl4 = { .daddr = addr };
 211        struct fib_result       res;
 212        unsigned int ret = RTN_BROADCAST;
 213        struct fib_table *table;
 214
 215        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 216                return RTN_BROADCAST;
 217        if (ipv4_is_multicast(addr))
 218                return RTN_MULTICAST;
 219
 220        rcu_read_lock();
 221
 222        table = fib_get_table(net, tb_id);
 223        if (table) {
 224                ret = RTN_UNICAST;
 225                if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
 226                        if (!dev || dev == res.fi->fib_dev)
 227                                ret = res.type;
 228                }
 229        }
 230
 231        rcu_read_unlock();
 232        return ret;
 233}
 234
 235unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
 236{
 237        return __inet_dev_addr_type(net, NULL, addr, tb_id);
 238}
 239EXPORT_SYMBOL(inet_addr_type_table);
 240
 241unsigned int inet_addr_type(struct net *net, __be32 addr)
 242{
 243        return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
 244}
 245EXPORT_SYMBOL(inet_addr_type);
 246
 247unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 248                                __be32 addr)
 249{
 250        u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
 251
 252        return __inet_dev_addr_type(net, dev, addr, rt_table);
 253}
 254EXPORT_SYMBOL(inet_dev_addr_type);
 255
 256/* inet_addr_type with dev == NULL but using the table from a dev
 257 * if one is associated
 258 */
 259unsigned int inet_addr_type_dev_table(struct net *net,
 260                                      const struct net_device *dev,
 261                                      __be32 addr)
 262{
 263        u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
 264
 265        return __inet_dev_addr_type(net, NULL, addr, rt_table);
 266}
 267EXPORT_SYMBOL(inet_addr_type_dev_table);
 268
 269__be32 fib_compute_spec_dst(struct sk_buff *skb)
 270{
 271        struct net_device *dev = skb->dev;
 272        struct in_device *in_dev;
 273        struct fib_result res;
 274        struct rtable *rt;
 275        struct net *net;
 276        int scope;
 277
 278        rt = skb_rtable(skb);
 279        if ((rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL)) ==
 280            RTCF_LOCAL)
 281                return ip_hdr(skb)->daddr;
 282
 283        in_dev = __in_dev_get_rcu(dev);
 284        BUG_ON(!in_dev);
 285
 286        net = dev_net(dev);
 287
 288        scope = RT_SCOPE_UNIVERSE;
 289        if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
 290                struct flowi4 fl4 = {
 291                        .flowi4_iif = LOOPBACK_IFINDEX,
 292                        .daddr = ip_hdr(skb)->saddr,
 293                        .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
 294                        .flowi4_scope = scope,
 295                        .flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0,
 296                };
 297                if (!fib_lookup(net, &fl4, &res, 0))
 298                        return FIB_RES_PREFSRC(net, res);
 299        } else {
 300                scope = RT_SCOPE_LINK;
 301        }
 302
 303        return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
 304}
 305
 306/* Given (packet source, input interface) and optional (dst, oif, tos):
 307 * - (main) check, that source is valid i.e. not broadcast or our local
 308 *   address.
 309 * - figure out what "logical" interface this packet arrived
 310 *   and calculate "specific destination" address.
 311 * - check, that packet arrived from expected physical interface.
 312 * called with rcu_read_lock()
 313 */
  314static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
  315                                 u8 tos, int oif, struct net_device *dev,
  316                                 int rpf, struct in_device *idev, u32 *itag)
  317{
             /*
              * Return values, as produced below:
              *   1 / 0   - source accepted; the value is the result of
              *             "nh_scope >= RT_SCOPE_HOST" for the matched next hop,
              *             or 0 from the last_resort path (which also zeroes *itag)
              *   -EINVAL - the reverse route is neither unicast nor an accepted
              *             local route
              *   -EXDEV  - reverse-path filtering rejected the packet
              */
  318        int ret, no_addr;
  319        struct fib_result res;
  320        struct flowi4 fl4;
  321        struct net *net;
  322        bool dev_match;
  323
             /*
              * Build a flow for the reverse direction: the packet's source is
              * the lookup destination and vice versa.  Only the fields assigned
              * below are initialised; fl4 is not zeroed first.
              */
  324        fl4.flowi4_oif = 0;
  325        fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
             /* no l3mdev master: use oif if given, else the loopback ifindex */
  326        if (!fl4.flowi4_iif)
  327                fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
  328        fl4.daddr = src;
  329        fl4.saddr = dst;
  330        fl4.flowi4_tos = tos;
  331        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
  332        fl4.flowi4_tun_key.tun_id = 0;
  333        fl4.flowi4_flags = 0;
  334
             /* remember whether the input device has no IPv4 address at all */
  335        no_addr = idev->ifa_list == NULL;
  336
             /* the skb mark only takes part in the lookup if src_valid_mark is on */
  337        fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
  338
  339        trace_fib_validate_source(dev, &fl4);
  340
  341        net = dev_net(dev);
             /* no route back to the source at all */
  342        if (fib_lookup(net, &fl4, &res, 0))
  343                goto last_resort;
             /* only unicast routes, or local ones when accept_local is set, pass */
  344        if (res.type != RTN_UNICAST &&
  345            (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
  346                goto e_inval;
             /*
              * Without rp_filter and without tclassid users the device match
              * below is only needed when the packet would leave through the
              * interface it came in on and TX redirects are enabled.
              */
  347        if (!rpf && !fib_num_tclassid_users(dev_net(dev)) &&
  348            (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
  349                goto last_resort;
  350        fib_combine_itag(itag, &res);
  351        dev_match = false;
  352
  353#ifdef CONFIG_IP_ROUTE_MULTIPATH
             /*
              * "ret" doubles as the next-hop index here.  A next hop matches if
              * it uses the input device itself or a device whose l3mdev master
              * is the input device.
              */
  354        for (ret = 0; ret < res.fi->fib_nhs; ret++) {
  355                struct fib_nh *nh = &res.fi->fib_nh[ret];
  356
  357                if (nh->nh_dev == dev) {
  358                        dev_match = true;
  359                        break;
  360                } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
  361                        dev_match = true;
  362                        break;
  363                }
  364        }
  365#else
  366        if (FIB_RES_DEV(res) == dev)
  367                dev_match = true;
  368#endif
             /* reverse route points back at the input device: accept */
  369        if (dev_match) {
  370                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
  371                return ret;
  372        }
  373        if (no_addr)
  374                goto last_resort;
             /*
              * NOTE(review): rpf == 1 presumably corresponds to strict rp_filter
              * mode; other values continue with the device-bound lookup below -
              * confirm against the rp_filter sysctl documentation.
              */
  375        if (rpf == 1)
  376                goto e_rpf;
             /* retry the lookup restricted to the input device, ignoring link state */
  377        fl4.flowi4_oif = dev->ifindex;
  378
  379        ret = 0;
  380        if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
  381                if (res.type == RTN_UNICAST)
  382                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
  383        }
  384        return ret;
  385
  386last_resort:
             /* any active rp_filter mode rejects here; otherwise accept untagged */
  387        if (rpf)
  388                goto e_rpf;
  389        *itag = 0;
  390        return 0;
  391
  392e_inval:
  393        return -EINVAL;
  394e_rpf:
  395        return -EXDEV;
  396}
 397
 398/* Ignore rp_filter for packets protected by IPsec. */
 399int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 400                        u8 tos, int oif, struct net_device *dev,
 401                        struct in_device *idev, u32 *itag)
 402{
 403        int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
 404
 405        if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
 406            IN_DEV_ACCEPT_LOCAL(idev) &&
 407            (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
 408                *itag = 0;
 409                return 0;
 410        }
 411        return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
 412}
 413
 414static inline __be32 sk_extract_addr(struct sockaddr *addr)
 415{
 416        return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 417}
 418
 419static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
 420{
 421        struct nlattr *nla;
 422
 423        nla = (struct nlattr *) ((char *) mx + len);
 424        nla->nla_type = type;
 425        nla->nla_len = nla_attr_size(4);
 426        *(u32 *) nla_data(nla) = value;
 427
 428        return len + nla_total_size(4);
 429}
 430
  431static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
  432                                 struct fib_config *cfg)
  433{
             /*
              * Translate a legacy ioctl route request (struct rtentry) into a
              * struct fib_config.  @cfg is fully reset first.
              *
              * Returns 0 on success or a negative errno.  On success for a
              * non-delete request carrying MTU/WINDOW/IRTT flags, cfg->fc_mx
              * points to a kzalloc()ed attribute buffer that the caller must
              * kfree(); it is allocated last, so no error path leaves it set.
              *
              * Uses __dev_get_by_name() and __in_dev_get_rtnl(); the caller in
              * this file (ip_rt_ioctl) invokes it with rtnl_lock() held.
              */
  434        __be32 addr;
  435        int plen;
  436
  437        memset(cfg, 0, sizeof(*cfg));
  438        cfg->fc_nlinfo.nl_net = net;
  439
  440        if (rt->rt_dst.sa_family != AF_INET)
  441                return -EAFNOSUPPORT;
  442
  443        /*
  444         * Check mask for validity:
  445         * a) it must be contiguous.
  446         * b) destination must have all host bits clear.
  447         * c) if application forgot to set correct family (AF_INET),
  448         *    reject request unless it is absolutely clear i.e.
  449         *    both family and mask are zero.
  450         */
             /* host routes keep the full /32; otherwise derive plen from the mask */
  451        plen = 32;
  452        addr = sk_extract_addr(&rt->rt_dst);
  453        if (!(rt->rt_flags & RTF_HOST)) {
  454                __be32 mask = sk_extract_addr(&rt->rt_genmask);
  455
  456                if (rt->rt_genmask.sa_family != AF_INET) {
  457                        if (mask || rt->rt_genmask.sa_family)
  458                                return -EAFNOSUPPORT;
  459                }
  460
  461                if (bad_mask(mask, addr))
  462                        return -EINVAL;
  463
  464                plen = inet_mask_len(mask);
  465        }
  466
  467        cfg->fc_dst_len = plen;
  468        cfg->fc_dst = addr;
  469
             /* add requests create the route and mark it as a boot-time route */
  470        if (cmd != SIOCDELRT) {
  471                cfg->fc_nlflags = NLM_F_CREATE;
  472                cfg->fc_protocol = RTPROT_BOOT;
  473        }
  474
             /* a non-zero ioctl metric maps to priority metric - 1 */
  475        if (rt->rt_metric)
  476                cfg->fc_priority = rt->rt_metric - 1;
  477
             /* reject routes need no device, gateway or metrics: done */
  478        if (rt->rt_flags & RTF_REJECT) {
  479                cfg->fc_scope = RT_SCOPE_HOST;
  480                cfg->fc_type = RTN_UNREACHABLE;
  481                return 0;
  482        }
  483
             /* RT_SCOPE_NOWHERE is a placeholder, resolved further down */
  484        cfg->fc_scope = RT_SCOPE_NOWHERE;
  485        cfg->fc_type = RTN_UNICAST;
  486
  487        if (rt->rt_dev) {
  488                char *colon;
  489                struct net_device *dev;
  490                char devname[IFNAMSIZ];
  491
                     /* rt_dev is a user pointer; copy at most IFNAMSIZ-1 bytes */
  492                if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
  493                        return -EFAULT;
  494
                     /* force termination, then cut a "name:label" style name at the colon */
  495                devname[IFNAMSIZ-1] = 0;
  496                colon = strchr(devname, ':');
  497                if (colon)
  498                        *colon = 0;
  499                dev = __dev_get_by_name(net, devname);
  500                if (!dev)
  501                        return -ENODEV;
  502                cfg->fc_oif = dev->ifindex;
  503                cfg->fc_table = l3mdev_fib_table(dev);
  504                if (colon) {
  505                        struct in_ifaddr *ifa;
  506                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
  507                        if (!in_dev)
  508                                return -ENODEV;
                             /*
                              * Restore the full name and find the address whose
                              * label matches it; that address becomes the
                              * preferred source of the route.
                              */
  509                        *colon = ':';
  510                        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
  511                                if (strcmp(ifa->ifa_label, devname) == 0)
  512                                        break;
  513                        if (!ifa)
  514                                return -ENODEV;
  515                        cfg->fc_prefsrc = ifa->ifa_local;
  516                }
  517        }
  518
             /*
              * A non-zero AF_INET gateway is always recorded; the scope becomes
              * universe only if RTF_GATEWAY is set and the gateway address is
              * of unicast type in the selected table.
              */
  519        addr = sk_extract_addr(&rt->rt_gateway);
  520        if (rt->rt_gateway.sa_family == AF_INET && addr) {
  521                unsigned int addr_type;
  522
  523                cfg->fc_gw = addr;
  524                addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
  525                if (rt->rt_flags & RTF_GATEWAY &&
  526                    addr_type == RTN_UNICAST)
  527                        cfg->fc_scope = RT_SCOPE_UNIVERSE;
  528        }
  529
             /* deletes stop here: scope stays as computed and no metrics are built */
  530        if (cmd == SIOCDELRT)
  531                return 0;
  532
  533        if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
  534                return -EINVAL;
  535
  536        if (cfg->fc_scope == RT_SCOPE_NOWHERE)
  537                cfg->fc_scope = RT_SCOPE_LINK;
  538
  539        if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
  540                struct nlattr *mx;
  541                int len = 0;
  542
                     /* room for at most three 32-bit metric attributes */
  543                mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
  544                if (!mx)
  545                        return -ENOMEM;
  546
                     /*
                      * NOTE(review): the 40 is presumably the IPv4 + TCP header
                      * size used to turn an MTU into an advertised MSS; an
                      * rt_mtu below 40 is not checked here - confirm.
                      */
  547                if (rt->rt_flags & RTF_MTU)
  548                        len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
  549
  550                if (rt->rt_flags & RTF_WINDOW)
  551                        len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
  552
                     /* NOTE(review): "<< 3" presumably converts to the stored RTT unit - confirm */
  553                if (rt->rt_flags & RTF_IRTT)
  554                        len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
  555
  556                cfg->fc_mx = mx;
  557                cfg->fc_mx_len = len;
  558        }
  559
  560        return 0;
  561}
 562
 563/*
 564 * Handle IP routing ioctl calls.
 565 * These are used to manipulate the routing tables
 566 */
 567int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 568{
 569        struct fib_config cfg;
 570        struct rtentry rt;
 571        int err;
 572
 573        switch (cmd) {
 574        case SIOCADDRT:         /* Add a route */
 575        case SIOCDELRT:         /* Delete a route */
 576                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 577                        return -EPERM;
 578
 579                if (copy_from_user(&rt, arg, sizeof(rt)))
 580                        return -EFAULT;
 581
 582                rtnl_lock();
 583                err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
 584                if (err == 0) {
 585                        struct fib_table *tb;
 586
 587                        if (cmd == SIOCDELRT) {
 588                                tb = fib_get_table(net, cfg.fc_table);
 589                                if (tb)
 590                                        err = fib_table_delete(net, tb, &cfg);
 591                                else
 592                                        err = -ESRCH;
 593                        } else {
 594                                tb = fib_new_table(net, cfg.fc_table);
 595                                if (tb)
 596                                        err = fib_table_insert(net, tb, &cfg);
 597                                else
 598                                        err = -ENOBUFS;
 599                        }
 600
 601                        /* allocated by rtentry_to_fib_config() */
 602                        kfree(cfg.fc_mx);
 603                }
 604                rtnl_unlock();
 605                return err;
 606        }
 607        return -EINVAL;
 608}
 609
 610const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 611        [RTA_DST]               = { .type = NLA_U32 },
 612        [RTA_SRC]               = { .type = NLA_U32 },
 613        [RTA_IIF]               = { .type = NLA_U32 },
 614        [RTA_OIF]               = { .type = NLA_U32 },
 615        [RTA_GATEWAY]           = { .type = NLA_U32 },
 616        [RTA_PRIORITY]          = { .type = NLA_U32 },
 617        [RTA_PREFSRC]           = { .type = NLA_U32 },
 618        [RTA_METRICS]           = { .type = NLA_NESTED },
 619        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
 620        [RTA_FLOW]              = { .type = NLA_U32 },
 621        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
 622        [RTA_ENCAP]             = { .type = NLA_NESTED },
 623};
 624
 625static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 626                             struct nlmsghdr *nlh, struct fib_config *cfg)
 627{
 628        struct nlattr *attr;
 629        int err, remaining;
 630        struct rtmsg *rtm;
 631
 632        err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
 633        if (err < 0)
 634                goto errout;
 635
 636        memset(cfg, 0, sizeof(*cfg));
 637
 638        rtm = nlmsg_data(nlh);
 639        cfg->fc_dst_len = rtm->rtm_dst_len;
 640        cfg->fc_tos = rtm->rtm_tos;
 641        cfg->fc_table = rtm->rtm_table;
 642        cfg->fc_protocol = rtm->rtm_protocol;
 643        cfg->fc_scope = rtm->rtm_scope;
 644        cfg->fc_type = rtm->rtm_type;
 645        cfg->fc_flags = rtm->rtm_flags;
 646        cfg->fc_nlflags = nlh->nlmsg_flags;
 647
 648        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
 649        cfg->fc_nlinfo.nlh = nlh;
 650        cfg->fc_nlinfo.nl_net = net;
 651
 652        if (cfg->fc_type > RTN_MAX) {
 653                err = -EINVAL;
 654                goto errout;
 655        }
 656
 657        nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
 658                switch (nla_type(attr)) {
 659                case RTA_DST:
 660                        cfg->fc_dst = nla_get_be32(attr);
 661                        break;
 662                case RTA_OIF:
 663                        cfg->fc_oif = nla_get_u32(attr);
 664                        break;
 665                case RTA_GATEWAY:
 666                        cfg->fc_gw = nla_get_be32(attr);
 667                        break;
 668                case RTA_PRIORITY:
 669                        cfg->fc_priority = nla_get_u32(attr);
 670                        break;
 671                case RTA_PREFSRC:
 672                        cfg->fc_prefsrc = nla_get_be32(attr);
 673                        break;
 674                case RTA_METRICS:
 675                        cfg->fc_mx = nla_data(attr);
 676                        cfg->fc_mx_len = nla_len(attr);
 677                        break;
 678                case RTA_MULTIPATH:
 679                        cfg->fc_mp = nla_data(attr);
 680                        cfg->fc_mp_len = nla_len(attr);
 681                        break;
 682                case RTA_FLOW:
 683                        cfg->fc_flow = nla_get_u32(attr);
 684                        break;
 685                case RTA_TABLE:
 686                        cfg->fc_table = nla_get_u32(attr);
 687                        break;
 688                case RTA_ENCAP:
 689                        cfg->fc_encap = attr;
 690                        break;
 691                case RTA_ENCAP_TYPE:
 692                        cfg->fc_encap_type = nla_get_u16(attr);
 693                        break;
 694                }
 695        }
 696
 697        return 0;
 698errout:
 699        return err;
 700}
 701
 702static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 703{
 704        struct net *net = sock_net(skb->sk);
 705        struct fib_config cfg;
 706        struct fib_table *tb;
 707        int err;
 708
 709        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 710        if (err < 0)
 711                goto errout;
 712
 713        tb = fib_get_table(net, cfg.fc_table);
 714        if (!tb) {
 715                err = -ESRCH;
 716                goto errout;
 717        }
 718
 719        err = fib_table_delete(net, tb, &cfg);
 720errout:
 721        return err;
 722}
 723
 724static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 725{
 726        struct net *net = sock_net(skb->sk);
 727        struct fib_config cfg;
 728        struct fib_table *tb;
 729        int err;
 730
 731        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 732        if (err < 0)
 733                goto errout;
 734
 735        tb = fib_new_table(net, cfg.fc_table);
 736        if (!tb) {
 737                err = -ENOBUFS;
 738                goto errout;
 739        }
 740
 741        err = fib_table_insert(net, tb, &cfg);
 742errout:
 743        return err;
 744}
 745
  746static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
  747{
             /*
              * Netlink dump callback: walk every FIB table of the namespace and
              * let fib_table_dump() append its routes to @skb.  The position is
              * kept in cb->args[] so the walk can resume on the next call:
              *   args[0] - hash bucket to continue from
              *   args[1] - index of the table within that bucket
              *   args[2..] - cleared here before moving on to another table;
              *               presumably per-table state owned by
              *               fib_table_dump() - confirm there.
              * Always returns skb->len.
              */
  748        struct net *net = sock_net(skb->sk);
  749        unsigned int h, s_h;
  750        unsigned int e = 0, s_e;
  751        struct fib_table *tb;
  752        struct hlist_head *head;
  753        int dumped = 0;
  754
             /* requests carrying RTM_F_CLONED are answered without dumping anything */
  755        if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
  756            ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
  757                return skb->len;
  758
  759        s_h = cb->args[0];
  760        s_e = cb->args[1];
  761
  762        rcu_read_lock();
  763
             /* s_e only applies to the first bucket visited; it is reset after that */
  764        for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
  765                e = 0;
  766                head = &net->ipv4.fib_table_hash[h];
  767                hlist_for_each_entry_rcu(tb, head, tb_hlist) {
                             /* skip tables already handled by an earlier call */
  768                        if (e < s_e)
  769                                goto next;
                             /* a previous table was dumped in this call: reset args[2..] */
  770                        if (dumped)
  771                                memset(&cb->args[2], 0, sizeof(cb->args) -
  772                                                 2 * sizeof(cb->args[0]));
                             /* negative result: stop and record where to resume */
  773                        if (fib_table_dump(tb, skb, cb) < 0)
  774                                goto out;
  775                        dumped = 1;
  776next:
  777                        e++;
  778                }
  779        }
  780out:
  781        rcu_read_unlock();
  782
             /* save the cursor for the next invocation */
  783        cb->args[1] = e;
  784        cb->args[0] = h;
  785
  786        return skb->len;
  787}
 788
 789/* Prepare and feed intra-kernel routing request.
 790 * Really, it should be netlink message, but :-( netlink
 791 * can be not configured, so that we feed it directly
 792 * to fib engine. It is legal, because all events occur
 793 * only when netlink is already locked.
 794 */
 795static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 796{
 797        struct net *net = dev_net(ifa->ifa_dev->dev);
 798        u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
 799        struct fib_table *tb;
 800        struct fib_config cfg = {
 801                .fc_protocol = RTPROT_KERNEL,
 802                .fc_type = type,
 803                .fc_dst = dst,
 804                .fc_dst_len = dst_len,
 805                .fc_prefsrc = ifa->ifa_local,
 806                .fc_oif = ifa->ifa_dev->dev->ifindex,
 807                .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
 808                .fc_nlinfo = {
 809                        .nl_net = net,
 810                },
 811        };
 812
 813        if (!tb_id)
 814                tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
 815
 816        tb = fib_new_table(net, tb_id);
 817        if (!tb)
 818                return;
 819
 820        cfg.fc_table = tb->tb_id;
 821
 822        if (type != RTN_LOCAL)
 823                cfg.fc_scope = RT_SCOPE_LINK;
 824        else
 825                cfg.fc_scope = RT_SCOPE_HOST;
 826
 827        if (cmd == RTM_NEWROUTE)
 828                fib_table_insert(net, tb, &cfg);
 829        else
 830                fib_table_delete(net, tb, &cfg);
 831}
 832
 833void fib_add_ifaddr(struct in_ifaddr *ifa)
 834{
 835        struct in_device *in_dev = ifa->ifa_dev;
 836        struct net_device *dev = in_dev->dev;
 837        struct in_ifaddr *prim = ifa;
 838        __be32 mask = ifa->ifa_mask;
 839        __be32 addr = ifa->ifa_local;
 840        __be32 prefix = ifa->ifa_address & mask;
 841
 842        if (ifa->ifa_flags & IFA_F_SECONDARY) {
 843                prim = inet_ifa_byprefix(in_dev, prefix, mask);
 844                if (!prim) {
 845                        pr_warn("%s: bug: prim == NULL\n", __func__);
 846                        return;
 847                }
 848        }
 849
 850        fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
 851
 852        if (!(dev->flags & IFF_UP))
 853                return;
 854
 855        /* Add broadcast address, if it is explicitly assigned. */
 856        if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
 857                fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 858
 859        if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
 860            (prefix != addr || ifa->ifa_prefixlen < 32)) {
 861                if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
 862                        fib_magic(RTM_NEWROUTE,
 863                                  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
 864                                  prefix, ifa->ifa_prefixlen, prim);
 865
 866                /* Add network specific broadcasts, when it takes a sense */
 867                if (ifa->ifa_prefixlen < 31) {
 868                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
 869                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
 870                                  32, prim);
 871                }
 872        }
 873}
 874
 875/* Delete primary or secondary address.
 876 * Optionally, on secondary address promotion consider the addresses
 877 * from subnet iprim as deleted, even if they are in device list.
 878 * In this case the secondary ifa can be in device list.
     *
     * Routes are removed with fib_magic(RTM_DELROUTE, ...): the prefix route
     * of a primary address (unless IFA_F_NOPREFIXROUTE is set), the /32
     * broadcast routes and the /32 local route.  Before deleting, the
     * remaining addresses of the device are scanned and every route that
     * another address still accounts for is kept.
     *
     * ifa:   address being removed.
     * iprim: primary whose whole subnet is to be treated as deleted during
     *        the scan, or NULL (as passed by fib_inetaddr_event()).
 879 */
 880void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 881{
 882        struct in_device *in_dev = ifa->ifa_dev;
 883        struct net_device *dev = in_dev->dev;
 884        struct in_ifaddr *ifa1;
 885        struct in_ifaddr *prim = ifa, *prim1 = NULL;
 886        __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
 887        __be32 any = ifa->ifa_address & ifa->ifa_mask;
            /* Bits of 'ok': each one is set during the scan when another
             * address still accounts for the corresponding route, which is
             * then not deleted.  LOCAL_OK covers ifa->ifa_local, BRD_OK
             * covers ifa->ifa_broadcast, BRD1_OK covers 'brd' and BRD0_OK
             * covers 'any'.
             */
 888#define LOCAL_OK        1
 889#define BRD_OK          2
 890#define BRD0_OK         4
 891#define BRD1_OK         8
 892        unsigned int ok = 0;
 893        int subnet = 0;         /* Primary network */
 894        int gone = 1;           /* Address is missing */
 895        int same_prefsrc = 0;   /* Another primary with same IP */
 896
            /* For a secondary, 'prim' is replaced by the address returned by
             * inet_ifa_byprefix() for its subnet; for a primary it stays ifa
             * and the prefix route is deleted right here.
             */
 897        if (ifa->ifa_flags & IFA_F_SECONDARY) {
 898                prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 899                if (!prim) {
 900                        /* if the device has been deleted, we don't perform
 901                         * address promotion
 902                         */
 903                        if (!in_dev->dead)
 904                                pr_warn("%s: bug: prim == NULL\n", __func__);
 905                        return;
 906                }
 907                if (iprim && iprim != prim) {
 908                        pr_warn("%s: bug: iprim != prim\n", __func__);
 909                        return;
 910                }
 911        } else if (!ipv4_is_zeronet(any) &&
 912                   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
 913                if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
 914                        fib_magic(RTM_DELROUTE,
 915                                  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
 916                                  any, ifa->ifa_prefixlen, prim);
 917                subnet = 1;
 918        }
 919
            /* in_dev is dead: skip the scan.  'ok' stays 0 and 'gone' stays 1,
             * so every route handled below is deleted.
             */
 920        if (in_dev->dead)
 921                goto no_promotions;
 922
 923        /* Deletion is more complicated than add.
 924         * We should take care of not to delete too much :-)
 925         *
 926         * Scan address list to be sure that addresses are really gone.
 927         */
 928
 929        for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
 930                if (ifa1 == ifa) {
 931                        /* promotion, keep the IP */
 932                        gone = 0;
 933                        continue;
 934                }
 935                /* Ignore IFAs from our subnet */
 936                if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
 937                    inet_ifa_match(ifa1->ifa_address, iprim))
 938                        continue;
 939
 940                /* Ignore ifa1 if it uses different primary IP (prefsrc) */
 941                if (ifa1->ifa_flags & IFA_F_SECONDARY) {
 942                        /* Another address from our subnet? */
 943                        if (ifa1->ifa_mask == prim->ifa_mask &&
 944                            inet_ifa_match(ifa1->ifa_address, prim))
 945                                prim1 = prim;
 946                        else {
 947                                /* We reached the secondaries, so
 948                                 * same_prefsrc should be determined.
 949                                 */
 950                                if (!same_prefsrc)
 951                                        continue;
 952                                /* Search new prim1 if ifa1 is not
 953                                 * using the current prim1
 954                                 */
 955                                if (!prim1 ||
 956                                    ifa1->ifa_mask != prim1->ifa_mask ||
 957                                    !inet_ifa_match(ifa1->ifa_address, prim1))
 958                                        prim1 = inet_ifa_byprefix(in_dev,
 959                                                        ifa1->ifa_address,
 960                                                        ifa1->ifa_mask);
 961                                if (!prim1)
 962                                        continue;
 963                                if (prim1->ifa_local != prim->ifa_local)
 964                                        continue;
 965                        }
 966                } else {
 967                        if (prim->ifa_local != ifa1->ifa_local)
 968                                continue;
 969                        prim1 = ifa1;
 970                        if (prim != prim1)
 971                                same_prefsrc = 1;
 972                }
                    /* ifa1 was not ignored: record which of ifa's routes are
                     * also accounted for by ifa1.
                     */
 973                if (ifa->ifa_local == ifa1->ifa_local)
 974                        ok |= LOCAL_OK;
 975                if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
 976                        ok |= BRD_OK;
 977                if (brd == ifa1->ifa_broadcast)
 978                        ok |= BRD1_OK;
 979                if (any == ifa1->ifa_broadcast)
 980                        ok |= BRD0_OK;
 981                /* primary has network specific broadcasts */
 982                if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
 983                        __be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
 984                        __be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
 985
 986                        if (!ipv4_is_zeronet(any1)) {
 987                                if (ifa->ifa_broadcast == brd1 ||
 988                                    ifa->ifa_broadcast == any1)
 989                                        ok |= BRD_OK;
 990                                if (brd == brd1 || brd == any1)
 991                                        ok |= BRD1_OK;
 992                                if (any == brd1 || any == any1)
 993                                        ok |= BRD0_OK;
 994                        }
 995                }
 996        }
 997
 998no_promotions:
            /* Delete every route that the scan did not mark as still needed. */
 999        if (!(ok & BRD_OK))
1000                fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
            /* 'subnet' is only set for a primary address (see above), so the
             * two network specific broadcasts are only removed for primaries
             * with a prefix shorter than /31.
             */
1001        if (subnet && ifa->ifa_prefixlen < 31) {
1002                if (!(ok & BRD1_OK))
1003                        fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
1004                if (!(ok & BRD0_OK))
1005                        fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
1006        }
1007        if (!(ok & LOCAL_OK)) {
1008                unsigned int addr_type;
1009
1010                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
1011
1012                /* Check, that this local address finally disappeared. */
1013                addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
1014                                                     ifa->ifa_local);
1015                if (gone && addr_type != RTN_LOCAL) {
1016                        /* And the last, but not the least thing.
1017                         * We must flush stray FIB entries.
1018                         *
1019                         * First of all, we scan fib_info list searching
1020                         * for stray nexthop entries, then ignite fib_flush.
1021                         */
1022                        if (fib_sync_down_addr(dev, ifa->ifa_local))
1023                                fib_flush(dev_net(dev));
1024                }
1025        }
1026#undef LOCAL_OK
1027#undef BRD_OK
1028#undef BRD0_OK
1029#undef BRD1_OK
1030}
1031
1032static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
1033{
1034
1035        struct fib_result       res;
1036        struct flowi4           fl4 = {
1037                .flowi4_mark = frn->fl_mark,
1038                .daddr = frn->fl_addr,
1039                .flowi4_tos = frn->fl_tos,
1040                .flowi4_scope = frn->fl_scope,
1041        };
1042        struct fib_table *tb;
1043
1044        rcu_read_lock();
1045
1046        tb = fib_get_table(net, frn->tb_id_in);
1047
1048        frn->err = -ENOENT;
1049        if (tb) {
1050                local_bh_disable();
1051
1052                frn->tb_id = tb->tb_id;
1053                frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
1054
1055                if (!frn->err) {
1056                        frn->prefixlen = res.prefixlen;
1057                        frn->nh_sel = res.nh_sel;
1058                        frn->type = res.type;
1059                        frn->scope = res.scope;
1060                }
1061                local_bh_enable();
1062        }
1063
1064        rcu_read_unlock();
1065}
1066
1067static void nl_fib_input(struct sk_buff *skb)
1068{
1069        struct net *net;
1070        struct fib_result_nl *frn;
1071        struct nlmsghdr *nlh;
1072        u32 portid;
1073
1074        net = sock_net(skb->sk);
1075        nlh = nlmsg_hdr(skb);
1076        if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
1077            nlmsg_len(nlh) < sizeof(*frn))
1078                return;
1079
1080        skb = netlink_skb_clone(skb, GFP_KERNEL);
1081        if (!skb)
1082                return;
1083        nlh = nlmsg_hdr(skb);
1084
1085        frn = (struct fib_result_nl *) nlmsg_data(nlh);
1086        nl_fib_lookup(net, frn);
1087
1088        portid = NETLINK_CB(skb).portid;      /* netlink portid */
1089        NETLINK_CB(skb).portid = 0;        /* from kernel */
1090        NETLINK_CB(skb).dst_group = 0;  /* unicast */
1091        netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
1092}
1093
1094static int __net_init nl_fib_lookup_init(struct net *net)
1095{
1096        struct sock *sk;
1097        struct netlink_kernel_cfg cfg = {
1098                .input  = nl_fib_input,
1099        };
1100
1101        sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg);
1102        if (!sk)
1103                return -EAFNOSUPPORT;
1104        net->ipv4.fibnl = sk;
1105        return 0;
1106}
1107
1108static void nl_fib_lookup_exit(struct net *net)
1109{
1110        netlink_kernel_release(net->ipv4.fibnl);
1111        net->ipv4.fibnl = NULL;
1112}
1113
1114static void fib_disable_ip(struct net_device *dev, unsigned long event,
1115                           bool force)
1116{
1117        if (fib_sync_down_dev(dev, event, force))
1118                fib_flush(dev_net(dev));
1119        rt_cache_flush(dev_net(dev));
1120        arp_ifdown(dev);
1121}
1122
1123static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
1124{
1125        struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1126        struct net_device *dev = ifa->ifa_dev->dev;
1127        struct net *net = dev_net(dev);
1128
1129        switch (event) {
1130        case NETDEV_UP:
1131                fib_add_ifaddr(ifa);
1132#ifdef CONFIG_IP_ROUTE_MULTIPATH
1133                fib_sync_up(dev, RTNH_F_DEAD);
1134#endif
1135                atomic_inc(&net->ipv4.dev_addr_genid);
1136                rt_cache_flush(dev_net(dev));
1137                break;
1138        case NETDEV_DOWN:
1139                fib_del_ifaddr(ifa, NULL);
1140                atomic_inc(&net->ipv4.dev_addr_genid);
1141                if (!ifa->ifa_dev->ifa_list) {
1142                        /* Last address was deleted from this interface.
1143                         * Disable IP.
1144                         */
1145                        fib_disable_ip(dev, event, true);
1146                } else {
1147                        rt_cache_flush(dev_net(dev));
1148                }
1149                break;
1150        }
1151        return NOTIFY_DONE;
1152}
1153
1154static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1155{
1156        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1157        struct netdev_notifier_changeupper_info *info;
1158        struct in_device *in_dev;
1159        struct net *net = dev_net(dev);
1160        unsigned int flags;
1161
1162        if (event == NETDEV_UNREGISTER) {
1163                fib_disable_ip(dev, event, true);
1164                rt_flush_dev(dev);
1165                return NOTIFY_DONE;
1166        }
1167
1168        in_dev = __in_dev_get_rtnl(dev);
1169        if (!in_dev)
1170                return NOTIFY_DONE;
1171
1172        switch (event) {
1173        case NETDEV_UP:
1174                for_ifa(in_dev) {
1175                        fib_add_ifaddr(ifa);
1176                } endfor_ifa(in_dev);
1177#ifdef CONFIG_IP_ROUTE_MULTIPATH
1178                fib_sync_up(dev, RTNH_F_DEAD);
1179#endif
1180                atomic_inc(&net->ipv4.dev_addr_genid);
1181                rt_cache_flush(net);
1182                break;
1183        case NETDEV_DOWN:
1184                fib_disable_ip(dev, event, false);
1185                break;
1186        case NETDEV_CHANGE:
1187                flags = dev_get_flags(dev);
1188                if (flags & (IFF_RUNNING | IFF_LOWER_UP))
1189                        fib_sync_up(dev, RTNH_F_LINKDOWN);
1190                else
1191                        fib_sync_down_dev(dev, event, false);
1192                /* fall through */
1193        case NETDEV_CHANGEMTU:
1194                rt_cache_flush(net);
1195                break;
1196        case NETDEV_CHANGEUPPER:
1197                info = ptr;
1198                /* flush all routes if dev is linked to or unlinked from
1199                 * an L3 master device (e.g., VRF)
1200                 */
1201                if (info->upper_dev && netif_is_l3_master(info->upper_dev))
1202                        fib_disable_ip(dev, NETDEV_DOWN, true);
1203                break;
1204        }
1205        return NOTIFY_DONE;
1206}
1207
1208static struct notifier_block fib_inetaddr_notifier = {
            /* Registered with register_inetaddr_notifier() in ip_fib_init(). */
1209        .notifier_call = fib_inetaddr_event,
1210};
1211
1212static struct notifier_block fib_netdev_notifier = {
            /* Registered with register_netdevice_notifier() in ip_fib_init(). */
1213        .notifier_call = fib_netdev_event,
1214};
1215
1216static int __net_init ip_fib_net_init(struct net *net)
1217{
1218        int err;
1219        size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1220
1221        /* Avoid false sharing : Use at least a full cache line */
1222        size = max_t(size_t, size, L1_CACHE_BYTES);
1223
1224        net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1225        if (!net->ipv4.fib_table_hash)
1226                return -ENOMEM;
1227
1228        err = fib4_rules_init(net);
1229        if (err < 0)
1230                goto fail;
1231        return 0;
1232
1233fail:
1234        kfree(net->ipv4.fib_table_hash);
1235        return err;
1236}
1237
1238static void ip_fib_net_exit(struct net *net)
1239{
1240        unsigned int i;
1241
1242        rtnl_lock();
1243#ifdef CONFIG_IP_MULTIPLE_TABLES
1244        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
1245        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
1246#endif
1247        for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1248                struct hlist_head *head = &net->ipv4.fib_table_hash[i];
1249                struct hlist_node *tmp;
1250                struct fib_table *tb;
1251
1252                hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
1253                        hlist_del(&tb->tb_hlist);
1254                        fib_table_flush(net, tb);
1255                        fib_free_table(tb);
1256                }
1257        }
1258
1259#ifdef CONFIG_IP_MULTIPLE_TABLES
1260        fib4_rules_exit(net);
1261#endif
1262        rtnl_unlock();
1263        kfree(net->ipv4.fib_table_hash);
1264}
1265
1266static int __net_init fib_net_init(struct net *net)
1267{
1268        int error;
1269
1270#ifdef CONFIG_IP_ROUTE_CLASSID
1271        net->ipv4.fib_num_tclassid_users = 0;
1272#endif
1273        error = ip_fib_net_init(net);
1274        if (error < 0)
1275                goto out;
1276        error = nl_fib_lookup_init(net);
1277        if (error < 0)
1278                goto out_nlfl;
1279        error = fib_proc_init(net);
1280        if (error < 0)
1281                goto out_proc;
1282out:
1283        return error;
1284
1285out_proc:
1286        nl_fib_lookup_exit(net);
1287out_nlfl:
1288        ip_fib_net_exit(net);
1289        goto out;
1290}
1291
1292static void __net_exit fib_net_exit(struct net *net)
1293{
1294        fib_proc_exit(net);
1295        nl_fib_lookup_exit(net);
1296        ip_fib_net_exit(net);
1297}
1298
1299static struct pernet_operations fib_net_ops = {
            /* Per-netns setup and teardown hooks; registered with
             * register_pernet_subsys() in ip_fib_init().
             */
1300        .init = fib_net_init,
1301        .exit = fib_net_exit,
1302};
1303
/* One-time initialisation of the IPv4 FIB frontend.
 *
 * Registers the PF_INET rtnetlink handlers (inet_rtm_newroute for
 * RTM_NEWROUTE, inet_rtm_delroute for RTM_DELROUTE, inet_dump_fib for
 * RTM_GETROUTE), the per-netns operations, the netdevice and inetaddr
 * notifiers, and finally calls fib_trie_init().  The return values of the
 * registration calls are not checked.
 */
1304void __init ip_fib_init(void)
1305{
1306        rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
1307        rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
1308        rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
1309
1310        register_pernet_subsys(&fib_net_ops);
1311        register_netdevice_notifier(&fib_netdev_notifier);
1312        register_inetaddr_notifier(&fib_inetaddr_notifier);
1313
1314        fib_trie_init();
1315}
1316