linux/net/ipv4/fib_frontend.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              IPv4 Forwarding Information Base: FIB frontend.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 *
  10 *              This program is free software; you can redistribute it and/or
  11 *              modify it under the terms of the GNU General Public License
  12 *              as published by the Free Software Foundation; either version
  13 *              2 of the License, or (at your option) any later version.
  14 */
  15
  16#include <linux/module.h>
  17#include <asm/uaccess.h>
  18#include <asm/system.h>
  19#include <linux/bitops.h>
  20#include <linux/capability.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/mm.h>
  24#include <linux/string.h>
  25#include <linux/socket.h>
  26#include <linux/sockios.h>
  27#include <linux/errno.h>
  28#include <linux/in.h>
  29#include <linux/inet.h>
  30#include <linux/inetdevice.h>
  31#include <linux/netdevice.h>
  32#include <linux/if_addr.h>
  33#include <linux/if_arp.h>
  34#include <linux/skbuff.h>
  35#include <linux/init.h>
  36#include <linux/list.h>
  37
  38#include <net/ip.h>
  39#include <net/protocol.h>
  40#include <net/route.h>
  41#include <net/tcp.h>
  42#include <net/sock.h>
  43#include <net/arp.h>
  44#include <net/ip_fib.h>
  45#include <net/rtnetlink.h>
  46
  47#ifndef CONFIG_IP_MULTIPLE_TABLES
  48
  49static int __net_init fib4_rules_init(struct net *net)
  50{
  51        struct fib_table *local_table, *main_table;
  52
  53        local_table = fib_hash_table(RT_TABLE_LOCAL);
  54        if (local_table == NULL)
  55                return -ENOMEM;
  56
  57        main_table  = fib_hash_table(RT_TABLE_MAIN);
  58        if (main_table == NULL)
  59                goto fail;
  60
  61        hlist_add_head_rcu(&local_table->tb_hlist,
  62                                &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
  63        hlist_add_head_rcu(&main_table->tb_hlist,
  64                                &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
  65        return 0;
  66
  67fail:
  68        kfree(local_table);
  69        return -ENOMEM;
  70}
  71#else
  72
  73struct fib_table *fib_new_table(struct net *net, u32 id)
  74{
  75        struct fib_table *tb;
  76        unsigned int h;
  77
  78        if (id == 0)
  79                id = RT_TABLE_MAIN;
  80        tb = fib_get_table(net, id);
  81        if (tb)
  82                return tb;
  83
  84        tb = fib_hash_table(id);
  85        if (!tb)
  86                return NULL;
  87        h = id & (FIB_TABLE_HASHSZ - 1);
  88        hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
  89        return tb;
  90}
  91
  92struct fib_table *fib_get_table(struct net *net, u32 id)
  93{
  94        struct fib_table *tb;
  95        struct hlist_node *node;
  96        struct hlist_head *head;
  97        unsigned int h;
  98
  99        if (id == 0)
 100                id = RT_TABLE_MAIN;
 101        h = id & (FIB_TABLE_HASHSZ - 1);
 102
 103        rcu_read_lock();
 104        head = &net->ipv4.fib_table_hash[h];
 105        hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
 106                if (tb->tb_id == id) {
 107                        rcu_read_unlock();
 108                        return tb;
 109                }
 110        }
 111        rcu_read_unlock();
 112        return NULL;
 113}
 114#endif /* CONFIG_IP_MULTIPLE_TABLES */
 115
 116void fib_select_default(struct net *net,
 117                        const struct flowi *flp, struct fib_result *res)
 118{
 119        struct fib_table *tb;
 120        int table = RT_TABLE_MAIN;
 121#ifdef CONFIG_IP_MULTIPLE_TABLES
 122        if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
 123                return;
 124        table = res->r->table;
 125#endif
 126        tb = fib_get_table(net, table);
 127        if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 128                tb->tb_select_default(tb, flp, res);
 129}
 130
 131static void fib_flush(struct net *net)
 132{
 133        int flushed = 0;
 134        struct fib_table *tb;
 135        struct hlist_node *node;
 136        struct hlist_head *head;
 137        unsigned int h;
 138
 139        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
 140                head = &net->ipv4.fib_table_hash[h];
 141                hlist_for_each_entry(tb, node, head, tb_hlist)
 142                        flushed += tb->tb_flush(tb);
 143        }
 144
 145        if (flushed)
 146                rt_cache_flush(net, -1);
 147}
 148
 149/*
 150 *      Find the first device with a given source address.
 151 */
 152
 153struct net_device * ip_dev_find(struct net *net, __be32 addr)
 154{
 155        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 156        struct fib_result res;
 157        struct net_device *dev = NULL;
 158        struct fib_table *local_table;
 159
 160#ifdef CONFIG_IP_MULTIPLE_TABLES
 161        res.r = NULL;
 162#endif
 163
 164        local_table = fib_get_table(net, RT_TABLE_LOCAL);
 165        if (!local_table || local_table->tb_lookup(local_table, &fl, &res))
 166                return NULL;
 167        if (res.type != RTN_LOCAL)
 168                goto out;
 169        dev = FIB_RES_DEV(res);
 170
 171        if (dev)
 172                dev_hold(dev);
 173out:
 174        fib_res_put(&res);
 175        return dev;
 176}
 177
 178/*
 179 * Find address type as if only "dev" was present in the system. If
 180 * on_dev is NULL then all interfaces are taken into consideration.
 181 */
 182static inline unsigned __inet_dev_addr_type(struct net *net,
 183                                            const struct net_device *dev,
 184                                            __be32 addr)
 185{
 186        struct flowi            fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
 187        struct fib_result       res;
 188        unsigned ret = RTN_BROADCAST;
 189        struct fib_table *local_table;
 190
 191        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 192                return RTN_BROADCAST;
 193        if (ipv4_is_multicast(addr))
 194                return RTN_MULTICAST;
 195
 196#ifdef CONFIG_IP_MULTIPLE_TABLES
 197        res.r = NULL;
 198#endif
 199
 200        local_table = fib_get_table(net, RT_TABLE_LOCAL);
 201        if (local_table) {
 202                ret = RTN_UNICAST;
 203                if (!local_table->tb_lookup(local_table, &fl, &res)) {
 204                        if (!dev || dev == res.fi->fib_dev)
 205                                ret = res.type;
 206                        fib_res_put(&res);
 207                }
 208        }
 209        return ret;
 210}
 211
 212unsigned int inet_addr_type(struct net *net, __be32 addr)
 213{
 214        return __inet_dev_addr_type(net, NULL, addr);
 215}
 216
 217unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 218                                __be32 addr)
 219{
 220       return __inet_dev_addr_type(net, dev, addr);
 221}
 222
 223/* Given (packet source, input interface) and optional (dst, oif, tos):
 224   - (main) check, that source is valid i.e. not broadcast or our local
 225     address.
 226   - figure out what "logical" interface this packet arrived
 227     and calculate "specific destination" address.
 228   - check, that packet arrived from expected physical interface.
 229 */
 230
 231int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 232                        struct net_device *dev, __be32 *spec_dst,
 233                        u32 *itag, u32 mark)
 234{
 235        struct in_device *in_dev;
 236        struct flowi fl = { .nl_u = { .ip4_u =
 237                                      { .daddr = src,
 238                                        .saddr = dst,
 239                                        .tos = tos } },
 240                            .mark = mark,
 241                            .iif = oif };
 242
 243        struct fib_result res;
 244        int no_addr, rpf;
 245        int ret;
 246        struct net *net;
 247
 248        no_addr = rpf = 0;
 249        rcu_read_lock();
 250        in_dev = __in_dev_get_rcu(dev);
 251        if (in_dev) {
 252                no_addr = in_dev->ifa_list == NULL;
 253                rpf = IN_DEV_RPFILTER(in_dev);
 254        }
 255        rcu_read_unlock();
 256
 257        if (in_dev == NULL)
 258                goto e_inval;
 259
 260        net = dev_net(dev);
 261        if (fib_lookup(net, &fl, &res))
 262                goto last_resort;
 263        if (res.type != RTN_UNICAST)
 264                goto e_inval_res;
 265        *spec_dst = FIB_RES_PREFSRC(res);
 266        fib_combine_itag(itag, &res);
 267#ifdef CONFIG_IP_ROUTE_MULTIPATH
 268        if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
 269#else
 270        if (FIB_RES_DEV(res) == dev)
 271#endif
 272        {
 273                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 274                fib_res_put(&res);
 275                return ret;
 276        }
 277        fib_res_put(&res);
 278        if (no_addr)
 279                goto last_resort;
 280        if (rpf == 1)
 281                goto e_inval;
 282        fl.oif = dev->ifindex;
 283
 284        ret = 0;
 285        if (fib_lookup(net, &fl, &res) == 0) {
 286                if (res.type == RTN_UNICAST) {
 287                        *spec_dst = FIB_RES_PREFSRC(res);
 288                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 289                }
 290                fib_res_put(&res);
 291        }
 292        return ret;
 293
 294last_resort:
 295        if (rpf)
 296                goto e_inval;
 297        *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
 298        *itag = 0;
 299        return 0;
 300
 301e_inval_res:
 302        fib_res_put(&res);
 303e_inval:
 304        return -EINVAL;
 305}
 306
 307static inline __be32 sk_extract_addr(struct sockaddr *addr)
 308{
 309        return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
 310}
 311
 312static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
 313{
 314        struct nlattr *nla;
 315
 316        nla = (struct nlattr *) ((char *) mx + len);
 317        nla->nla_type = type;
 318        nla->nla_len = nla_attr_size(4);
 319        *(u32 *) nla_data(nla) = value;
 320
 321        return len + nla_total_size(4);
 322}
 323
 324static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 325                                 struct fib_config *cfg)
 326{
 327        __be32 addr;
 328        int plen;
 329
 330        memset(cfg, 0, sizeof(*cfg));
 331        cfg->fc_nlinfo.nl_net = net;
 332
 333        if (rt->rt_dst.sa_family != AF_INET)
 334                return -EAFNOSUPPORT;
 335
 336        /*
 337         * Check mask for validity:
 338         * a) it must be contiguous.
 339         * b) destination must have all host bits clear.
 340         * c) if application forgot to set correct family (AF_INET),
 341         *    reject request unless it is absolutely clear i.e.
 342         *    both family and mask are zero.
 343         */
 344        plen = 32;
 345        addr = sk_extract_addr(&rt->rt_dst);
 346        if (!(rt->rt_flags & RTF_HOST)) {
 347                __be32 mask = sk_extract_addr(&rt->rt_genmask);
 348
 349                if (rt->rt_genmask.sa_family != AF_INET) {
 350                        if (mask || rt->rt_genmask.sa_family)
 351                                return -EAFNOSUPPORT;
 352                }
 353
 354                if (bad_mask(mask, addr))
 355                        return -EINVAL;
 356
 357                plen = inet_mask_len(mask);
 358        }
 359
 360        cfg->fc_dst_len = plen;
 361        cfg->fc_dst = addr;
 362
 363        if (cmd != SIOCDELRT) {
 364                cfg->fc_nlflags = NLM_F_CREATE;
 365                cfg->fc_protocol = RTPROT_BOOT;
 366        }
 367
 368        if (rt->rt_metric)
 369                cfg->fc_priority = rt->rt_metric - 1;
 370
 371        if (rt->rt_flags & RTF_REJECT) {
 372                cfg->fc_scope = RT_SCOPE_HOST;
 373                cfg->fc_type = RTN_UNREACHABLE;
 374                return 0;
 375        }
 376
 377        cfg->fc_scope = RT_SCOPE_NOWHERE;
 378        cfg->fc_type = RTN_UNICAST;
 379
 380        if (rt->rt_dev) {
 381                char *colon;
 382                struct net_device *dev;
 383                char devname[IFNAMSIZ];
 384
 385                if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
 386                        return -EFAULT;
 387
 388                devname[IFNAMSIZ-1] = 0;
 389                colon = strchr(devname, ':');
 390                if (colon)
 391                        *colon = 0;
 392                dev = __dev_get_by_name(net, devname);
 393                if (!dev)
 394                        return -ENODEV;
 395                cfg->fc_oif = dev->ifindex;
 396                if (colon) {
 397                        struct in_ifaddr *ifa;
 398                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 399                        if (!in_dev)
 400                                return -ENODEV;
 401                        *colon = ':';
 402                        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
 403                                if (strcmp(ifa->ifa_label, devname) == 0)
 404                                        break;
 405                        if (ifa == NULL)
 406                                return -ENODEV;
 407                        cfg->fc_prefsrc = ifa->ifa_local;
 408                }
 409        }
 410
 411        addr = sk_extract_addr(&rt->rt_gateway);
 412        if (rt->rt_gateway.sa_family == AF_INET && addr) {
 413                cfg->fc_gw = addr;
 414                if (rt->rt_flags & RTF_GATEWAY &&
 415                    inet_addr_type(net, addr) == RTN_UNICAST)
 416                        cfg->fc_scope = RT_SCOPE_UNIVERSE;
 417        }
 418
 419        if (cmd == SIOCDELRT)
 420                return 0;
 421
 422        if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
 423                return -EINVAL;
 424
 425        if (cfg->fc_scope == RT_SCOPE_NOWHERE)
 426                cfg->fc_scope = RT_SCOPE_LINK;
 427
 428        if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
 429                struct nlattr *mx;
 430                int len = 0;
 431
 432                mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
 433                if (mx == NULL)
 434                        return -ENOMEM;
 435
 436                if (rt->rt_flags & RTF_MTU)
 437                        len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
 438
 439                if (rt->rt_flags & RTF_WINDOW)
 440                        len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
 441
 442                if (rt->rt_flags & RTF_IRTT)
 443                        len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
 444
 445                cfg->fc_mx = mx;
 446                cfg->fc_mx_len = len;
 447        }
 448
 449        return 0;
 450}
 451
 452/*
 453 *      Handle IP routing ioctl calls. These are used to manipulate the routing tables
 454 */
 455
 456int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 457{
 458        struct fib_config cfg;
 459        struct rtentry rt;
 460        int err;
 461
 462        switch (cmd) {
 463        case SIOCADDRT:         /* Add a route */
 464        case SIOCDELRT:         /* Delete a route */
 465                if (!capable(CAP_NET_ADMIN))
 466                        return -EPERM;
 467
 468                if (copy_from_user(&rt, arg, sizeof(rt)))
 469                        return -EFAULT;
 470
 471                rtnl_lock();
 472                err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
 473                if (err == 0) {
 474                        struct fib_table *tb;
 475
 476                        if (cmd == SIOCDELRT) {
 477                                tb = fib_get_table(net, cfg.fc_table);
 478                                if (tb)
 479                                        err = tb->tb_delete(tb, &cfg);
 480                                else
 481                                        err = -ESRCH;
 482                        } else {
 483                                tb = fib_new_table(net, cfg.fc_table);
 484                                if (tb)
 485                                        err = tb->tb_insert(tb, &cfg);
 486                                else
 487                                        err = -ENOBUFS;
 488                        }
 489
 490                        /* allocated by rtentry_to_fib_config() */
 491                        kfree(cfg.fc_mx);
 492                }
 493                rtnl_unlock();
 494                return err;
 495        }
 496        return -EINVAL;
 497}
 498
 499const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
 500        [RTA_DST]               = { .type = NLA_U32 },
 501        [RTA_SRC]               = { .type = NLA_U32 },
 502        [RTA_IIF]               = { .type = NLA_U32 },
 503        [RTA_OIF]               = { .type = NLA_U32 },
 504        [RTA_GATEWAY]           = { .type = NLA_U32 },
 505        [RTA_PRIORITY]          = { .type = NLA_U32 },
 506        [RTA_PREFSRC]           = { .type = NLA_U32 },
 507        [RTA_METRICS]           = { .type = NLA_NESTED },
 508        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
 509        [RTA_FLOW]              = { .type = NLA_U32 },
 510};
 511
 512static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 513                            struct nlmsghdr *nlh, struct fib_config *cfg)
 514{
 515        struct nlattr *attr;
 516        int err, remaining;
 517        struct rtmsg *rtm;
 518
 519        err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
 520        if (err < 0)
 521                goto errout;
 522
 523        memset(cfg, 0, sizeof(*cfg));
 524
 525        rtm = nlmsg_data(nlh);
 526        cfg->fc_dst_len = rtm->rtm_dst_len;
 527        cfg->fc_tos = rtm->rtm_tos;
 528        cfg->fc_table = rtm->rtm_table;
 529        cfg->fc_protocol = rtm->rtm_protocol;
 530        cfg->fc_scope = rtm->rtm_scope;
 531        cfg->fc_type = rtm->rtm_type;
 532        cfg->fc_flags = rtm->rtm_flags;
 533        cfg->fc_nlflags = nlh->nlmsg_flags;
 534
 535        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 536        cfg->fc_nlinfo.nlh = nlh;
 537        cfg->fc_nlinfo.nl_net = net;
 538
 539        if (cfg->fc_type > RTN_MAX) {
 540                err = -EINVAL;
 541                goto errout;
 542        }
 543
 544        nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
 545                switch (nla_type(attr)) {
 546                case RTA_DST:
 547                        cfg->fc_dst = nla_get_be32(attr);
 548                        break;
 549                case RTA_OIF:
 550                        cfg->fc_oif = nla_get_u32(attr);
 551                        break;
 552                case RTA_GATEWAY:
 553                        cfg->fc_gw = nla_get_be32(attr);
 554                        break;
 555                case RTA_PRIORITY:
 556                        cfg->fc_priority = nla_get_u32(attr);
 557                        break;
 558                case RTA_PREFSRC:
 559                        cfg->fc_prefsrc = nla_get_be32(attr);
 560                        break;
 561                case RTA_METRICS:
 562                        cfg->fc_mx = nla_data(attr);
 563                        cfg->fc_mx_len = nla_len(attr);
 564                        break;
 565                case RTA_MULTIPATH:
 566                        cfg->fc_mp = nla_data(attr);
 567                        cfg->fc_mp_len = nla_len(attr);
 568                        break;
 569                case RTA_FLOW:
 570                        cfg->fc_flow = nla_get_u32(attr);
 571                        break;
 572                case RTA_TABLE:
 573                        cfg->fc_table = nla_get_u32(attr);
 574                        break;
 575                }
 576        }
 577
 578        return 0;
 579errout:
 580        return err;
 581}
 582
 583static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 584{
 585        struct net *net = sock_net(skb->sk);
 586        struct fib_config cfg;
 587        struct fib_table *tb;
 588        int err;
 589
 590        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 591        if (err < 0)
 592                goto errout;
 593
 594        tb = fib_get_table(net, cfg.fc_table);
 595        if (tb == NULL) {
 596                err = -ESRCH;
 597                goto errout;
 598        }
 599
 600        err = tb->tb_delete(tb, &cfg);
 601errout:
 602        return err;
 603}
 604
 605static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 606{
 607        struct net *net = sock_net(skb->sk);
 608        struct fib_config cfg;
 609        struct fib_table *tb;
 610        int err;
 611
 612        err = rtm_to_fib_config(net, skb, nlh, &cfg);
 613        if (err < 0)
 614                goto errout;
 615
 616        tb = fib_new_table(net, cfg.fc_table);
 617        if (tb == NULL) {
 618                err = -ENOBUFS;
 619                goto errout;
 620        }
 621
 622        err = tb->tb_insert(tb, &cfg);
 623errout:
 624        return err;
 625}
 626
 627static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 628{
 629        struct net *net = sock_net(skb->sk);
 630        unsigned int h, s_h;
 631        unsigned int e = 0, s_e;
 632        struct fib_table *tb;
 633        struct hlist_node *node;
 634        struct hlist_head *head;
 635        int dumped = 0;
 636
 637        if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
 638            ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
 639                return ip_rt_dump(skb, cb);
 640
 641        s_h = cb->args[0];
 642        s_e = cb->args[1];
 643
 644        for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
 645                e = 0;
 646                head = &net->ipv4.fib_table_hash[h];
 647                hlist_for_each_entry(tb, node, head, tb_hlist) {
 648                        if (e < s_e)
 649                                goto next;
 650                        if (dumped)
 651                                memset(&cb->args[2], 0, sizeof(cb->args) -
 652                                                 2 * sizeof(cb->args[0]));
 653                        if (tb->tb_dump(tb, skb, cb) < 0)
 654                                goto out;
 655                        dumped = 1;
 656next:
 657                        e++;
 658                }
 659        }
 660out:
 661        cb->args[1] = e;
 662        cb->args[0] = h;
 663
 664        return skb->len;
 665}
 666
 667/* Prepare and feed intra-kernel routing request.
 668   Really, it should be netlink message, but :-( netlink
 669   can be not configured, so that we feed it directly
 670   to fib engine. It is legal, because all events occur
 671   only when netlink is already locked.
 672 */
 673
 674static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 675{
 676        struct net *net = dev_net(ifa->ifa_dev->dev);
 677        struct fib_table *tb;
 678        struct fib_config cfg = {
 679                .fc_protocol = RTPROT_KERNEL,
 680                .fc_type = type,
 681                .fc_dst = dst,
 682                .fc_dst_len = dst_len,
 683                .fc_prefsrc = ifa->ifa_local,
 684                .fc_oif = ifa->ifa_dev->dev->ifindex,
 685                .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
 686                .fc_nlinfo = {
 687                        .nl_net = net,
 688                },
 689        };
 690
 691        if (type == RTN_UNICAST)
 692                tb = fib_new_table(net, RT_TABLE_MAIN);
 693        else
 694                tb = fib_new_table(net, RT_TABLE_LOCAL);
 695
 696        if (tb == NULL)
 697                return;
 698
 699        cfg.fc_table = tb->tb_id;
 700
 701        if (type != RTN_LOCAL)
 702                cfg.fc_scope = RT_SCOPE_LINK;
 703        else
 704                cfg.fc_scope = RT_SCOPE_HOST;
 705
 706        if (cmd == RTM_NEWROUTE)
 707                tb->tb_insert(tb, &cfg);
 708        else
 709                tb->tb_delete(tb, &cfg);
 710}
 711
 712void fib_add_ifaddr(struct in_ifaddr *ifa)
 713{
 714        struct in_device *in_dev = ifa->ifa_dev;
 715        struct net_device *dev = in_dev->dev;
 716        struct in_ifaddr *prim = ifa;
 717        __be32 mask = ifa->ifa_mask;
 718        __be32 addr = ifa->ifa_local;
 719        __be32 prefix = ifa->ifa_address&mask;
 720
 721        if (ifa->ifa_flags&IFA_F_SECONDARY) {
 722                prim = inet_ifa_byprefix(in_dev, prefix, mask);
 723                if (prim == NULL) {
 724                        printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
 725                        return;
 726                }
 727        }
 728
 729        fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
 730
 731        if (!(dev->flags&IFF_UP))
 732                return;
 733
 734        /* Add broadcast address, if it is explicitly assigned. */
 735        if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
 736                fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 737
 738        if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
 739            (prefix != addr || ifa->ifa_prefixlen < 32)) {
 740                fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 741                          RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
 742
 743                /* Add network specific broadcasts, when it takes a sense */
 744                if (ifa->ifa_prefixlen < 31) {
 745                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
 746                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
 747                }
 748        }
 749}
 750
 751static void fib_del_ifaddr(struct in_ifaddr *ifa)
 752{
 753        struct in_device *in_dev = ifa->ifa_dev;
 754        struct net_device *dev = in_dev->dev;
 755        struct in_ifaddr *ifa1;
 756        struct in_ifaddr *prim = ifa;
 757        __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
 758        __be32 any = ifa->ifa_address&ifa->ifa_mask;
 759#define LOCAL_OK        1
 760#define BRD_OK          2
 761#define BRD0_OK         4
 762#define BRD1_OK         8
 763        unsigned ok = 0;
 764
 765        if (!(ifa->ifa_flags&IFA_F_SECONDARY))
 766                fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
 767                          RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
 768        else {
 769                prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 770                if (prim == NULL) {
 771                        printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
 772                        return;
 773                }
 774        }
 775
 776        /* Deletion is more complicated than add.
 777           We should take care of not to delete too much :-)
 778
 779           Scan address list to be sure that addresses are really gone.
 780         */
 781
 782        for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
 783                if (ifa->ifa_local == ifa1->ifa_local)
 784                        ok |= LOCAL_OK;
 785                if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
 786                        ok |= BRD_OK;
 787                if (brd == ifa1->ifa_broadcast)
 788                        ok |= BRD1_OK;
 789                if (any == ifa1->ifa_broadcast)
 790                        ok |= BRD0_OK;
 791        }
 792
 793        if (!(ok&BRD_OK))
 794                fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
 795        if (!(ok&BRD1_OK))
 796                fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
 797        if (!(ok&BRD0_OK))
 798                fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
 799        if (!(ok&LOCAL_OK)) {
 800                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 801
 802                /* Check, that this local address finally disappeared. */
 803                if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
 804                        /* And the last, but not the least thing.
 805                           We must flush stray FIB entries.
 806
 807                           First of all, we scan fib_info list searching
 808                           for stray nexthop entries, then ignite fib_flush.
 809                        */
 810                        if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
 811                                fib_flush(dev_net(dev));
 812                }
 813        }
 814#undef LOCAL_OK
 815#undef BRD_OK
 816#undef BRD0_OK
 817#undef BRD1_OK
 818}
 819
 820static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 821{
 822
 823        struct fib_result       res;
 824        struct flowi            fl = { .mark = frn->fl_mark,
 825                                       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
 826                                                            .tos = frn->fl_tos,
 827                                                            .scope = frn->fl_scope } } };
 828
 829#ifdef CONFIG_IP_MULTIPLE_TABLES
 830        res.r = NULL;
 831#endif
 832
 833        frn->err = -ENOENT;
 834        if (tb) {
 835                local_bh_disable();
 836
 837                frn->tb_id = tb->tb_id;
 838                frn->err = tb->tb_lookup(tb, &fl, &res);
 839
 840                if (!frn->err) {
 841                        frn->prefixlen = res.prefixlen;
 842                        frn->nh_sel = res.nh_sel;
 843                        frn->type = res.type;
 844                        frn->scope = res.scope;
 845                        fib_res_put(&res);
 846                }
 847                local_bh_enable();
 848        }
 849}
 850
 851static void nl_fib_input(struct sk_buff *skb)
 852{
 853        struct net *net;
 854        struct fib_result_nl *frn;
 855        struct nlmsghdr *nlh;
 856        struct fib_table *tb;
 857        u32 pid;
 858
 859        net = sock_net(skb->sk);
 860        nlh = nlmsg_hdr(skb);
 861        if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 862            nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
 863                return;
 864
 865        skb = skb_clone(skb, GFP_KERNEL);
 866        if (skb == NULL)
 867                return;
 868        nlh = nlmsg_hdr(skb);
 869
 870        frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
 871        tb = fib_get_table(net, frn->tb_id_in);
 872
 873        nl_fib_lookup(frn, tb);
 874
 875        pid = NETLINK_CB(skb).pid;       /* pid of sending process */
 876        NETLINK_CB(skb).pid = 0;         /* from kernel */
 877        NETLINK_CB(skb).dst_group = 0;  /* unicast */
 878        netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
 879}
 880
 881static int nl_fib_lookup_init(struct net *net)
 882{
 883        struct sock *sk;
 884        sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
 885                                   nl_fib_input, NULL, THIS_MODULE);
 886        if (sk == NULL)
 887                return -EAFNOSUPPORT;
 888        net->ipv4.fibnl = sk;
 889        return 0;
 890}
 891
 892static void nl_fib_lookup_exit(struct net *net)
 893{
 894        netlink_kernel_release(net->ipv4.fibnl);
 895        net->ipv4.fibnl = NULL;
 896}
 897
 898static void fib_disable_ip(struct net_device *dev, int force)
 899{
 900        if (fib_sync_down_dev(dev, force))
 901                fib_flush(dev_net(dev));
 902        rt_cache_flush(dev_net(dev), 0);
 903        arp_ifdown(dev);
 904}
 905
 906static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
 907{
 908        struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 909        struct net_device *dev = ifa->ifa_dev->dev;
 910
 911        switch (event) {
 912        case NETDEV_UP:
 913                fib_add_ifaddr(ifa);
 914#ifdef CONFIG_IP_ROUTE_MULTIPATH
 915                fib_sync_up(dev);
 916#endif
 917                rt_cache_flush(dev_net(dev), -1);
 918                break;
 919        case NETDEV_DOWN:
 920                fib_del_ifaddr(ifa);
 921                if (ifa->ifa_dev->ifa_list == NULL) {
 922                        /* Last address was deleted from this interface.
 923                           Disable IP.
 924                         */
 925                        fib_disable_ip(dev, 1);
 926                } else {
 927                        rt_cache_flush(dev_net(dev), -1);
 928                }
 929                break;
 930        }
 931        return NOTIFY_DONE;
 932}
 933
 934static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 935{
 936        struct net_device *dev = ptr;
 937        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 938
 939        if (event == NETDEV_UNREGISTER) {
 940                fib_disable_ip(dev, 2);
 941                return NOTIFY_DONE;
 942        }
 943
 944        if (!in_dev)
 945                return NOTIFY_DONE;
 946
 947        switch (event) {
 948        case NETDEV_UP:
 949                for_ifa(in_dev) {
 950                        fib_add_ifaddr(ifa);
 951                } endfor_ifa(in_dev);
 952#ifdef CONFIG_IP_ROUTE_MULTIPATH
 953                fib_sync_up(dev);
 954#endif
 955                rt_cache_flush(dev_net(dev), -1);
 956                break;
 957        case NETDEV_DOWN:
 958                fib_disable_ip(dev, 0);
 959                break;
 960        case NETDEV_CHANGEMTU:
 961        case NETDEV_CHANGE:
 962                rt_cache_flush(dev_net(dev), 0);
 963                break;
 964        }
 965        return NOTIFY_DONE;
 966}
 967
/*
 * Notifier block routing IPv4 address events to fib_inetaddr_event();
 * registered with register_inetaddr_notifier() in ip_fib_init().
 */
static struct notifier_block fib_inetaddr_notifier = {
        .notifier_call = fib_inetaddr_event,
};
 971
/*
 * Notifier block routing network device events to fib_netdev_event();
 * registered with register_netdevice_notifier() in ip_fib_init().
 */
static struct notifier_block fib_netdev_notifier = {
        .notifier_call = fib_netdev_event,
};
 975
 976static int __net_init ip_fib_net_init(struct net *net)
 977{
 978        int err;
 979        unsigned int i;
 980
 981        net->ipv4.fib_table_hash = kzalloc(
 982                        sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
 983        if (net->ipv4.fib_table_hash == NULL)
 984                return -ENOMEM;
 985
 986        for (i = 0; i < FIB_TABLE_HASHSZ; i++)
 987                INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
 988
 989        err = fib4_rules_init(net);
 990        if (err < 0)
 991                goto fail;
 992        return 0;
 993
 994fail:
 995        kfree(net->ipv4.fib_table_hash);
 996        return err;
 997}
 998
 999static void __net_exit ip_fib_net_exit(struct net *net)
1000{
1001        unsigned int i;
1002
1003#ifdef CONFIG_IP_MULTIPLE_TABLES
1004        fib4_rules_exit(net);
1005#endif
1006
1007        for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1008                struct fib_table *tb;
1009                struct hlist_head *head;
1010                struct hlist_node *node, *tmp;
1011
1012                head = &net->ipv4.fib_table_hash[i];
1013                hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1014                        hlist_del(node);
1015                        tb->tb_flush(tb);
1016                        kfree(tb);
1017                }
1018        }
1019        kfree(net->ipv4.fib_table_hash);
1020}
1021
1022static int __net_init fib_net_init(struct net *net)
1023{
1024        int error;
1025
1026        error = ip_fib_net_init(net);
1027        if (error < 0)
1028                goto out;
1029        error = nl_fib_lookup_init(net);
1030        if (error < 0)
1031                goto out_nlfl;
1032        error = fib_proc_init(net);
1033        if (error < 0)
1034                goto out_proc;
1035out:
1036        return error;
1037
1038out_proc:
1039        nl_fib_lookup_exit(net);
1040out_nlfl:
1041        ip_fib_net_exit(net);
1042        goto out;
1043}
1044
1045static void __net_exit fib_net_exit(struct net *net)
1046{
1047        fib_proc_exit(net);
1048        nl_fib_lookup_exit(net);
1049        ip_fib_net_exit(net);
1050}
1051
/*
 * Per-network-namespace init/exit hooks for the FIB frontend; registered
 * with register_pernet_subsys() in ip_fib_init().
 */
static struct pernet_operations fib_net_ops = {
        .init = fib_net_init,
        .exit = fib_net_exit,
};
1056
1057void __init ip_fib_init(void)
1058{
1059        rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1060        rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1061        rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1062
1063        register_pernet_subsys(&fib_net_ops);
1064        register_netdevice_notifier(&fib_netdev_notifier);
1065        register_inetaddr_notifier(&fib_inetaddr_notifier);
1066
1067        fib_hash_init();
1068}
1069
/* Symbols from this file made available to other kernel code. */
EXPORT_SYMBOL(inet_addr_type);
EXPORT_SYMBOL(inet_dev_addr_type);
EXPORT_SYMBOL(ip_dev_find);
1073