linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/string.h>
  36#include <linux/mm.h>
  37#include <linux/socket.h>
  38#include <linux/sockios.h>
  39#include <linux/in.h>
  40#include <linux/errno.h>
  41#include <linux/interrupt.h>
  42#include <linux/if_addr.h>
  43#include <linux/if_ether.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/etherdevice.h>
  47#include <linux/skbuff.h>
  48#include <linux/init.h>
  49#include <linux/notifier.h>
  50#include <linux/inetdevice.h>
  51#include <linux/igmp.h>
  52#include <linux/slab.h>
  53#include <linux/hash.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58#include <linux/netconf.h>
  59
  60#include <net/arp.h>
  61#include <net/ip.h>
  62#include <net/route.h>
  63#include <net/ip_fib.h>
  64#include <net/rtnetlink.h>
  65#include <net/net_namespace.h>
  66#include <net/addrconf.h>
  67
  68#include "fib_lookup.h"
  69
  70static struct ipv4_devconf ipv4_devconf = {
  71        .data = {
  72                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  74                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  75                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  76        },
  77};
  78
  79static struct ipv4_devconf ipv4_devconf_dflt = {
  80        .data = {
  81                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  82                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  83                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  84                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  85                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  86        },
  87};
  88
  89#define IPV4_DEVCONF_DFLT(net, attr) \
  90        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  91
  92static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  93        [IFA_LOCAL]             = { .type = NLA_U32 },
  94        [IFA_ADDRESS]           = { .type = NLA_U32 },
  95        [IFA_BROADCAST]         = { .type = NLA_U32 },
  96        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
  97        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
  98};
  99
 100#define IN4_ADDR_HSIZE_SHIFT    8
 101#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
 102
 103static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 104static DEFINE_SPINLOCK(inet_addr_hash_lock);
 105
 106static u32 inet_addr_hash(struct net *net, __be32 addr)
 107{
 108        u32 val = (__force u32) addr ^ net_hash_mix(net);
 109
 110        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 111}
 112
 113static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 114{
 115        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 116
 117        spin_lock(&inet_addr_hash_lock);
 118        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 119        spin_unlock(&inet_addr_hash_lock);
 120}
 121
 122static void inet_hash_remove(struct in_ifaddr *ifa)
 123{
 124        spin_lock(&inet_addr_hash_lock);
 125        hlist_del_init_rcu(&ifa->hash);
 126        spin_unlock(&inet_addr_hash_lock);
 127}
 128
 129/**
 130 * __ip_dev_find - find the first device with a given source address.
 131 * @net: the net namespace
 132 * @addr: the source address
 133 * @devref: if true, take a reference on the found device
 134 *
 135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 136 */
 137struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 138{
 139        u32 hash = inet_addr_hash(net, addr);
 140        struct net_device *result = NULL;
 141        struct in_ifaddr *ifa;
 142
 143        rcu_read_lock();
 144        hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
 145                if (ifa->ifa_local == addr) {
 146                        struct net_device *dev = ifa->ifa_dev->dev;
 147
 148                        if (!net_eq(dev_net(dev), net))
 149                                continue;
 150                        result = dev;
 151                        break;
 152                }
 153        }
 154        if (!result) {
 155                struct flowi4 fl4 = { .daddr = addr };
 156                struct fib_result res = { 0 };
 157                struct fib_table *local;
 158
 159                /* Fallback to FIB local table so that communication
 160                 * over loopback subnets work.
 161                 */
 162                local = fib_get_table(net, RT_TABLE_LOCAL);
 163                if (local &&
 164                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 165                    res.type == RTN_LOCAL)
 166                        result = FIB_RES_DEV(res);
 167        }
 168        if (result && devref)
 169                dev_hold(result);
 170        rcu_read_unlock();
 171        return result;
 172}
 173EXPORT_SYMBOL(__ip_dev_find);
 174
 175static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 176
 177static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 178static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 179                         int destroy);
 180#ifdef CONFIG_SYSCTL
 181static void devinet_sysctl_register(struct in_device *idev);
 182static void devinet_sysctl_unregister(struct in_device *idev);
 183#else
 184static void devinet_sysctl_register(struct in_device *idev)
 185{
 186}
 187static void devinet_sysctl_unregister(struct in_device *idev)
 188{
 189}
 190#endif
 191
 192/* Locks all the inet devices. */
 193
 194static struct in_ifaddr *inet_alloc_ifa(void)
 195{
 196        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 197}
 198
 199static void inet_rcu_free_ifa(struct rcu_head *head)
 200{
 201        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 202        if (ifa->ifa_dev)
 203                in_dev_put(ifa->ifa_dev);
 204        kfree(ifa);
 205}
 206
 207static void inet_free_ifa(struct in_ifaddr *ifa)
 208{
 209        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 210}
 211
 212void in_dev_finish_destroy(struct in_device *idev)
 213{
 214        struct net_device *dev = idev->dev;
 215
 216        WARN_ON(idev->ifa_list);
 217        WARN_ON(idev->mc_list);
 218        kfree(rcu_dereference_protected(idev->mc_hash, 1));
 219#ifdef NET_REFCNT_DEBUG
 220        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 221#endif
 222        dev_put(dev);
 223        if (!idev->dead)
 224                pr_err("Freeing alive in_device %p\n", idev);
 225        else
 226                kfree(idev);
 227}
 228EXPORT_SYMBOL(in_dev_finish_destroy);
 229
 230static struct in_device *inetdev_init(struct net_device *dev)
 231{
 232        struct in_device *in_dev;
 233
 234        ASSERT_RTNL();
 235
 236        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 237        if (!in_dev)
 238                goto out;
 239        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 240                        sizeof(in_dev->cnf));
 241        in_dev->cnf.sysctl = NULL;
 242        in_dev->dev = dev;
 243        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 244        if (!in_dev->arp_parms)
 245                goto out_kfree;
 246        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 247                dev_disable_lro(dev);
 248        /* Reference in_dev->dev */
 249        dev_hold(dev);
 250        /* Account for reference dev->ip_ptr (below) */
 251        in_dev_hold(in_dev);
 252
 253        devinet_sysctl_register(in_dev);
 254        ip_mc_init_dev(in_dev);
 255        if (dev->flags & IFF_UP)
 256                ip_mc_up(in_dev);
 257
 258        /* we can receive as soon as ip_ptr is set -- do this last */
 259        rcu_assign_pointer(dev->ip_ptr, in_dev);
 260out:
 261        return in_dev;
 262out_kfree:
 263        kfree(in_dev);
 264        in_dev = NULL;
 265        goto out;
 266}
 267
 268static void in_dev_rcu_put(struct rcu_head *head)
 269{
 270        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 271        in_dev_put(idev);
 272}
 273
 274static void inetdev_destroy(struct in_device *in_dev)
 275{
 276        struct in_ifaddr *ifa;
 277        struct net_device *dev;
 278
 279        ASSERT_RTNL();
 280
 281        dev = in_dev->dev;
 282
 283        in_dev->dead = 1;
 284
 285        ip_mc_destroy_dev(in_dev);
 286
 287        while ((ifa = in_dev->ifa_list) != NULL) {
 288                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 289                inet_free_ifa(ifa);
 290        }
 291
 292        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 293
 294        devinet_sysctl_unregister(in_dev);
 295        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 296        arp_ifdown(dev);
 297
 298        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 299}
 300
 301int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 302{
 303        rcu_read_lock();
 304        for_primary_ifa(in_dev) {
 305                if (inet_ifa_match(a, ifa)) {
 306                        if (!b || inet_ifa_match(b, ifa)) {
 307                                rcu_read_unlock();
 308                                return 1;
 309                        }
 310                }
 311        } endfor_ifa(in_dev);
 312        rcu_read_unlock();
 313        return 0;
 314}
 315
 316static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 317                         int destroy, struct nlmsghdr *nlh, u32 portid)
 318{
 319        struct in_ifaddr *promote = NULL;
 320        struct in_ifaddr *ifa, *ifa1 = *ifap;
 321        struct in_ifaddr *last_prim = in_dev->ifa_list;
 322        struct in_ifaddr *prev_prom = NULL;
 323        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 324
 325        ASSERT_RTNL();
 326
 327        /* 1. Deleting primary ifaddr forces deletion all secondaries
 328         * unless alias promotion is set
 329         **/
 330
 331        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 332                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 333
 334                while ((ifa = *ifap1) != NULL) {
 335                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 336                            ifa1->ifa_scope <= ifa->ifa_scope)
 337                                last_prim = ifa;
 338
 339                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 340                            ifa1->ifa_mask != ifa->ifa_mask ||
 341                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 342                                ifap1 = &ifa->ifa_next;
 343                                prev_prom = ifa;
 344                                continue;
 345                        }
 346
 347                        if (!do_promote) {
 348                                inet_hash_remove(ifa);
 349                                *ifap1 = ifa->ifa_next;
 350
 351                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 352                                blocking_notifier_call_chain(&inetaddr_chain,
 353                                                NETDEV_DOWN, ifa);
 354                                inet_free_ifa(ifa);
 355                        } else {
 356                                promote = ifa;
 357                                break;
 358                        }
 359                }
 360        }
 361
 362        /* On promotion all secondaries from subnet are changing
 363         * the primary IP, we must remove all their routes silently
 364         * and later to add them back with new prefsrc. Do this
 365         * while all addresses are on the device list.
 366         */
 367        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 368                if (ifa1->ifa_mask == ifa->ifa_mask &&
 369                    inet_ifa_match(ifa1->ifa_address, ifa))
 370                        fib_del_ifaddr(ifa, ifa1);
 371        }
 372
 373        /* 2. Unlink it */
 374
 375        *ifap = ifa1->ifa_next;
 376        inet_hash_remove(ifa1);
 377
 378        /* 3. Announce address deletion */
 379
 380        /* Send message first, then call notifier.
 381           At first sight, FIB update triggered by notifier
 382           will refer to already deleted ifaddr, that could confuse
 383           netlink listeners. It is not true: look, gated sees
 384           that route deleted and if it still thinks that ifaddr
 385           is valid, it will try to restore deleted routes... Grr.
 386           So that, this order is correct.
 387         */
 388        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
 389        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 390
 391        if (promote) {
 392                struct in_ifaddr *next_sec = promote->ifa_next;
 393
 394                if (prev_prom) {
 395                        prev_prom->ifa_next = promote->ifa_next;
 396                        promote->ifa_next = last_prim->ifa_next;
 397                        last_prim->ifa_next = promote;
 398                }
 399
 400                promote->ifa_flags &= ~IFA_F_SECONDARY;
 401                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 402                blocking_notifier_call_chain(&inetaddr_chain,
 403                                NETDEV_UP, promote);
 404                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 405                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 406                            !inet_ifa_match(ifa1->ifa_address, ifa))
 407                                        continue;
 408                        fib_add_ifaddr(ifa);
 409                }
 410
 411        }
 412        if (destroy)
 413                inet_free_ifa(ifa1);
 414}
 415
 416static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 417                         int destroy)
 418{
 419        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 420}
 421
 422static void check_lifetime(struct work_struct *work);
 423
 424static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 425
 426static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 427                             u32 portid)
 428{
 429        struct in_device *in_dev = ifa->ifa_dev;
 430        struct in_ifaddr *ifa1, **ifap, **last_primary;
 431
 432        ASSERT_RTNL();
 433
 434        if (!ifa->ifa_local) {
 435                inet_free_ifa(ifa);
 436                return 0;
 437        }
 438
 439        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 440        last_primary = &in_dev->ifa_list;
 441
 442        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 443             ifap = &ifa1->ifa_next) {
 444                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 445                    ifa->ifa_scope <= ifa1->ifa_scope)
 446                        last_primary = &ifa1->ifa_next;
 447                if (ifa1->ifa_mask == ifa->ifa_mask &&
 448                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 449                        if (ifa1->ifa_local == ifa->ifa_local) {
 450                                inet_free_ifa(ifa);
 451                                return -EEXIST;
 452                        }
 453                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 454                                inet_free_ifa(ifa);
 455                                return -EINVAL;
 456                        }
 457                        ifa->ifa_flags |= IFA_F_SECONDARY;
 458                }
 459        }
 460
 461        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 462                net_srandom(ifa->ifa_local);
 463                ifap = last_primary;
 464        }
 465
 466        ifa->ifa_next = *ifap;
 467        *ifap = ifa;
 468
 469        inet_hash_insert(dev_net(in_dev->dev), ifa);
 470
 471        cancel_delayed_work(&check_lifetime_work);
 472        schedule_delayed_work(&check_lifetime_work, 0);
 473
 474        /* Send message first, then call notifier.
 475           Notifier will trigger FIB update, so that
 476           listeners of netlink will know about new ifaddr */
 477        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 478        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 479
 480        return 0;
 481}
 482
 483static int inet_insert_ifa(struct in_ifaddr *ifa)
 484{
 485        return __inet_insert_ifa(ifa, NULL, 0);
 486}
 487
 488static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 489{
 490        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 491
 492        ASSERT_RTNL();
 493
 494        if (!in_dev) {
 495                inet_free_ifa(ifa);
 496                return -ENOBUFS;
 497        }
 498        ipv4_devconf_setall(in_dev);
 499        if (ifa->ifa_dev != in_dev) {
 500                WARN_ON(ifa->ifa_dev);
 501                in_dev_hold(in_dev);
 502                ifa->ifa_dev = in_dev;
 503        }
 504        if (ipv4_is_loopback(ifa->ifa_local))
 505                ifa->ifa_scope = RT_SCOPE_HOST;
 506        return inet_insert_ifa(ifa);
 507}
 508
 509/* Caller must hold RCU or RTNL :
 510 * We dont take a reference on found in_device
 511 */
 512struct in_device *inetdev_by_index(struct net *net, int ifindex)
 513{
 514        struct net_device *dev;
 515        struct in_device *in_dev = NULL;
 516
 517        rcu_read_lock();
 518        dev = dev_get_by_index_rcu(net, ifindex);
 519        if (dev)
 520                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 521        rcu_read_unlock();
 522        return in_dev;
 523}
 524EXPORT_SYMBOL(inetdev_by_index);
 525
 526/* Called only from RTNL semaphored context. No locks. */
 527
 528struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 529                                    __be32 mask)
 530{
 531        ASSERT_RTNL();
 532
 533        for_primary_ifa(in_dev) {
 534                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 535                        return ifa;
 536        } endfor_ifa(in_dev);
 537        return NULL;
 538}
 539
 540static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 541{
 542        struct net *net = sock_net(skb->sk);
 543        struct nlattr *tb[IFA_MAX+1];
 544        struct in_device *in_dev;
 545        struct ifaddrmsg *ifm;
 546        struct in_ifaddr *ifa, **ifap;
 547        int err = -EINVAL;
 548
 549        ASSERT_RTNL();
 550
 551        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 552        if (err < 0)
 553                goto errout;
 554
 555        ifm = nlmsg_data(nlh);
 556        in_dev = inetdev_by_index(net, ifm->ifa_index);
 557        if (in_dev == NULL) {
 558                err = -ENODEV;
 559                goto errout;
 560        }
 561
 562        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 563             ifap = &ifa->ifa_next) {
 564                if (tb[IFA_LOCAL] &&
 565                    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 566                        continue;
 567
 568                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 569                        continue;
 570
 571                if (tb[IFA_ADDRESS] &&
 572                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 573                    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 574                        continue;
 575
 576                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 577                return 0;
 578        }
 579
 580        err = -EADDRNOTAVAIL;
 581errout:
 582        return err;
 583}
 584
 585#define INFINITY_LIFE_TIME      0xFFFFFFFF
 586
 587static void check_lifetime(struct work_struct *work)
 588{
 589        unsigned long now, next, next_sec, next_sched;
 590        struct in_ifaddr *ifa;
 591        struct hlist_node *n;
 592        int i;
 593
 594        now = jiffies;
 595        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 596
 597        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
 598                bool change_needed = false;
 599
 600                rcu_read_lock();
 601                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
 602                        unsigned long age;
 603
 604                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 605                                continue;
 606
 607                        /* We try to batch several events at once. */
 608                        age = (now - ifa->ifa_tstamp +
 609                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 610
 611                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 612                            age >= ifa->ifa_valid_lft) {
 613                                change_needed = true;
 614                        } else if (ifa->ifa_preferred_lft ==
 615                                   INFINITY_LIFE_TIME) {
 616                                continue;
 617                        } else if (age >= ifa->ifa_preferred_lft) {
 618                                if (time_before(ifa->ifa_tstamp +
 619                                                ifa->ifa_valid_lft * HZ, next))
 620                                        next = ifa->ifa_tstamp +
 621                                               ifa->ifa_valid_lft * HZ;
 622
 623                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
 624                                        change_needed = true;
 625                        } else if (time_before(ifa->ifa_tstamp +
 626                                               ifa->ifa_preferred_lft * HZ,
 627                                               next)) {
 628                                next = ifa->ifa_tstamp +
 629                                       ifa->ifa_preferred_lft * HZ;
 630                        }
 631                }
 632                rcu_read_unlock();
 633                if (!change_needed)
 634                        continue;
 635                rtnl_lock();
 636                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
 637                        unsigned long age;
 638
 639                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 640                                continue;
 641
 642                        /* We try to batch several events at once. */
 643                        age = (now - ifa->ifa_tstamp +
 644                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 645
 646                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 647                            age >= ifa->ifa_valid_lft) {
 648                                struct in_ifaddr **ifap;
 649
 650                                for (ifap = &ifa->ifa_dev->ifa_list;
 651                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
 652                                        if (*ifap == ifa) {
 653                                                inet_del_ifa(ifa->ifa_dev,
 654                                                             ifap, 1);
 655                                                break;
 656                                        }
 657                                }
 658                        } else if (ifa->ifa_preferred_lft !=
 659                                   INFINITY_LIFE_TIME &&
 660                                   age >= ifa->ifa_preferred_lft &&
 661                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
 662                                ifa->ifa_flags |= IFA_F_DEPRECATED;
 663                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 664                        }
 665                }
 666                rtnl_unlock();
 667        }
 668
 669        next_sec = round_jiffies_up(next);
 670        next_sched = next;
 671
 672        /* If rounded timeout is accurate enough, accept it. */
 673        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
 674                next_sched = next_sec;
 675
 676        now = jiffies;
 677        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
 678        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
 679                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
 680
 681        schedule_delayed_work(&check_lifetime_work, next_sched - now);
 682}
 683
 684static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 685                             __u32 prefered_lft)
 686{
 687        unsigned long timeout;
 688
 689        ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 690
 691        timeout = addrconf_timeout_fixup(valid_lft, HZ);
 692        if (addrconf_finite_timeout(timeout))
 693                ifa->ifa_valid_lft = timeout;
 694        else
 695                ifa->ifa_flags |= IFA_F_PERMANENT;
 696
 697        timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 698        if (addrconf_finite_timeout(timeout)) {
 699                if (timeout == 0)
 700                        ifa->ifa_flags |= IFA_F_DEPRECATED;
 701                ifa->ifa_preferred_lft = timeout;
 702        }
 703        ifa->ifa_tstamp = jiffies;
 704        if (!ifa->ifa_cstamp)
 705                ifa->ifa_cstamp = ifa->ifa_tstamp;
 706}
 707
 708static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 709                                       __u32 *pvalid_lft, __u32 *pprefered_lft)
 710{
 711        struct nlattr *tb[IFA_MAX+1];
 712        struct in_ifaddr *ifa;
 713        struct ifaddrmsg *ifm;
 714        struct net_device *dev;
 715        struct in_device *in_dev;
 716        int err;
 717
 718        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 719        if (err < 0)
 720                goto errout;
 721
 722        ifm = nlmsg_data(nlh);
 723        err = -EINVAL;
 724        if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 725                goto errout;
 726
 727        dev = __dev_get_by_index(net, ifm->ifa_index);
 728        err = -ENODEV;
 729        if (dev == NULL)
 730                goto errout;
 731
 732        in_dev = __in_dev_get_rtnl(dev);
 733        err = -ENOBUFS;
 734        if (in_dev == NULL)
 735                goto errout;
 736
 737        ifa = inet_alloc_ifa();
 738        if (ifa == NULL)
 739                /*
 740                 * A potential indev allocation can be left alive, it stays
 741                 * assigned to its device and is destroy with it.
 742                 */
 743                goto errout;
 744
 745        ipv4_devconf_setall(in_dev);
 746        in_dev_hold(in_dev);
 747
 748        if (tb[IFA_ADDRESS] == NULL)
 749                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 750
 751        INIT_HLIST_NODE(&ifa->hash);
 752        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 753        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 754        ifa->ifa_flags = ifm->ifa_flags;
 755        ifa->ifa_scope = ifm->ifa_scope;
 756        ifa->ifa_dev = in_dev;
 757
 758        ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 759        ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 760
 761        if (tb[IFA_BROADCAST])
 762                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 763
 764        if (tb[IFA_LABEL])
 765                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 766        else
 767                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 768
 769        if (tb[IFA_CACHEINFO]) {
 770                struct ifa_cacheinfo *ci;
 771
 772                ci = nla_data(tb[IFA_CACHEINFO]);
 773                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 774                        err = -EINVAL;
 775                        goto errout_free;
 776                }
 777                *pvalid_lft = ci->ifa_valid;
 778                *pprefered_lft = ci->ifa_prefered;
 779        }
 780
 781        return ifa;
 782
 783errout_free:
 784        inet_free_ifa(ifa);
 785errout:
 786        return ERR_PTR(err);
 787}
 788
 789static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 790{
 791        struct in_device *in_dev = ifa->ifa_dev;
 792        struct in_ifaddr *ifa1, **ifap;
 793
 794        if (!ifa->ifa_local)
 795                return NULL;
 796
 797        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 798             ifap = &ifa1->ifa_next) {
 799                if (ifa1->ifa_mask == ifa->ifa_mask &&
 800                    inet_ifa_match(ifa1->ifa_address, ifa) &&
 801                    ifa1->ifa_local == ifa->ifa_local)
 802                        return ifa1;
 803        }
 804        return NULL;
 805}
 806
 807static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 808{
 809        struct net *net = sock_net(skb->sk);
 810        struct in_ifaddr *ifa;
 811        struct in_ifaddr *ifa_existing;
 812        __u32 valid_lft = INFINITY_LIFE_TIME;
 813        __u32 prefered_lft = INFINITY_LIFE_TIME;
 814
 815        ASSERT_RTNL();
 816
 817        ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 818        if (IS_ERR(ifa))
 819                return PTR_ERR(ifa);
 820
 821        ifa_existing = find_matching_ifa(ifa);
 822        if (!ifa_existing) {
 823                /* It would be best to check for !NLM_F_CREATE here but
 824                 * userspace alreay relies on not having to provide this.
 825                 */
 826                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 827                return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 828        } else {
 829                inet_free_ifa(ifa);
 830
 831                if (nlh->nlmsg_flags & NLM_F_EXCL ||
 832                    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 833                        return -EEXIST;
 834                ifa = ifa_existing;
 835                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 836                cancel_delayed_work(&check_lifetime_work);
 837                schedule_delayed_work(&check_lifetime_work, 0);
 838                rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 839                blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 840        }
 841        return 0;
 842}
 843
 844/*
 845 *      Determine a default network mask, based on the IP address.
 846 */
 847
 848static int inet_abc_len(__be32 addr)
 849{
 850        int rc = -1;    /* Something else, probably a multicast. */
 851
 852        if (ipv4_is_zeronet(addr))
 853                rc = 0;
 854        else {
 855                __u32 haddr = ntohl(addr);
 856
 857                if (IN_CLASSA(haddr))
 858                        rc = 8;
 859                else if (IN_CLASSB(haddr))
 860                        rc = 16;
 861                else if (IN_CLASSC(haddr))
 862                        rc = 24;
 863        }
 864
 865        return rc;
 866}
 867
 868
 /*
  *      devinet_ioctl - IPv4 address and flag ioctls for a single interface.
  *
  *      @net: network namespace used for the device lookup and for the
  *            CAP_NET_ADMIN check (net->user_ns)
  *      @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
  *            SIOCGIFNETMASK, SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR,
  *            SIOCSIFDSTADDR or SIOCSIFNETMASK; anything else is -EINVAL
  *      @arg: user pointer to a struct ifreq; read for every command and
  *            written back for the four "get" commands
  *
  *      The interface name may carry a ":alias" suffix, in which case the
  *      full string is matched against ifa_label to pick one address of
  *      the device.  Device lookup and all modifications run under
  *      rtnl_lock().
  *
  *      Returns 0 on success or a negative errno: -EFAULT (user copy),
  *      -EPERM (set command without CAP_NET_ADMIN), -EINVAL, -ENODEV,
  *      -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() or
  *      inet_set_ifa() return.
  */
 869int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 870{
 871        struct ifreq ifr;
 872        struct sockaddr_in sin_orig;
 873        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 874        struct in_device *in_dev;
 875        struct in_ifaddr **ifap = NULL;
 876        struct in_ifaddr *ifa = NULL;
 877        struct net_device *dev;
 878        char *colon;
 879        int ret = -EFAULT;
 880        int tryaddrmatch = 0;
 881
 882        /*
 883         *      Fetch the caller's info block into kernel space
 884         */
 885
 886        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 887                goto out;
 888        ifr.ifr_name[IFNAMSIZ - 1] = 0;
 889
 890        /* save original address for comparison */
 891        memcpy(&sin_orig, sin, sizeof(*sin));
 892
            /* Temporarily cut off a ":alias" suffix so that dev_load() and
             * the device lookup below see the bare device name.  The ':'
             * is restored once the device has been found.
             */
 893        colon = strchr(ifr.ifr_name, ':');
 894        if (colon)
 895                *colon = 0;
 896
 897        dev_load(net, ifr.ifr_name);
 898
 899        switch (cmd) {
 900        case SIOCGIFADDR:       /* Get interface address */
 901        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 902        case SIOCGIFDSTADDR:    /* Get the destination address */
 903        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 904                /* Note that these ioctls will not sleep,
 905                   so that we do not impose a lock.
 906                   One day we will be forced to put shlock here (I mean SMP)
 907                 */
 908                tryaddrmatch = (sin_orig.sin_family == AF_INET);
 909                memset(sin, 0, sizeof(*sin));
 910                sin->sin_family = AF_INET;
 911                break;
 912
 913        case SIOCSIFFLAGS:
 914                ret = -EPERM;
 915                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 916                        goto out;
 917                break;
 918        case SIOCSIFADDR:       /* Set interface address (and family) */
 919        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 920        case SIOCSIFDSTADDR:    /* Set the destination address */
 921        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 922                ret = -EPERM;
 923                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 924                        goto out;
 925                ret = -EINVAL;
 926                if (sin->sin_family != AF_INET)
 927                        goto out;
 928                break;
 929        default:
 930                ret = -EINVAL;
 931                goto out;
 932        }
 933
 934        rtnl_lock();
 935
 936        ret = -ENODEV;
 937        dev = __dev_get_by_name(net, ifr.ifr_name);
 938        if (!dev)
 939                goto done;
 940
 941        if (colon)
 942                *colon = ':';
 943
            /* Locate the address entry the request refers to.  On exit from
             * either loop, ifa is the match (or NULL) and ifap points at
             * the link that references it; ifap is what inet_del_ifa() is
             * handed further down.
             */
 944        in_dev = __in_dev_get_rtnl(dev);
 945        if (in_dev) {
 946                if (tryaddrmatch) {
 947                        /* Matthias Andree */
 948                        /* compare label and address (4.4BSD style) */
 949                        /* note: we only do this for a limited set of ioctls
 950                           and only if the original address family was AF_INET.
 951                           This is checked above. */
 952                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 953                             ifap = &ifa->ifa_next) {
 954                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 955                                    sin_orig.sin_addr.s_addr ==
 956                                                        ifa->ifa_local) {
 957                                        break; /* found */
 958                                }
 959                        }
 960                }
 961                /* we didn't get a match, maybe the application is
 962                   4.3BSD-style and passed in junk so we fall back to
 963                   comparing just the label */
 964                if (!ifa) {
 965                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 966                             ifap = &ifa->ifa_next)
 967                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
 968                                        break;
 969                }
 970        }
 971
            /* Only SIOCSIFADDR (which can create an address) and
             * SIOCSIFFLAGS (which handles a missing address itself) may
             * continue without a matching entry.
             */
 972        ret = -EADDRNOTAVAIL;
 973        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 974                goto done;
 975
 976        switch (cmd) {
 977        case SIOCGIFADDR:       /* Get interface address */
 978                sin->sin_addr.s_addr = ifa->ifa_local;
 979                goto rarok;
 980
 981        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 982                sin->sin_addr.s_addr = ifa->ifa_broadcast;
 983                goto rarok;
 984
 985        case SIOCGIFDSTADDR:    /* Get the destination address */
 986                sin->sin_addr.s_addr = ifa->ifa_address;
 987                goto rarok;
 988
 989        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 990                sin->sin_addr.s_addr = ifa->ifa_mask;
 991                goto rarok;
 992
 993        case SIOCSIFFLAGS:
                    /* With an alias name the flags apply to that one address:
                     * clearing IFF_UP deletes it, anything else is a no-op.
                     * Without an alias the flags go to the device itself.
                     */
 994                if (colon) {
 995                        ret = -EADDRNOTAVAIL;
 996                        if (!ifa)
 997                                break;
 998                        ret = 0;
 999                        if (!(ifr.ifr_flags & IFF_UP))
1000                                inet_del_ifa(in_dev, ifap, 1);
1001                        break;
1002                }
1003                ret = dev_change_flags(dev, ifr.ifr_flags);
1004                break;
1005
1006        case SIOCSIFADDR:       /* Set interface address (and family) */
1007                ret = -EINVAL;
1008                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1009                        break;
1010
1011                if (!ifa) {
                            /* No entry yet: allocate one, labelled with the
                             * alias name if one was given, else the device name.
                             */
1012                        ret = -ENOBUFS;
1013                        ifa = inet_alloc_ifa();
1014                        if (!ifa)
1015                                break;
1016                        INIT_HLIST_NODE(&ifa->hash);
1017                        if (colon)
1018                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1019                        else
1020                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1021                } else {
                            /* Same address already configured: nothing to do.
                             * Otherwise take the entry off the list so that it
                             * can be rewritten and handed to inet_set_ifa().
                             * NOTE(review): the final 0 presumably tells
                             * inet_del_ifa() not to free the entry - confirm.
                             */
1022                        ret = 0;
1023                        if (ifa->ifa_local == sin->sin_addr.s_addr)
1024                                break;
1025                        inet_del_ifa(in_dev, ifap, 0);
1026                        ifa->ifa_broadcast = 0;
1027                        ifa->ifa_scope = 0;
1028                }
1029
1030                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1031
                    /* Non point-to-point devices get the classful default
                     * mask from inet_abc_len(); point-to-point ones a /32.
                     */
1032                if (!(dev->flags & IFF_POINTOPOINT)) {
1033                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1034                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1035                        if ((dev->flags & IFF_BROADCAST) &&
1036                            ifa->ifa_prefixlen < 31)
1037                                ifa->ifa_broadcast = ifa->ifa_address |
1038                                                     ~ifa->ifa_mask;
1039                } else {
1040                        ifa->ifa_prefixlen = 32;
1041                        ifa->ifa_mask = inet_make_mask(32);
1042                }
1043                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1044                ret = inet_set_ifa(dev, ifa);
1045                break;
1046
1047        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1048                ret = 0;
1049                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1050                        inet_del_ifa(in_dev, ifap, 0);
1051                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
1052                        inet_insert_ifa(ifa);
1053                }
1054                break;
1055
1056        case SIOCSIFDSTADDR:    /* Set the destination address */
1057                ret = 0;
1058                if (ifa->ifa_address == sin->sin_addr.s_addr)
1059                        break;
1060                ret = -EINVAL;
1061                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1062                        break;
1063                ret = 0;
1064                inet_del_ifa(in_dev, ifap, 0);
1065                ifa->ifa_address = sin->sin_addr.s_addr;
1066                inet_insert_ifa(ifa);
1067                break;
1068
1069        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1070
1071                /*
1072                 *      The mask we set must be legal.
1073                 */
1074                ret = -EINVAL;
1075                if (bad_mask(sin->sin_addr.s_addr, 0))
1076                        break;
1077                ret = 0;
1078                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1079                        __be32 old_mask = ifa->ifa_mask;
1080                        inet_del_ifa(in_dev, ifap, 0);
1081                        ifa->ifa_mask = sin->sin_addr.s_addr;
1082                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1083
1084                        /* See if current broadcast address matches
1085                         * with current netmask, then recalculate
1086                         * the broadcast address. Otherwise it's a
1087                         * funny address, so don't touch it since
1088                         * the user seems to know what (s)he's doing...
1089                         */
1090                        if ((dev->flags & IFF_BROADCAST) &&
1091                            (ifa->ifa_prefixlen < 31) &&
1092                            (ifa->ifa_broadcast ==
1093                             (ifa->ifa_local|~old_mask))) {
1094                                ifa->ifa_broadcast = (ifa->ifa_local |
1095                                                      ~sin->sin_addr.s_addr);
1096                        }
1097                        inet_insert_ifa(ifa);
1098                }
1099                break;
1100        }
1101done:
1102        rtnl_unlock();
1103out:
1104        return ret;
    /* "Get" commands end up here: RTNL is dropped before user memory is
     * touched, then the filled-in ifreq is copied back to the caller.
     */
1105rarok:
1106        rtnl_unlock();
1107        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1108        goto out;
1109}
1110
1111static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1112{
1113        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1114        struct in_ifaddr *ifa;
1115        struct ifreq ifr;
1116        int done = 0;
1117
1118        if (!in_dev)
1119                goto out;
1120
1121        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1122                if (!buf) {
1123                        done += sizeof(ifr);
1124                        continue;
1125                }
1126                if (len < (int) sizeof(ifr))
1127                        break;
1128                memset(&ifr, 0, sizeof(struct ifreq));
1129                if (ifa->ifa_label)
1130                        strcpy(ifr.ifr_name, ifa->ifa_label);
1131                else
1132                        strcpy(ifr.ifr_name, dev->name);
1133
1134                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1135                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1136                                                                ifa->ifa_local;
1137
1138                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1139                        done = -EFAULT;
1140                        break;
1141                }
1142                buf  += sizeof(struct ifreq);
1143                len  -= sizeof(struct ifreq);
1144                done += sizeof(struct ifreq);
1145        }
1146out:
1147        return done;
1148}
1149
1150__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1151{
1152        __be32 addr = 0;
1153        struct in_device *in_dev;
1154        struct net *net = dev_net(dev);
1155
1156        rcu_read_lock();
1157        in_dev = __in_dev_get_rcu(dev);
1158        if (!in_dev)
1159                goto no_in_dev;
1160
1161        for_primary_ifa(in_dev) {
1162                if (ifa->ifa_scope > scope)
1163                        continue;
1164                if (!dst || inet_ifa_match(dst, ifa)) {
1165                        addr = ifa->ifa_local;
1166                        break;
1167                }
1168                if (!addr)
1169                        addr = ifa->ifa_local;
1170        } endfor_ifa(in_dev);
1171
1172        if (addr)
1173                goto out_unlock;
1174no_in_dev:
1175
1176        /* Not loopback addresses on loopback should be preferred
1177           in this case. It is importnat that lo is the first interface
1178           in dev_base list.
1179         */
1180        for_each_netdev_rcu(net, dev) {
1181                in_dev = __in_dev_get_rcu(dev);
1182                if (!in_dev)
1183                        continue;
1184
1185                for_primary_ifa(in_dev) {
1186                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
1187                            ifa->ifa_scope <= scope) {
1188                                addr = ifa->ifa_local;
1189                                goto out_unlock;
1190                        }
1191                } endfor_ifa(in_dev);
1192        }
1193out_unlock:
1194        rcu_read_unlock();
1195        return addr;
1196}
1197EXPORT_SYMBOL(inet_select_addr);
1198
/*
 * Walk every address on @in_dev looking for a local address that meets
 * the (@dst, @local, @scope) constraints.
 *
 * Two pieces of state are tracked across the walk:
 *  - addr: the first ifa_local whose scope is <= @scope and which equals
 *          @local (any address qualifies when @local is 0);
 *  - same: set once some address matches, via inet_ifa_match(), both
 *          @local (if non-zero) and @dst (if non-zero).
 *
 * Returns addr if a "same" match was found (addr may still be 0 then),
 * and 0 otherwise.
 */
1199static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1200                              __be32 local, int scope)
1201{
1202        int same = 0;
1203        __be32 addr = 0;
1204
1205        for_ifa(in_dev) {
                    /* Candidate source address: first in-scope entry that
                     * equals @local, or any in-scope entry if @local is 0.
                     */
1206                if (!addr &&
1207                    (local == ifa->ifa_local || !local) &&
1208                    ifa->ifa_scope <= scope) {
1209                        addr = ifa->ifa_local;
1210                        if (same)
1211                                break;
1212                }
1213                if (!same) {
1214                        same = (!local || inet_ifa_match(local, ifa)) &&
1215                                (!dst || inet_ifa_match(dst, ifa));
1216                        if (same && addr) {
1217                                if (local || !dst)
1218                                        break;
1219                                /* Is the selected addr into dst subnet? */
1220                                if (inet_ifa_match(addr, ifa))
1221                                        break;
1222                                /* No, then can we use new local src? */
1223                                if (ifa->ifa_scope <= scope) {
1224                                        addr = ifa->ifa_local;
1225                                        break;
1226                                }
1227                                /* search for large dst subnet for addr */
1228                                same = 0;
1229                        }
1230                }
1231        } endfor_ifa(in_dev);
1232
1233        return same ? addr : 0;
1234}
1235
1236/*
1237 * Confirm that local IP address exists using wildcards:
1238 * - in_dev: only on this interface, 0=any interface
1239 * - dst: only in the same subnet as dst, 0=any dst
1240 * - local: address, 0=autoselect the local address
1241 * - scope: maximum allowed scope value for the local address
1242 */
1243__be32 inet_confirm_addr(struct in_device *in_dev,
1244                         __be32 dst, __be32 local, int scope)
1245{
1246        __be32 addr = 0;
1247        struct net_device *dev;
1248        struct net *net;
1249
1250        if (scope != RT_SCOPE_LINK)
1251                return confirm_addr_indev(in_dev, dst, local, scope);
1252
1253        net = dev_net(in_dev->dev);
1254        rcu_read_lock();
1255        for_each_netdev_rcu(net, dev) {
1256                in_dev = __in_dev_get_rcu(dev);
1257                if (in_dev) {
1258                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1259                        if (addr)
1260                                break;
1261                }
1262        }
1263        rcu_read_unlock();
1264
1265        return addr;
1266}
1267EXPORT_SYMBOL(inet_confirm_addr);
1268
1269/*
1270 *      Device notifier
1271 */
1272
1273int register_inetaddr_notifier(struct notifier_block *nb)
1274{
1275        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1276}
1277EXPORT_SYMBOL(register_inetaddr_notifier);
1278
1279int unregister_inetaddr_notifier(struct notifier_block *nb)
1280{
1281        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1282}
1283EXPORT_SYMBOL(unregister_inetaddr_notifier);
1284
1285/* Rename ifa_labels for a device name change. Make some effort to preserve
1286 * existing alias numbering and to create unique labels if possible.
1287*/
1288static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1289{
1290        struct in_ifaddr *ifa;
1291        int named = 0;
1292
1293        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1294                char old[IFNAMSIZ], *dot;
1295
1296                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1297                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1298                if (named++ == 0)
1299                        goto skip;
1300                dot = strchr(old, ':');
1301                if (dot == NULL) {
1302                        sprintf(old, ":%d", named);
1303                        dot = old;
1304                }
1305                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1306                        strcat(ifa->ifa_label, dot);
1307                else
1308                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1309skip:
1310                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1311        }
1312}
1313
/* An MTU is acceptable for IPv4 on a device only if it is at least 68. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        const unsigned int min_mtu = 68;

        return !(mtu < min_mtu);
}
1318
1319static void inetdev_send_gratuitous_arp(struct net_device *dev,
1320                                        struct in_device *in_dev)
1321
1322{
1323        struct in_ifaddr *ifa;
1324
1325        for (ifa = in_dev->ifa_list; ifa;
1326             ifa = ifa->ifa_next) {
1327                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1328                         ifa->ifa_local, dev,
1329                         ifa->ifa_local, NULL,
1330                         dev->dev_addr, NULL);
1331        }
1332}
1333
1334/* Called only under RTNL semaphore */
1335
/*
 * Netdevice notifier callback keeping the IPv4 per-device state
 * (in_device) in step with device events.
 *
 * If the device has no in_device yet, one is created on NETDEV_REGISTER
 * (with NOXFRM and NOPOLICY set for loopback devices) or on
 * NETDEV_CHANGEMTU once the MTU is valid; all other events are ignored.
 * Otherwise the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, or notifier_from_errno(-ENOMEM) if inetdev_init()
 * fails during NETDEV_REGISTER.
 */
1336static int inetdev_event(struct notifier_block *this, unsigned long event,
1337                         void *ptr)
1338{
1339        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1340        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1341
1342        ASSERT_RTNL();
1343
1344        if (!in_dev) {
1345                if (event == NETDEV_REGISTER) {
1346                        in_dev = inetdev_init(dev);
1347                        if (!in_dev)
1348                                return notifier_from_errno(-ENOMEM);
1349                        if (dev->flags & IFF_LOOPBACK) {
1350                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1351                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1352                        }
1353                } else if (event == NETDEV_CHANGEMTU) {
1354                        /* Re-enabling IP */
1355                        if (inetdev_valid_mtu(dev->mtu))
1356                                in_dev = inetdev_init(dev);
1357                }
1358                goto out;
1359        }
1360
1361        switch (event) {
1362        case NETDEV_REGISTER:
                    /* A device being registered should not have an
                     * in_device already; log it and clear the pointer.
                     */
1363                pr_debug("%s: bug\n", __func__);
1364                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1365                break;
1366        case NETDEV_UP:
1367                if (!inetdev_valid_mtu(dev->mtu))
1368                        break;
                    /* Loopback devices get 127.0.0.1/8 with host scope
                     * configured automatically when they come up.
                     */
1369                if (dev->flags & IFF_LOOPBACK) {
1370                        struct in_ifaddr *ifa = inet_alloc_ifa();
1371
1372                        if (ifa) {
1373                                INIT_HLIST_NODE(&ifa->hash);
1374                                ifa->ifa_local =
1375                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1376                                ifa->ifa_prefixlen = 8;
1377                                ifa->ifa_mask = inet_make_mask(8);
1378                                in_dev_hold(in_dev);
1379                                ifa->ifa_dev = in_dev;
1380                                ifa->ifa_scope = RT_SCOPE_HOST;
1381                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1382                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1383                                                 INFINITY_LIFE_TIME);
1384                                inet_insert_ifa(ifa);
1385                        }
1386                }
1387                ip_mc_up(in_dev);
1388                /* fall through */
1389        case NETDEV_CHANGEADDR:
                    /* Gratuitous ARP on up/address change is sent only when
                     * IN_DEV_ARP_NOTIFY() is true for the device.
                     */
1390                if (!IN_DEV_ARP_NOTIFY(in_dev))
1391                        break;
1392                /* fall through */
1393        case NETDEV_NOTIFY_PEERS:
1394                /* Send gratuitous ARP to notify of link change */
1395                inetdev_send_gratuitous_arp(dev, in_dev);
1396                break;
1397        case NETDEV_DOWN:
1398                ip_mc_down(in_dev);
1399                break;
1400        case NETDEV_PRE_TYPE_CHANGE:
1401                ip_mc_unmap(in_dev);
1402                break;
1403        case NETDEV_POST_TYPE_CHANGE:
1404                ip_mc_remap(in_dev);
1405                break;
1406        case NETDEV_CHANGEMTU:
1407                if (inetdev_valid_mtu(dev->mtu))
1408                        break;
1409                /* disable IP when MTU is not enough - fall through */
1410        case NETDEV_UNREGISTER:
1411                inetdev_destroy(in_dev);
1412                break;
1413        case NETDEV_CHANGENAME:
1414                /* Relabel the addresses; inetdev_changename() sends an
1415                 * RTM_NEWADDR message for every address it relabels.
1416                 */
1417                inetdev_changename(dev, in_dev);
1418
                    /* Re-register the sysctl entries for this in_device
                     * after the name change.
                     */
1419                devinet_sysctl_unregister(in_dev);
1420                devinet_sysctl_register(in_dev);
1421                break;
1422        }
1423out:
1424        return NOTIFY_DONE;
1425}
1426
/* Notifier block whose callback is inetdev_event(). */
1427static struct notifier_block ip_netdev_notifier = {
1428        .notifier_call = inetdev_event,
1429};
1430
1431static size_t inet_nlmsg_size(void)
1432{
1433        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1434               + nla_total_size(4) /* IFA_ADDRESS */
1435               + nla_total_size(4) /* IFA_LOCAL */
1436               + nla_total_size(4) /* IFA_BROADCAST */
1437               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1438}
1439
1440static inline u32 cstamp_delta(unsigned long cstamp)
1441{
1442        return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1443}
1444
1445static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1446                         unsigned long tstamp, u32 preferred, u32 valid)
1447{
1448        struct ifa_cacheinfo ci;
1449
1450        ci.cstamp = cstamp_delta(cstamp);
1451        ci.tstamp = cstamp_delta(tstamp);
1452        ci.ifa_prefered = preferred;
1453        ci.ifa_valid = valid;
1454
1455        return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1456}
1457
1458static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1459                            u32 portid, u32 seq, int event, unsigned int flags)
1460{
1461        struct ifaddrmsg *ifm;
1462        struct nlmsghdr  *nlh;
1463        u32 preferred, valid;
1464
1465        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1466        if (nlh == NULL)
1467                return -EMSGSIZE;
1468
1469        ifm = nlmsg_data(nlh);
1470        ifm->ifa_family = AF_INET;
1471        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1472        ifm->ifa_flags = ifa->ifa_flags;
1473        ifm->ifa_scope = ifa->ifa_scope;
1474        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1475
1476        if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1477                preferred = ifa->ifa_preferred_lft;
1478                valid = ifa->ifa_valid_lft;
1479                if (preferred != INFINITY_LIFE_TIME) {
1480                        long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1481
1482                        if (preferred > tval)
1483                                preferred -= tval;
1484                        else
1485                                preferred = 0;
1486                        if (valid != INFINITY_LIFE_TIME) {
1487                                if (valid > tval)
1488                                        valid -= tval;
1489                                else
1490                                        valid = 0;
1491                        }
1492                }
1493        } else {
1494                preferred = INFINITY_LIFE_TIME;
1495                valid = INFINITY_LIFE_TIME;
1496        }
1497        if ((ifa->ifa_address &&
1498             nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1499            (ifa->ifa_local &&
1500             nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1501            (ifa->ifa_broadcast &&
1502             nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1503            (ifa->ifa_label[0] &&
1504             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1505            put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1506                          preferred, valid))
1507                goto nla_put_failure;
1508
1509        return nlmsg_end(skb, nlh);
1510
1511nla_put_failure:
1512        nlmsg_cancel(skb, nlh);
1513        return -EMSGSIZE;
1514}
1515
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) for
 * each IPv4 address of each device in the socket's network namespace.
 *
 * The position at which the dump stopped is kept in cb->args[] so that
 * the next call can continue from there:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
1516static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1517{
1518        struct net *net = sock_net(skb->sk);
1519        int h, s_h;
1520        int idx, s_idx;
1521        int ip_idx, s_ip_idx;
1522        struct net_device *dev;
1523        struct in_device *in_dev;
1524        struct in_ifaddr *ifa;
1525        struct hlist_head *head;
1526
1527        s_h = cb->args[0];
1528        s_idx = idx = cb->args[1];
1529        s_ip_idx = ip_idx = cb->args[2];
1530
1531        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1532                idx = 0;
1533                head = &net->dev_index_head[h];
1534                rcu_read_lock();
                    /* Sequence value checked by nl_dump_check_consistent()
                     * below; it is derived from the address and device
                     * generation counters.
                     */
1535                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1536                          net->dev_base_seq;
1537                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1538                        if (idx < s_idx)
1539                                goto cont;
                            /* Past the device we stopped on: start from its
                             * first address again.
                             */
1540                        if (h > s_h || idx > s_idx)
1541                                s_ip_idx = 0;
1542                        in_dev = __in_dev_get_rcu(dev);
1543                        if (!in_dev)
1544                                goto cont;
1545
1546                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1547                             ifa = ifa->ifa_next, ip_idx++) {
1548                                if (ip_idx < s_ip_idx)
1549                                        continue;
                                    /* A result <= 0 ends this pass; the
                                     * current position is saved at done.
                                     */
1550                                if (inet_fill_ifaddr(skb, ifa,
1551                                             NETLINK_CB(cb->skb).portid,
1552                                             cb->nlh->nlmsg_seq,
1553                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1554                                        rcu_read_unlock();
1555                                        goto done;
1556                                }
1557                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1558                        }
1559cont:
1560                        idx++;
1561                }
1562                rcu_read_unlock();
1563        }
1564
1565done:
1566        cb->args[0] = h;
1567        cb->args[1] = idx;
1568        cb->args[2] = ip_idx;
1569
1570        return skb->len;
1571}
1572
1573static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1574                      u32 portid)
1575{
1576        struct sk_buff *skb;
1577        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1578        int err = -ENOBUFS;
1579        struct net *net;
1580
1581        net = dev_net(ifa->ifa_dev->dev);
1582        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1583        if (skb == NULL)
1584                goto errout;
1585
1586        err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1587        if (err < 0) {
1588                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1589                WARN_ON(err == -EMSGSIZE);
1590                kfree_skb(skb);
1591                goto errout;
1592        }
1593        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1594        return;
1595errout:
1596        if (err < 0)
1597                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1598}
1599
1600static size_t inet_get_link_af_size(const struct net_device *dev)
1601{
1602        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1603
1604        if (!in_dev)
1605                return 0;
1606
1607        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1608}
1609
1610static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1611{
1612        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1613        struct nlattr *nla;
1614        int i;
1615
1616        if (!in_dev)
1617                return -ENODATA;
1618
1619        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1620        if (nla == NULL)
1621                return -EMSGSIZE;
1622
1623        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1624                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1625
1626        return 0;
1627}
1628
/* Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must itself be a nested attribute.
 */
1629static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1630        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1631};
1632
1633static int inet_validate_link_af(const struct net_device *dev,
1634                                 const struct nlattr *nla)
1635{
1636        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1637        int err, rem;
1638
1639        if (dev && !__in_dev_get_rtnl(dev))
1640                return -EAFNOSUPPORT;
1641
1642        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1643        if (err < 0)
1644                return err;
1645
1646        if (tb[IFLA_INET_CONF]) {
1647                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1648                        int cfgid = nla_type(a);
1649
1650                        if (nla_len(a) < 4)
1651                                return -EINVAL;
1652
1653                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1654                                return -EINVAL;
1655                }
1656        }
1657
1658        return 0;
1659}
1660
1661static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1662{
1663        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1664        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1665        int rem;
1666
1667        if (!in_dev)
1668                return -EAFNOSUPPORT;
1669
1670        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1671                BUG();
1672
1673        if (tb[IFLA_INET_CONF]) {
1674                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1675                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1676        }
1677
1678        return 0;
1679}
1680
1681static int inet_netconf_msgsize_devconf(int type)
1682{
1683        int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1684                   + nla_total_size(4); /* NETCONFA_IFINDEX */
1685
1686        /* type -1 is used for ALL */
1687        if (type == -1 || type == NETCONFA_FORWARDING)
1688                size += nla_total_size(4);
1689        if (type == -1 || type == NETCONFA_RP_FILTER)
1690                size += nla_total_size(4);
1691        if (type == -1 || type == NETCONFA_MC_FORWARDING)
1692                size += nla_total_size(4);
1693
1694        return size;
1695}
1696
1697static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1698                                     struct ipv4_devconf *devconf, u32 portid,
1699                                     u32 seq, int event, unsigned int flags,
1700                                     int type)
1701{
1702        struct nlmsghdr  *nlh;
1703        struct netconfmsg *ncm;
1704
1705        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1706                        flags);
1707        if (nlh == NULL)
1708                return -EMSGSIZE;
1709
1710        ncm = nlmsg_data(nlh);
1711        ncm->ncm_family = AF_INET;
1712
1713        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1714                goto nla_put_failure;
1715
1716        /* type -1 is used for ALL */
1717        if ((type == -1 || type == NETCONFA_FORWARDING) &&
1718            nla_put_s32(skb, NETCONFA_FORWARDING,
1719                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1720                goto nla_put_failure;
1721        if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1722            nla_put_s32(skb, NETCONFA_RP_FILTER,
1723                        IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1724                goto nla_put_failure;
1725        if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1726            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1727                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1728                goto nla_put_failure;
1729
1730        return nlmsg_end(skb, nlh);
1731
1732nla_put_failure:
1733        nlmsg_cancel(skb, nlh);
1734        return -EMSGSIZE;
1735}
1736
1737void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1738                                 struct ipv4_devconf *devconf)
1739{
1740        struct sk_buff *skb;
1741        int err = -ENOBUFS;
1742
1743        skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1744        if (skb == NULL)
1745                goto errout;
1746
1747        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1748                                        RTM_NEWNETCONF, 0, type);
1749        if (err < 0) {
1750                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1751                WARN_ON(err == -EMSGSIZE);
1752                kfree_skb(skb);
1753                goto errout;
1754        }
1755        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1756        return;
1757errout:
1758        if (err < 0)
1759                rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1760}
1761
/*
 * Attribute policy applied by inet_netconf_get_devconf() when parsing an
 * RTM_GETNETCONF request: the listed attributes are int-sized.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
};
1767
1768static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1769                                    struct nlmsghdr *nlh)
1770{
1771        struct net *net = sock_net(in_skb->sk);
1772        struct nlattr *tb[NETCONFA_MAX+1];
1773        struct netconfmsg *ncm;
1774        struct sk_buff *skb;
1775        struct ipv4_devconf *devconf;
1776        struct in_device *in_dev;
1777        struct net_device *dev;
1778        int ifindex;
1779        int err;
1780
1781        err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1782                          devconf_ipv4_policy);
1783        if (err < 0)
1784                goto errout;
1785
1786        err = EINVAL;
1787        if (!tb[NETCONFA_IFINDEX])
1788                goto errout;
1789
1790        ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1791        switch (ifindex) {
1792        case NETCONFA_IFINDEX_ALL:
1793                devconf = net->ipv4.devconf_all;
1794                break;
1795        case NETCONFA_IFINDEX_DEFAULT:
1796                devconf = net->ipv4.devconf_dflt;
1797                break;
1798        default:
1799                dev = __dev_get_by_index(net, ifindex);
1800                if (dev == NULL)
1801                        goto errout;
1802                in_dev = __in_dev_get_rtnl(dev);
1803                if (in_dev == NULL)
1804                        goto errout;
1805                devconf = &in_dev->cnf;
1806                break;
1807        }
1808
1809        err = -ENOBUFS;
1810        skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1811        if (skb == NULL)
1812                goto errout;
1813
1814        err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1815                                        NETLINK_CB(in_skb).portid,
1816                                        nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1817                                        -1);
1818        if (err < 0) {
1819                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1820                WARN_ON(err == -EMSGSIZE);
1821                kfree_skb(skb);
1822                goto errout;
1823        }
1824        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1825errout:
1826        return err;
1827}
1828
/*
 * RTM_GETNETCONF dump callback: emit one netconf message per device that
 * has an in_device, then one for the "all" and one for the "default"
 * configuration.
 *
 * The dump is resumable.  cb->args[0] holds the step to resume at: a device
 * index hash bucket below NETDEV_HASHENTRIES, NETDEV_HASHENTRIES for the
 * "all" entry, or NETDEV_HASHENTRIES + 1 for the "default" entry.
 * cb->args[1] holds the position inside the bucket.
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
                                     struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct hlist_head *head;

        /* pick up where the previous invocation stopped */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];

        /* s_idx only applies to the first bucket visited; reset afterwards */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        /* skip entries already dumped in an earlier pass */
                        if (idx < s_idx)
                                goto cont;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        /* stop here; h and idx are saved at "done" */
                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
                                                      &in_dev->cnf,
                                                      NETLINK_CB(cb->skb).portid,
                                                      cb->nlh->nlmsg_seq,
                                                      RTM_NEWNETCONF,
                                                      NLM_F_MULTI,
                                                      -1) <= 0) {
                                rcu_read_unlock();
                                goto done;
                        }
                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
                        idx++;
                }
                rcu_read_unlock();
        }
        /* pseudo-step after the last bucket: the "all" configuration */
        if (h == NETDEV_HASHENTRIES) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
                                              net->ipv4.devconf_all,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              -1) <= 0)
                        goto done;
                else
                        h++;
        }
        /* final pseudo-step: the "default" configuration */
        if (h == NETDEV_HASHENTRIES + 1) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
                                              net->ipv4.devconf_dflt,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              -1) <= 0)
                        goto done;
                else
                        h++;
        }
done:
        /* record the cursor for the next invocation */
        cb->args[0] = h;
        cb->args[1] = idx;

        return skb->len;
}
1899
1900#ifdef CONFIG_SYSCTL
1901
1902static void devinet_copy_dflt_conf(struct net *net, int i)
1903{
1904        struct net_device *dev;
1905
1906        rcu_read_lock();
1907        for_each_netdev_rcu(net, dev) {
1908                struct in_device *in_dev;
1909
1910                in_dev = __in_dev_get_rcu(dev);
1911                if (in_dev && !test_bit(i, in_dev->cnf.state))
1912                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1913        }
1914        rcu_read_unlock();
1915}
1916
1917/* called with RTNL locked */
1918static void inet_forward_change(struct net *net)
1919{
1920        struct net_device *dev;
1921        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1922
1923        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1924        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1925        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1926                                    NETCONFA_IFINDEX_ALL,
1927                                    net->ipv4.devconf_all);
1928        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929                                    NETCONFA_IFINDEX_DEFAULT,
1930                                    net->ipv4.devconf_dflt);
1931
1932        for_each_netdev(net, dev) {
1933                struct in_device *in_dev;
1934                if (on)
1935                        dev_disable_lro(dev);
1936                rcu_read_lock();
1937                in_dev = __in_dev_get_rcu(dev);
1938                if (in_dev) {
1939                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1940                        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941                                                    dev->ifindex, &in_dev->cnf);
1942                }
1943                rcu_read_unlock();
1944        }
1945}
1946
1947static int devinet_conf_proc(struct ctl_table *ctl, int write,
1948                             void __user *buffer,
1949                             size_t *lenp, loff_t *ppos)
1950{
1951        int old_value = *(int *)ctl->data;
1952        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1953        int new_value = *(int *)ctl->data;
1954
1955        if (write) {
1956                struct ipv4_devconf *cnf = ctl->extra1;
1957                struct net *net = ctl->extra2;
1958                int i = (int *)ctl->data - cnf->data;
1959
1960                set_bit(i, cnf->state);
1961
1962                if (cnf == net->ipv4.devconf_dflt)
1963                        devinet_copy_dflt_conf(net, i);
1964                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1965                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1966                        if ((new_value == 0) && (old_value != 0))
1967                                rt_cache_flush(net);
1968                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1969                    new_value != old_value) {
1970                        int ifindex;
1971
1972                        if (cnf == net->ipv4.devconf_dflt)
1973                                ifindex = NETCONFA_IFINDEX_DEFAULT;
1974                        else if (cnf == net->ipv4.devconf_all)
1975                                ifindex = NETCONFA_IFINDEX_ALL;
1976                        else {
1977                                struct in_device *idev =
1978                                        container_of(cnf, struct in_device,
1979                                                     cnf);
1980                                ifindex = idev->dev->ifindex;
1981                        }
1982                        inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1983                                                    ifindex, cnf);
1984                }
1985        }
1986
1987        return ret;
1988}
1989
1990static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1991                                  void __user *buffer,
1992                                  size_t *lenp, loff_t *ppos)
1993{
1994        int *valp = ctl->data;
1995        int val = *valp;
1996        loff_t pos = *ppos;
1997        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1998
1999        if (write && *valp != val) {
2000                struct net *net = ctl->extra2;
2001
2002                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2003                        if (!rtnl_trylock()) {
2004                                /* Restore the original values before restarting */
2005                                *valp = val;
2006                                *ppos = pos;
2007                                return restart_syscall();
2008                        }
2009                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2010                                inet_forward_change(net);
2011                        } else {
2012                                struct ipv4_devconf *cnf = ctl->extra1;
2013                                struct in_device *idev =
2014                                        container_of(cnf, struct in_device, cnf);
2015                                if (*valp)
2016                                        dev_disable_lro(idev->dev);
2017                                inet_netconf_notify_devconf(net,
2018                                                            NETCONFA_FORWARDING,
2019                                                            idev->dev->ifindex,
2020                                                            cnf);
2021                        }
2022                        rtnl_unlock();
2023                        rt_cache_flush(net);
2024                } else
2025                        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2026                                                    NETCONFA_IFINDEX_DEFAULT,
2027                                                    net->ipv4.devconf_dflt);
2028        }
2029
2030        return ret;
2031}
2032
2033static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2034                                void __user *buffer,
2035                                size_t *lenp, loff_t *ppos)
2036{
2037        int *valp = ctl->data;
2038        int val = *valp;
2039        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2040        struct net *net = ctl->extra2;
2041
2042        if (write && *valp != val)
2043                rt_cache_flush(net);
2044
2045        return ret;
2046}
2047
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data and
 * .extra1 at ipv4_devconf itself; @name is the procname, @mval the file
 * mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler @proc. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2070
/*
 * Template for the per-configuration sysctl table.  Every entry refers to
 * the global ipv4_devconf; __devinet_sysctl_register() duplicates the
 * template and rebases .data/.extra1/.extra2 onto the target devconf.
 *
 * NOTE(review): the array has __IPV4_DEVCONF_MAX slots and the register
 * loop skips the last one, presumably so it stays zeroed as a table
 * terminator -- confirm against the sysctl core.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler; mc_forwarding is 0444 */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* plain read-write entries handled by devinet_conf_proc() */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* entries whose change flushes the route cache */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
        },
};
2111
2112static int __devinet_sysctl_register(struct net *net, char *dev_name,
2113                                        struct ipv4_devconf *p)
2114{
2115        int i;
2116        struct devinet_sysctl_table *t;
2117        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2118
2119        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2120        if (!t)
2121                goto out;
2122
2123        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2124                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2125                t->devinet_vars[i].extra1 = p;
2126                t->devinet_vars[i].extra2 = net;
2127        }
2128
2129        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2130
2131        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2132        if (!t->sysctl_header)
2133                goto free;
2134
2135        p->sysctl = t;
2136        return 0;
2137
2138free:
2139        kfree(t);
2140out:
2141        return -ENOBUFS;
2142}
2143
2144static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2145{
2146        struct devinet_sysctl_table *t = cnf->sysctl;
2147
2148        if (t == NULL)
2149                return;
2150
2151        cnf->sysctl = NULL;
2152        unregister_net_sysctl_table(t->sysctl_header);
2153        kfree(t);
2154}
2155
2156static void devinet_sysctl_register(struct in_device *idev)
2157{
2158        neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2159        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2160                                        &idev->cnf);
2161}
2162
2163static void devinet_sysctl_unregister(struct in_device *idev)
2164{
2165        __devinet_sysctl_unregister(&idev->cnf);
2166        neigh_sysctl_unregister(idev->arp_parms);
2167}
2168
/*
 * "ip_forward" sysctl entry, registered under "net/ipv4" by
 * devinet_init_net().  This instance is bound to the global ipv4_devconf
 * FORWARDING slot and to init_net; devinet_init_net() duplicates it and
 * rebinds .data/.extra1/.extra2 for other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2182#endif
2183
/*
 * Per-namespace init: set up the "all" and "default" devconf instances of
 * @net and, with CONFIG_SYSCTL, their sysctl tables plus "ip_forward".
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects directly; any other namespace gets kmemdup()'d
 * copies of them.
 *
 * Returns 0 on success.  On failure returns -ENOMEM or the error from
 * __devinet_sysctl_register(), after undoing what was already set up.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* non-initial namespaces work on private copies */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* rebind the copied "ip_forward" entry to this namespace */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /*
         * Unwind in reverse order of setup.  The pointer comparisons keep
         * the static init_net objects from being passed to kfree().
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2255
2256static __net_exit void devinet_exit_net(struct net *net)
2257{
2258#ifdef CONFIG_SYSCTL
2259        struct ctl_table *tbl;
2260
2261        tbl = net->ipv4.forw_hdr->ctl_table_arg;
2262        unregister_net_sysctl_table(net->ipv4.forw_hdr);
2263        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2264        __devinet_sysctl_unregister(net->ipv4.devconf_all);
2265        kfree(tbl);
2266#endif
2267        kfree(net->ipv4.devconf_dflt);
2268        kfree(net->ipv4.devconf_all);
2269}
2270
/*
 * Per-namespace init/exit hooks, registered with register_pernet_subsys()
 * in devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2275
/*
 * AF_INET link attribute callbacks (size, fill, validate, set), registered
 * with rtnl_af_register() in devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2283
2284void __init devinet_init(void)
2285{
2286        int i;
2287
2288        for (i = 0; i < IN4_ADDR_HSIZE; i++)
2289                INIT_HLIST_HEAD(&inet_addr_lst[i]);
2290
2291        register_pernet_subsys(&devinet_ops);
2292
2293        register_gifconf(PF_INET, inet_gifconf);
2294        register_netdevice_notifier(&ip_netdev_notifier);
2295
2296        schedule_delayed_work(&check_lifetime_work, 0);
2297
2298        rtnl_af_register(&inet_af_ops);
2299
2300        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2301        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2302        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2303        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2304                      inet_netconf_dump_devconf, NULL);
2305}
2306
2307