linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <linux/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/sched/signal.h>
  36#include <linux/string.h>
  37#include <linux/mm.h>
  38#include <linux/socket.h>
  39#include <linux/sockios.h>
  40#include <linux/in.h>
  41#include <linux/errno.h>
  42#include <linux/interrupt.h>
  43#include <linux/if_addr.h>
  44#include <linux/if_ether.h>
  45#include <linux/inet.h>
  46#include <linux/netdevice.h>
  47#include <linux/etherdevice.h>
  48#include <linux/skbuff.h>
  49#include <linux/init.h>
  50#include <linux/notifier.h>
  51#include <linux/inetdevice.h>
  52#include <linux/igmp.h>
  53#include <linux/slab.h>
  54#include <linux/hash.h>
  55#ifdef CONFIG_SYSCTL
  56#include <linux/sysctl.h>
  57#endif
  58#include <linux/kmod.h>
  59#include <linux/netconf.h>
  60
  61#include <net/arp.h>
  62#include <net/ip.h>
  63#include <net/route.h>
  64#include <net/ip_fib.h>
  65#include <net/rtnetlink.h>
  66#include <net/net_namespace.h>
  67#include <net/addrconf.h>
  68
  69static struct ipv4_devconf ipv4_devconf = {
  70        .data = {
  71                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  72                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  74                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  75                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  76                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  77        },
  78};
  79
  80static struct ipv4_devconf ipv4_devconf_dflt = {
  81        .data = {
  82                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  83                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  84                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  85                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  86                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  87                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  88                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  89        },
  90};
  91
  92#define IPV4_DEVCONF_DFLT(net, attr) \
  93        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  94
  95static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  96        [IFA_LOCAL]             = { .type = NLA_U32 },
  97        [IFA_ADDRESS]           = { .type = NLA_U32 },
  98        [IFA_BROADCAST]         = { .type = NLA_U32 },
  99        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 100        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
 101        [IFA_FLAGS]             = { .type = NLA_U32 },
 102};
 103
 104#define IN4_ADDR_HSIZE_SHIFT    8
 105#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
 106
 107static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 108
 109static u32 inet_addr_hash(const struct net *net, __be32 addr)
 110{
 111        u32 val = (__force u32) addr ^ net_hash_mix(net);
 112
 113        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 114}
 115
 116static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 117{
 118        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 119
 120        ASSERT_RTNL();
 121        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 122}
 123
 124static void inet_hash_remove(struct in_ifaddr *ifa)
 125{
 126        ASSERT_RTNL();
 127        hlist_del_init_rcu(&ifa->hash);
 128}
 129
 130/**
 131 * __ip_dev_find - find the first device with a given source address.
 132 * @net: the net namespace
 133 * @addr: the source address
 134 * @devref: if true, take a reference on the found device
 135 *
 136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 137 */
 138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 139{
 140        struct net_device *result = NULL;
 141        struct in_ifaddr *ifa;
 142
 143        rcu_read_lock();
 144        ifa = inet_lookup_ifaddr_rcu(net, addr);
 145        if (!ifa) {
 146                struct flowi4 fl4 = { .daddr = addr };
 147                struct fib_result res = { 0 };
 148                struct fib_table *local;
 149
 150                /* Fallback to FIB local table so that communication
 151                 * over loopback subnets work.
 152                 */
 153                local = fib_get_table(net, RT_TABLE_LOCAL);
 154                if (local &&
 155                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 156                    res.type == RTN_LOCAL)
 157                        result = FIB_RES_DEV(res);
 158        } else {
 159                result = ifa->ifa_dev->dev;
 160        }
 161        if (result && devref)
 162                dev_hold(result);
 163        rcu_read_unlock();
 164        return result;
 165}
 166EXPORT_SYMBOL(__ip_dev_find);
 167
 168/* called under RCU lock */
 169struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
 170{
 171        u32 hash = inet_addr_hash(net, addr);
 172        struct in_ifaddr *ifa;
 173
 174        hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
 175                if (ifa->ifa_local == addr &&
 176                    net_eq(dev_net(ifa->ifa_dev->dev), net))
 177                        return ifa;
 178
 179        return NULL;
 180}
 181
 182static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 183
 184static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 185static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
 186static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 187                         int destroy);
 188#ifdef CONFIG_SYSCTL
 189static int devinet_sysctl_register(struct in_device *idev);
 190static void devinet_sysctl_unregister(struct in_device *idev);
 191#else
 192static int devinet_sysctl_register(struct in_device *idev)
 193{
 194        return 0;
 195}
 196static void devinet_sysctl_unregister(struct in_device *idev)
 197{
 198}
 199#endif
 200
 201/* Locks all the inet devices. */
 202
 203static struct in_ifaddr *inet_alloc_ifa(void)
 204{
 205        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 206}
 207
 208static void inet_rcu_free_ifa(struct rcu_head *head)
 209{
 210        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 211        if (ifa->ifa_dev)
 212                in_dev_put(ifa->ifa_dev);
 213        kfree(ifa);
 214}
 215
 216static void inet_free_ifa(struct in_ifaddr *ifa)
 217{
 218        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 219}
 220
 221void in_dev_finish_destroy(struct in_device *idev)
 222{
 223        struct net_device *dev = idev->dev;
 224
 225        WARN_ON(idev->ifa_list);
 226        WARN_ON(idev->mc_list);
 227        kfree(rcu_dereference_protected(idev->mc_hash, 1));
 228#ifdef NET_REFCNT_DEBUG
 229        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 230#endif
 231        dev_put(dev);
 232        if (!idev->dead)
 233                pr_err("Freeing alive in_device %p\n", idev);
 234        else
 235                kfree(idev);
 236}
 237EXPORT_SYMBOL(in_dev_finish_destroy);
 238
 239static struct in_device *inetdev_init(struct net_device *dev)
 240{
 241        struct in_device *in_dev;
 242        int err = -ENOMEM;
 243
 244        ASSERT_RTNL();
 245
 246        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 247        if (!in_dev)
 248                goto out;
 249        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 250                        sizeof(in_dev->cnf));
 251        in_dev->cnf.sysctl = NULL;
 252        in_dev->dev = dev;
 253        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 254        if (!in_dev->arp_parms)
 255                goto out_kfree;
 256        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 257                dev_disable_lro(dev);
 258        /* Reference in_dev->dev */
 259        dev_hold(dev);
 260        /* Account for reference dev->ip_ptr (below) */
 261        refcount_set(&in_dev->refcnt, 1);
 262
 263        err = devinet_sysctl_register(in_dev);
 264        if (err) {
 265                in_dev->dead = 1;
 266                in_dev_put(in_dev);
 267                in_dev = NULL;
 268                goto out;
 269        }
 270        ip_mc_init_dev(in_dev);
 271        if (dev->flags & IFF_UP)
 272                ip_mc_up(in_dev);
 273
 274        /* we can receive as soon as ip_ptr is set -- do this last */
 275        rcu_assign_pointer(dev->ip_ptr, in_dev);
 276out:
 277        return in_dev ?: ERR_PTR(err);
 278out_kfree:
 279        kfree(in_dev);
 280        in_dev = NULL;
 281        goto out;
 282}
 283
 284static void in_dev_rcu_put(struct rcu_head *head)
 285{
 286        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 287        in_dev_put(idev);
 288}
 289
 290static void inetdev_destroy(struct in_device *in_dev)
 291{
 292        struct in_ifaddr *ifa;
 293        struct net_device *dev;
 294
 295        ASSERT_RTNL();
 296
 297        dev = in_dev->dev;
 298
 299        in_dev->dead = 1;
 300
 301        ip_mc_destroy_dev(in_dev);
 302
 303        while ((ifa = in_dev->ifa_list) != NULL) {
 304                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 305                inet_free_ifa(ifa);
 306        }
 307
 308        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 309
 310        devinet_sysctl_unregister(in_dev);
 311        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 312        arp_ifdown(dev);
 313
 314        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 315}
 316
 317int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 318{
 319        rcu_read_lock();
 320        for_primary_ifa(in_dev) {
 321                if (inet_ifa_match(a, ifa)) {
 322                        if (!b || inet_ifa_match(b, ifa)) {
 323                                rcu_read_unlock();
 324                                return 1;
 325                        }
 326                }
 327        } endfor_ifa(in_dev);
 328        rcu_read_unlock();
 329        return 0;
 330}
 331
 /*
  * __inet_del_ifa - unlink one address from a device and announce the change.
  * @in_dev:  device whose ifa_list contains the address
  * @ifap:    link (list head or some ->ifa_next slot) that currently points
  *           at the address to delete
  * @destroy: if non-zero, the deleted address is handed to inet_free_ifa()
  * @nlh:     passed through to rtmsg_ifa() for the netlink notifications
  * @portid:  passed through to rtmsg_ifa() for the netlink notifications
  *
  * Asserts that RTNL is held.  When the deleted address is a primary, the
  * secondaries with the same mask that match its address are either deleted
  * as well or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is
  * promoted to primary.  All of that is skipped when @in_dev is already
  * marked dead.
  */
 332static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 333                         int destroy, struct nlmsghdr *nlh, u32 portid)
 334{
 335        struct in_ifaddr *promote = NULL;
 336        struct in_ifaddr *ifa, *ifa1 = *ifap;
 337        struct in_ifaddr *last_prim = in_dev->ifa_list;
 338        struct in_ifaddr *prev_prom = NULL;
 339        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 340
 341        ASSERT_RTNL();
 342
 343        if (in_dev->dead)
 344                goto no_promotions;
 345
 346        /* 1. Deleting a primary ifaddr forces deletion of all its
 347         * secondaries unless alias promotion is set
 348         **/
 349
 350        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 351                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 352
 353                while ((ifa = *ifap1) != NULL) {
                        /* remember the last primary seen whose scope is not
                         * narrower than ifa1's; a promoted address is
                         * re-linked right after it
                         */
 354                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 355                            ifa1->ifa_scope <= ifa->ifa_scope)
 356                                last_prim = ifa;
 357
                        /* skip entries that are not secondaries of ifa1 */
 358                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 359                            ifa1->ifa_mask != ifa->ifa_mask ||
 360                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 361                                ifap1 = &ifa->ifa_next;
 362                                prev_prom = ifa;
 363                                continue;
 364                        }
 365
 366                        if (!do_promote) {
                                /* no promotion: delete this secondary too */
 367                                inet_hash_remove(ifa);
 368                                *ifap1 = ifa->ifa_next;
 369
 370                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 371                                blocking_notifier_call_chain(&inetaddr_chain,
 372                                                NETDEV_DOWN, ifa);
 373                                inet_free_ifa(ifa);
 374                        } else {
                                /* first matching secondary becomes the new primary */
 375                                promote = ifa;
 376                                break;
 377                        }
 378                }
 379        }
 380
 381        /* On promotion all secondaries from subnet are changing
 382         * the primary IP, we must remove all their routes silently
 383         * and later to add them back with new prefsrc. Do this
 384         * while all addresses are on the device list.
 385         */
 386        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 387                if (ifa1->ifa_mask == ifa->ifa_mask &&
 388                    inet_ifa_match(ifa1->ifa_address, ifa))
 389                        fib_del_ifaddr(ifa, ifa1);
 390        }
 391
 392no_promotions:
 393        /* 2. Unlink it */
 394
 395        *ifap = ifa1->ifa_next;
 396        inet_hash_remove(ifa1);
 397
 398        /* 3. Announce address deletion */
 399
 400        /* Send message first, then call notifier.
 401           At first sight, FIB update triggered by notifier
 402           will refer to already deleted ifaddr, that could confuse
 403           netlink listeners. It is not true: look, gated sees
 404           that route deleted and if it still thinks that ifaddr
 405           is valid, it will try to restore deleted routes... Grr.
 406           So that, this order is correct.
 407         */
 408        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
 409        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 410
 411        if (promote) {
                /* saved before the re-link below changes promote->ifa_next */
 412                struct in_ifaddr *next_sec = promote->ifa_next;
 413
                /* prev_prom is only set when other entries sit between ifa1
                 * and promote; in that case move promote to just after
                 * last_prim, otherwise it already is in place
                 */
 414                if (prev_prom) {
 415                        prev_prom->ifa_next = promote->ifa_next;
 416                        promote->ifa_next = last_prim->ifa_next;
 417                        last_prim->ifa_next = promote;
 418                }
 419
 420                promote->ifa_flags &= ~IFA_F_SECONDARY;
 421                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 422                blocking_notifier_call_chain(&inetaddr_chain,
 423                                NETDEV_UP, promote);
                /* re-add the routes of the remaining secondaries of the subnet */
 424                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 425                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 426                            !inet_ifa_match(ifa1->ifa_address, ifa))
 427                                        continue;
 428                        fib_add_ifaddr(ifa);
 429                }
 430
 431        }
 432        if (destroy)
 433                inet_free_ifa(ifa1);
 434}
 435
 436static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 437                         int destroy)
 438{
 439        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 440}
 441
 442static void check_lifetime(struct work_struct *work);
 443
 444static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 445
 /*
  * __inet_insert_ifa - link a prepared address into its device's list.
  * @ifa:    address to insert; ifa->ifa_dev must already be set
  * @nlh:    passed through to rtmsg_ifa() for the netlink notification
  * @portid: passed through to rtmsg_ifa() for the netlink notification
  * @extack: handed to the validator chain through struct in_validator_info
  *
  * Asserts that RTNL is held.  On every path that does not insert the
  * address, @ifa is released with inet_free_ifa().
  *
  * Returns 0 on success (also when ifa_local is zero, in which case nothing
  * is inserted), -EEXIST if the same local address already exists in the
  * same subnet, -EINVAL if the subnet exists with a different scope, or the
  * errno reported by the validator chain.
  */
 446static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 447                             u32 portid, struct netlink_ext_ack *extack)
 448{
 449        struct in_device *in_dev = ifa->ifa_dev;
 450        struct in_ifaddr *ifa1, **ifap, **last_primary;
 451        struct in_validator_info ivi;
 452        int ret;
 453
 454        ASSERT_RTNL();
 455
 456        if (!ifa->ifa_local) {
 457                inet_free_ifa(ifa);
 458                return 0;
 459        }
 460
            /* start out as primary; demoted below if its subnet already exists */
 461        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 462        last_primary = &in_dev->ifa_list;
 463
            /* when this loop ends, ifap refers to the terminating NULL link,
             * i.e. the tail of the list
             */
 464        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 465             ifap = &ifa1->ifa_next) {
 466                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 467                    ifa->ifa_scope <= ifa1->ifa_scope)
 468                        last_primary = &ifa1->ifa_next;
 469                if (ifa1->ifa_mask == ifa->ifa_mask &&
 470                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 471                        if (ifa1->ifa_local == ifa->ifa_local) {
 472                                inet_free_ifa(ifa);
 473                                return -EEXIST;
 474                        }
 475                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 476                                inet_free_ifa(ifa);
 477                                return -EINVAL;
 478                        }
 479                        ifa->ifa_flags |= IFA_F_SECONDARY;
 480                }
 481        }
 482
 483        /* Allow any devices that wish to register ifaddr validators to weigh
 484         * in now, before changes are committed.  The rtnl lock is serializing
 485         * access here, so the state should not change between a validator call
 486         * and a final notify on commit.  This isn't invoked on promotion under
 487         * the assumption that validators are checking the address itself, and
 488         * not the flags.
 489         */
 490        ivi.ivi_addr = ifa->ifa_address;
 491        ivi.ivi_dev = ifa->ifa_dev;
 492        ivi.extack = extack;
 493        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
 494                                           NETDEV_UP, &ivi);
 495        ret = notifier_to_errno(ret);
 496        if (ret) {
 497                inet_free_ifa(ifa);
 498                return ret;
 499        }
 500
            /* primaries go in at last_primary; secondaries keep ifap, the tail */
 501        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 502                prandom_seed((__force u32) ifa->ifa_local);
 503                ifap = last_primary;
 504        }
 505
 506        ifa->ifa_next = *ifap;
 507        *ifap = ifa;
 508
 509        inet_hash_insert(dev_net(in_dev->dev), ifa);
 510
            /* re-queue the lifetime check with zero delay */
 511        cancel_delayed_work(&check_lifetime_work);
 512        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 513
 514        /* Send message first, then call notifier.
 515           Notifier will trigger FIB update, so that
 516           listeners of netlink will know about new ifaddr */
 517        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 518        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 519
 520        return 0;
 521}
 522
 523static int inet_insert_ifa(struct in_ifaddr *ifa)
 524{
 525        return __inet_insert_ifa(ifa, NULL, 0, NULL);
 526}
 527
 528static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 529{
 530        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 531
 532        ASSERT_RTNL();
 533
 534        if (!in_dev) {
 535                inet_free_ifa(ifa);
 536                return -ENOBUFS;
 537        }
 538        ipv4_devconf_setall(in_dev);
 539        neigh_parms_data_state_setall(in_dev->arp_parms);
 540        if (ifa->ifa_dev != in_dev) {
 541                WARN_ON(ifa->ifa_dev);
 542                in_dev_hold(in_dev);
 543                ifa->ifa_dev = in_dev;
 544        }
 545        if (ipv4_is_loopback(ifa->ifa_local))
 546                ifa->ifa_scope = RT_SCOPE_HOST;
 547        return inet_insert_ifa(ifa);
 548}
 549
 550/* Caller must hold RCU or RTNL :
 551 * We dont take a reference on found in_device
 552 */
 553struct in_device *inetdev_by_index(struct net *net, int ifindex)
 554{
 555        struct net_device *dev;
 556        struct in_device *in_dev = NULL;
 557
 558        rcu_read_lock();
 559        dev = dev_get_by_index_rcu(net, ifindex);
 560        if (dev)
 561                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 562        rcu_read_unlock();
 563        return in_dev;
 564}
 565EXPORT_SYMBOL(inetdev_by_index);
 566
 567/* Called only from RTNL semaphored context. No locks. */
 568
 569struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 570                                    __be32 mask)
 571{
 572        ASSERT_RTNL();
 573
 574        for_primary_ifa(in_dev) {
 575                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 576                        return ifa;
 577        } endfor_ifa(in_dev);
 578        return NULL;
 579}
 580
 581static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 582{
 583        struct ip_mreqn mreq = {
 584                .imr_multiaddr.s_addr = ifa->ifa_address,
 585                .imr_ifindex = ifa->ifa_dev->dev->ifindex,
 586        };
 587        int ret;
 588
 589        ASSERT_RTNL();
 590
 591        lock_sock(sk);
 592        if (join)
 593                ret = ip_mc_join_group(sk, &mreq);
 594        else
 595                ret = ip_mc_leave_group(sk, &mreq);
 596        release_sock(sk);
 597
 598        return ret;
 599}
 600
 601static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 602                            struct netlink_ext_ack *extack)
 603{
 604        struct net *net = sock_net(skb->sk);
 605        struct nlattr *tb[IFA_MAX+1];
 606        struct in_device *in_dev;
 607        struct ifaddrmsg *ifm;
 608        struct in_ifaddr *ifa, **ifap;
 609        int err = -EINVAL;
 610
 611        ASSERT_RTNL();
 612
 613        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 614                          extack);
 615        if (err < 0)
 616                goto errout;
 617
 618        ifm = nlmsg_data(nlh);
 619        in_dev = inetdev_by_index(net, ifm->ifa_index);
 620        if (!in_dev) {
 621                err = -ENODEV;
 622                goto errout;
 623        }
 624
 625        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 626             ifap = &ifa->ifa_next) {
 627                if (tb[IFA_LOCAL] &&
 628                    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 629                        continue;
 630
 631                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 632                        continue;
 633
 634                if (tb[IFA_ADDRESS] &&
 635                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 636                    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 637                        continue;
 638
 639                if (ipv4_is_multicast(ifa->ifa_address))
 640                        ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 641                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 642                return 0;
 643        }
 644
 645        err = -EADDRNOTAVAIL;
 646errout:
 647        return err;
 648}
 649
 /* Lifetime value that the checks below treat as "never expires". */
 650#define INFINITY_LIFE_TIME      0xFFFFFFFF
 651
 /*
  * check_lifetime - delayed-work handler that ages non-permanent addresses.
  *
  * Each hash bucket is scanned twice: first under rcu_read_lock() only to
  * find out whether anything has to change and to compute the next wake-up
  * time, then (only if needed) under rtnl_lock() to delete addresses whose
  * valid lifetime has passed and to flag addresses whose preferred lifetime
  * has passed as IFA_F_DEPRECATED.  The work re-queues itself at the end.
  */
 652static void check_lifetime(struct work_struct *work)
 653{
 654        unsigned long now, next, next_sec, next_sched;
 655        struct in_ifaddr *ifa;
 656        struct hlist_node *n;
 657        int i;
 658
 659        now = jiffies;
 660        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 661
 662        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
 663                bool change_needed = false;
 664
                    /* pass 1: read-only scan */
 665                rcu_read_lock();
 666                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
 667                        unsigned long age;
 668
 669                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 670                                continue;
 671
 672                        /* We try to batch several events at once. */
 673                        age = (now - ifa->ifa_tstamp +
 674                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 675
 676                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 677                            age >= ifa->ifa_valid_lft) {
 678                                change_needed = true;
 679                        } else if (ifa->ifa_preferred_lft ==
 680                                   INFINITY_LIFE_TIME) {
 681                                continue;
 682                        } else if (age >= ifa->ifa_preferred_lft) {
                                    /* NOTE(review): if ifa_valid_lft is a 32-bit
                                     * field, lft * HZ is evaluated in 32 bits;
                                     * confirm it cannot wrap for long lifetimes
                                     */
 683                                if (time_before(ifa->ifa_tstamp +
 684                                                ifa->ifa_valid_lft * HZ, next))
 685                                        next = ifa->ifa_tstamp +
 686                                               ifa->ifa_valid_lft * HZ;
 687
 688                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
 689                                        change_needed = true;
 690                        } else if (time_before(ifa->ifa_tstamp +
 691                                               ifa->ifa_preferred_lft * HZ,
 692                                               next)) {
 693                                next = ifa->ifa_tstamp +
 694                                       ifa->ifa_preferred_lft * HZ;
 695                        }
 696                }
 697                rcu_read_unlock();
 698                if (!change_needed)
 699                        continue;
                    /* pass 2: apply the changes under RTNL */
 700                rtnl_lock();
 701                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
 702                        unsigned long age;
 703
 704                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 705                                continue;
 706
 707                        /* We try to batch several events at once. */
 708                        age = (now - ifa->ifa_tstamp +
 709                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 710
 711                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 712                            age >= ifa->ifa_valid_lft) {
 713                                struct in_ifaddr **ifap;
 714
                                    /* find the link pointing at ifa in its
                                     * device list; inet_del_ifa() needs it
                                     */
 715                                for (ifap = &ifa->ifa_dev->ifa_list;
 716                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
 717                                        if (*ifap == ifa) {
 718                                                inet_del_ifa(ifa->ifa_dev,
 719                                                             ifap, 1);
 720                                                break;
 721                                        }
 722                                }
 723                        } else if (ifa->ifa_preferred_lft !=
 724                                   INFINITY_LIFE_TIME &&
 725                                   age >= ifa->ifa_preferred_lft &&
 726                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
 727                                ifa->ifa_flags |= IFA_F_DEPRECATED;
 728                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 729                        }
 730                }
 731                rtnl_unlock();
 732        }
 733
 734        next_sec = round_jiffies_up(next);
 735        next_sched = next;
 736
 737        /* If rounded timeout is accurate enough, accept it. */
 738        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
 739                next_sched = next_sec;
 740
 741        now = jiffies;
 742        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
 743        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
 744                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
 745
 746        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
 747                        next_sched - now);
 748}
 749
 750static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 751                             __u32 prefered_lft)
 752{
 753        unsigned long timeout;
 754
 755        ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 756
 757        timeout = addrconf_timeout_fixup(valid_lft, HZ);
 758        if (addrconf_finite_timeout(timeout))
 759                ifa->ifa_valid_lft = timeout;
 760        else
 761                ifa->ifa_flags |= IFA_F_PERMANENT;
 762
 763        timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 764        if (addrconf_finite_timeout(timeout)) {
 765                if (timeout == 0)
 766                        ifa->ifa_flags |= IFA_F_DEPRECATED;
 767                ifa->ifa_preferred_lft = timeout;
 768        }
 769        ifa->ifa_tstamp = jiffies;
 770        if (!ifa->ifa_cstamp)
 771                ifa->ifa_cstamp = ifa->ifa_tstamp;
 772}
 773
 774static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 775                                       __u32 *pvalid_lft, __u32 *pprefered_lft)
 776{
 777        struct nlattr *tb[IFA_MAX+1];
 778        struct in_ifaddr *ifa;
 779        struct ifaddrmsg *ifm;
 780        struct net_device *dev;
 781        struct in_device *in_dev;
 782        int err;
 783
 784        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 785                          NULL);
 786        if (err < 0)
 787                goto errout;
 788
 789        ifm = nlmsg_data(nlh);
 790        err = -EINVAL;
 791        if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
 792                goto errout;
 793
 794        dev = __dev_get_by_index(net, ifm->ifa_index);
 795        err = -ENODEV;
 796        if (!dev)
 797                goto errout;
 798
 799        in_dev = __in_dev_get_rtnl(dev);
 800        err = -ENOBUFS;
 801        if (!in_dev)
 802                goto errout;
 803
 804        ifa = inet_alloc_ifa();
 805        if (!ifa)
 806                /*
 807                 * A potential indev allocation can be left alive, it stays
 808                 * assigned to its device and is destroy with it.
 809                 */
 810                goto errout;
 811
 812        ipv4_devconf_setall(in_dev);
 813        neigh_parms_data_state_setall(in_dev->arp_parms);
 814        in_dev_hold(in_dev);
 815
 816        if (!tb[IFA_ADDRESS])
 817                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 818
 819        INIT_HLIST_NODE(&ifa->hash);
 820        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 821        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 822        ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
 823                                         ifm->ifa_flags;
 824        ifa->ifa_scope = ifm->ifa_scope;
 825        ifa->ifa_dev = in_dev;
 826
 827        ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
 828        ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 829
 830        if (tb[IFA_BROADCAST])
 831                ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 832
 833        if (tb[IFA_LABEL])
 834                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 835        else
 836                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 837
 838        if (tb[IFA_CACHEINFO]) {
 839                struct ifa_cacheinfo *ci;
 840
 841                ci = nla_data(tb[IFA_CACHEINFO]);
 842                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 843                        err = -EINVAL;
 844                        goto errout_free;
 845                }
 846                *pvalid_lft = ci->ifa_valid;
 847                *pprefered_lft = ci->ifa_prefered;
 848        }
 849
 850        return ifa;
 851
 852errout_free:
 853        inet_free_ifa(ifa);
 854errout:
 855        return ERR_PTR(err);
 856}
 857
 858static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 859{
 860        struct in_device *in_dev = ifa->ifa_dev;
 861        struct in_ifaddr *ifa1, **ifap;
 862
 863        if (!ifa->ifa_local)
 864                return NULL;
 865
 866        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 867             ifap = &ifa1->ifa_next) {
 868                if (ifa1->ifa_mask == ifa->ifa_mask &&
 869                    inet_ifa_match(ifa1->ifa_address, ifa) &&
 870                    ifa1->ifa_local == ifa->ifa_local)
 871                        return ifa1;
 872        }
 873        return NULL;
 874}
 875
 876static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 877                            struct netlink_ext_ack *extack)
 878{
 879        struct net *net = sock_net(skb->sk);
 880        struct in_ifaddr *ifa;
 881        struct in_ifaddr *ifa_existing;
 882        __u32 valid_lft = INFINITY_LIFE_TIME;
 883        __u32 prefered_lft = INFINITY_LIFE_TIME;
 884
 885        ASSERT_RTNL();
 886
 887        ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 888        if (IS_ERR(ifa))
 889                return PTR_ERR(ifa);
 890
 891        ifa_existing = find_matching_ifa(ifa);
 892        if (!ifa_existing) {
 893                /* It would be best to check for !NLM_F_CREATE here but
 894                 * userspace already relies on not having to provide this.
 895                 */
 896                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 897                if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
 898                        int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
 899                                               true, ifa);
 900
 901                        if (ret < 0) {
 902                                inet_free_ifa(ifa);
 903                                return ret;
 904                        }
 905                }
 906                return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
 907                                         extack);
 908        } else {
 909                inet_free_ifa(ifa);
 910
 911                if (nlh->nlmsg_flags & NLM_F_EXCL ||
 912                    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 913                        return -EEXIST;
 914                ifa = ifa_existing;
 915                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 916                cancel_delayed_work(&check_lifetime_work);
 917                queue_delayed_work(system_power_efficient_wq,
 918                                &check_lifetime_work, 0);
 919                rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 920        }
 921        return 0;
 922}
 923
 924/*
 925 *      Determine a default network mask, based on the IP address.
 926 */
 927
 928static int inet_abc_len(__be32 addr)
 929{
 930        int rc = -1;    /* Something else, probably a multicast. */
 931
 932        if (ipv4_is_zeronet(addr))
 933                rc = 0;
 934        else {
 935                __u32 haddr = ntohl(addr);
 936
 937                if (IN_CLASSA(haddr))
 938                        rc = 8;
 939                else if (IN_CLASSB(haddr))
 940                        rc = 16;
 941                else if (IN_CLASSC(haddr))
 942                        rc = 24;
 943        }
 944
 945        return rc;
 946}
 947
 948
/*
 * devinet_ioctl - handle the IPv4 address ioctls of an interface.
 * @net: network namespace used for the device lookup and capability check
 * @cmd: one of SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK} or
 *       SIOCSIF{FLAGS,ADDR,BRDADDR,DSTADDR,NETMASK}; anything else
 *       yields -EINVAL
 * @ifr: request; ifr_name selects a device or an address label
 *       ("name:alias"), ifr_addr carries the address in or out
 *
 * The name is cut at an optional ':' so that the device can be found by
 * its base name; the ':' is put back once the device has been found, so
 * that the full label can be compared against ifa_label.  An address entry
 * is then looked up by label, and for the "get" commands first by label
 * plus the address the caller passed in when its family was AF_INET.
 *
 * The "get" commands store their answer in @ifr->ifr_addr.  The "set"
 * commands require CAP_NET_ADMIN in @net's user namespace.  The lookup and
 * every modification run between rtnl_lock() and rtnl_unlock().
 *
 * Returns 0 on success or a negative errno: -EPERM, -EINVAL, -ENODEV,
 * -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags() or
 * inet_set_ifa() return.
 */
 949int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
 950{
 951        struct sockaddr_in sin_orig;
 952        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
 953        struct in_device *in_dev;
 954        struct in_ifaddr **ifap = NULL;
 955        struct in_ifaddr *ifa = NULL;
 956        struct net_device *dev;
 957        char *colon;
 958        int ret = -EFAULT;
 959        int tryaddrmatch = 0;
 960
        /* force NUL termination of the name before any string function */
 961        ifr->ifr_name[IFNAMSIZ - 1] = 0;
 962
 963        /* save original address for comparison */
 964        memcpy(&sin_orig, sin, sizeof(*sin));
 965
        /* hide an alias suffix while the device is loaded and looked up */
 966        colon = strchr(ifr->ifr_name, ':');
 967        if (colon)
 968                *colon = 0;
 969
 970        dev_load(net, ifr->ifr_name);
 971
 972        switch (cmd) {
 973        case SIOCGIFADDR:       /* Get interface address */
 974        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 975        case SIOCGIFDSTADDR:    /* Get the destination address */
 976        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 977                /* Note that these ioctls will not sleep,
 978                   so that we do not impose a lock.
 979                   One day we will be forced to put shlock here (I mean SMP)
 980                 */
 981                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* the reply is built in place, starting from a zeroed sockaddr */
 982                memset(sin, 0, sizeof(*sin));
 983                sin->sin_family = AF_INET;
 984                break;
 985
 986        case SIOCSIFFLAGS:
 987                ret = -EPERM;
 988                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 989                        goto out;
 990                break;
 991        case SIOCSIFADDR:       /* Set interface address (and family) */
 992        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 993        case SIOCSIFDSTADDR:    /* Set the destination address */
 994        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 995                ret = -EPERM;
 996                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 997                        goto out;
 998                ret = -EINVAL;
 999                if (sin->sin_family != AF_INET)
1000                        goto out;
1001                break;
1002        default:
1003                ret = -EINVAL;
1004                goto out;
1005        }
1006
1007        rtnl_lock();
1008
1009        ret = -ENODEV;
1010        dev = __dev_get_by_name(net, ifr->ifr_name);
1011        if (!dev)
1012                goto done;
1013
        /* restore the full "name:alias" label for the comparisons below */
1014        if (colon)
1015                *colon = ':';
1016
1017        in_dev = __in_dev_get_rtnl(dev);
1018        if (in_dev) {
1019                if (tryaddrmatch) {
1020                        /* Matthias Andree */
1021                        /* compare label and address (4.4BSD style) */
1022                        /* note: we only do this for a limited set of ioctls
1023                           and only if the original address family was AF_INET.
1024                           This is checked above. */
1025                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1026                             ifap = &ifa->ifa_next) {
1027                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1028                                    sin_orig.sin_addr.s_addr ==
1029                                                        ifa->ifa_local) {
1030                                        break; /* found */
1031                                }
1032                        }
1033                }
1034                /* we didn't get a match, maybe the application is
1035                   4.3BSD-style and passed in junk so we fall back to
1036                   comparing just the label */
1037                if (!ifa) {
1038                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1039                             ifap = &ifa->ifa_next)
1040                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1041                                        break;
1042                }
1043        }
1044
        /* only SIOCSIFADDR and SIOCSIFFLAGS may go on without an address entry */
1045        ret = -EADDRNOTAVAIL;
1046        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1047                goto done;
1048
1049        switch (cmd) {
1050        case SIOCGIFADDR:       /* Get interface address */
1051                ret = 0;
1052                sin->sin_addr.s_addr = ifa->ifa_local;
1053                break;
1054
1055        case SIOCGIFBRDADDR:    /* Get the broadcast address */
1056                ret = 0;
1057                sin->sin_addr.s_addr = ifa->ifa_broadcast;
1058                break;
1059
1060        case SIOCGIFDSTADDR:    /* Get the destination address */
1061                ret = 0;
1062                sin->sin_addr.s_addr = ifa->ifa_address;
1063                break;
1064
1065        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1066                ret = 0;
1067                sin->sin_addr.s_addr = ifa->ifa_mask;
1068                break;
1069
1070        case SIOCSIFFLAGS:
                /* on an alias label the flags act on that one address:
                 * a request without IFF_UP deletes it; device flags are
                 * only changed when no alias suffix was given
                 */
1071                if (colon) {
1072                        ret = -EADDRNOTAVAIL;
1073                        if (!ifa)
1074                                break;
1075                        ret = 0;
1076                        if (!(ifr->ifr_flags & IFF_UP))
1077                                inet_del_ifa(in_dev, ifap, 1);
1078                        break;
1079                }
1080                ret = dev_change_flags(dev, ifr->ifr_flags);
1081                break;
1082
1083        case SIOCSIFADDR:       /* Set interface address (and family) */
1084                ret = -EINVAL;
1085                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1086                        break;
1087
1088                if (!ifa) {
                        /* no entry yet: allocate one, labelled with the
                         * alias name if one was given, else the device name
                         */
1089                        ret = -ENOBUFS;
1090                        ifa = inet_alloc_ifa();
1091                        if (!ifa)
1092                                break;
1093                        INIT_HLIST_NODE(&ifa->hash);
1094                        if (colon)
1095                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1096                        else
1097                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1098                } else {
                        /* unchanged address: nothing to do; otherwise take
                         * the entry off the list (it is still used below and
                         * handed to inet_set_ifa()) and reset derived fields
                         */
1099                        ret = 0;
1100                        if (ifa->ifa_local == sin->sin_addr.s_addr)
1101                                break;
1102                        inet_del_ifa(in_dev, ifap, 0);
1103                        ifa->ifa_broadcast = 0;
1104                        ifa->ifa_scope = 0;
1105                }
1106
1107                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1108
                /* classful default mask, or a /32 on point-to-point devices */
1109                if (!(dev->flags & IFF_POINTOPOINT)) {
1110                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1111                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1112                        if ((dev->flags & IFF_BROADCAST) &&
1113                            ifa->ifa_prefixlen < 31)
1114                                ifa->ifa_broadcast = ifa->ifa_address |
1115                                                     ~ifa->ifa_mask;
1116                } else {
1117                        ifa->ifa_prefixlen = 32;
1118                        ifa->ifa_mask = inet_make_mask(32);
1119                }
1120                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1121                ret = inet_set_ifa(dev, ifa);
1122                break;
1123
1124        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1125                ret = 0;
                /* a changed entry is removed, updated and inserted again */
1126                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1127                        inet_del_ifa(in_dev, ifap, 0);
1128                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
1129                        inet_insert_ifa(ifa);
1130                }
1131                break;
1132
1133        case SIOCSIFDSTADDR:    /* Set the destination address */
1134                ret = 0;
1135                if (ifa->ifa_address == sin->sin_addr.s_addr)
1136                        break;
1137                ret = -EINVAL;
1138                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1139                        break;
1140                ret = 0;
1141                inet_del_ifa(in_dev, ifap, 0);
1142                ifa->ifa_address = sin->sin_addr.s_addr;
1143                inet_insert_ifa(ifa);
1144                break;
1145
1146        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1147
1148                /*
1149                 *      The mask we set must be legal.
1150                 */
1151                ret = -EINVAL;
1152                if (bad_mask(sin->sin_addr.s_addr, 0))
1153                        break;
1154                ret = 0;
1155                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1156                        __be32 old_mask = ifa->ifa_mask;
1157                        inet_del_ifa(in_dev, ifap, 0);
1158                        ifa->ifa_mask = sin->sin_addr.s_addr;
1159                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1160
1161                        /* See if current broadcast address matches
1162                         * with current netmask, then recalculate
1163                         * the broadcast address. Otherwise it's a
1164                         * funny address, so don't touch it since
1165                         * the user seems to know what (s)he's doing...
1166                         */
1167                        if ((dev->flags & IFF_BROADCAST) &&
1168                            (ifa->ifa_prefixlen < 31) &&
1169                            (ifa->ifa_broadcast ==
1170                             (ifa->ifa_local|~old_mask))) {
1171                                ifa->ifa_broadcast = (ifa->ifa_local |
1172                                                      ~sin->sin_addr.s_addr);
1173                        }
1174                        inet_insert_ifa(ifa);
1175                }
1176                break;
1177        }
1178done:
1179        rtnl_unlock();
1180out:
1181        return ret;
1182}
1183
1184static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1185{
1186        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1187        struct in_ifaddr *ifa;
1188        struct ifreq ifr;
1189        int done = 0;
1190
1191        if (WARN_ON(size > sizeof(struct ifreq)))
1192                goto out;
1193
1194        if (!in_dev)
1195                goto out;
1196
1197        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1198                if (!buf) {
1199                        done += size;
1200                        continue;
1201                }
1202                if (len < size)
1203                        break;
1204                memset(&ifr, 0, sizeof(struct ifreq));
1205                strcpy(ifr.ifr_name, ifa->ifa_label);
1206
1207                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1208                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1209                                                                ifa->ifa_local;
1210
1211                if (copy_to_user(buf + done, &ifr, size)) {
1212                        done = -EFAULT;
1213                        break;
1214                }
1215                len  -= size;
1216                done += size;
1217        }
1218out:
1219        return done;
1220}
1221
1222static __be32 in_dev_select_addr(const struct in_device *in_dev,
1223                                 int scope)
1224{
1225        for_primary_ifa(in_dev) {
1226                if (ifa->ifa_scope != RT_SCOPE_LINK &&
1227                    ifa->ifa_scope <= scope)
1228                        return ifa->ifa_local;
1229        } endfor_ifa(in_dev);
1230
1231        return 0;
1232}
1233
1234__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1235{
1236        __be32 addr = 0;
1237        struct in_device *in_dev;
1238        struct net *net = dev_net(dev);
1239        int master_idx;
1240
1241        rcu_read_lock();
1242        in_dev = __in_dev_get_rcu(dev);
1243        if (!in_dev)
1244                goto no_in_dev;
1245
1246        for_primary_ifa(in_dev) {
1247                if (ifa->ifa_scope > scope)
1248                        continue;
1249                if (!dst || inet_ifa_match(dst, ifa)) {
1250                        addr = ifa->ifa_local;
1251                        break;
1252                }
1253                if (!addr)
1254                        addr = ifa->ifa_local;
1255        } endfor_ifa(in_dev);
1256
1257        if (addr)
1258                goto out_unlock;
1259no_in_dev:
1260        master_idx = l3mdev_master_ifindex_rcu(dev);
1261
1262        /* For VRFs, the VRF device takes the place of the loopback device,
1263         * with addresses on it being preferred.  Note in such cases the
1264         * loopback device will be among the devices that fail the master_idx
1265         * equality check in the loop below.
1266         */
1267        if (master_idx &&
1268            (dev = dev_get_by_index_rcu(net, master_idx)) &&
1269            (in_dev = __in_dev_get_rcu(dev))) {
1270                addr = in_dev_select_addr(in_dev, scope);
1271                if (addr)
1272                        goto out_unlock;
1273        }
1274
1275        /* Not loopback addresses on loopback should be preferred
1276           in this case. It is important that lo is the first interface
1277           in dev_base list.
1278         */
1279        for_each_netdev_rcu(net, dev) {
1280                if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1281                        continue;
1282
1283                in_dev = __in_dev_get_rcu(dev);
1284                if (!in_dev)
1285                        continue;
1286
1287                addr = in_dev_select_addr(in_dev, scope);
1288                if (addr)
1289                        goto out_unlock;
1290        }
1291out_unlock:
1292        rcu_read_unlock();
1293        return addr;
1294}
1295EXPORT_SYMBOL(inet_select_addr);
1296
/*
 * Scan the addresses of @in_dev for a local address that satisfies the
 * given constraints.  The for_ifa() macro supplies the loop variable ifa.
 * @in_dev: device whose address list is scanned
 * @dst:    when non-zero, an entry must accept it in inet_ifa_match()
 * @local:  wanted local address; 0 accepts any local address
 * @scope:  largest ifa_scope value accepted for the returned address
 *
 * Two pieces of state are tracked across the scan:
 *   addr - first local address that equals @local (or any, if @local is 0)
 *          and whose scope is within @scope;
 *   same - set once an entry accepts @local and @dst (each only checked
 *          when non-zero) in inet_ifa_match().
 *
 * Returns addr if "same" is set when the scan ends, otherwise 0.
 */
1297static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1298                              __be32 local, int scope)
1299{
1300        int same = 0;
1301        __be32 addr = 0;
1302
1303        for_ifa(in_dev) {
                /* remember the first candidate; done if a subnet match
                 * was already seen on an earlier entry
                 */
1304                if (!addr &&
1305                    (local == ifa->ifa_local || !local) &&
1306                    ifa->ifa_scope <= scope) {
1307                        addr = ifa->ifa_local;
1308                        if (same)
1309                                break;
1310                }
1311                if (!same) {
1312                        same = (!local || inet_ifa_match(local, ifa)) &&
1313                                (!dst || inet_ifa_match(dst, ifa));
1314                        if (same && addr) {
                                /* only the "autoselect local, specific dst"
                                 * case needs the extra checks below
                                 */
1315                                if (local || !dst)
1316                                        break;
1317                                /* Is the selected addr into dst subnet? */
1318                                if (inet_ifa_match(addr, ifa))
1319                                        break;
1320                                /* No, then can we use new local src? */
1321                                if (ifa->ifa_scope <= scope) {
1322                                        addr = ifa->ifa_local;
1323                                        break;
1324                                }
1325                                /* search for large dst subnet for addr */
1326                                same = 0;
1327                        }
1328                }
1329        } endfor_ifa(in_dev);
1330
1331        return same ? addr : 0;
1332}
1333
1334/*
1335 * Confirm that local IP address exists using wildcards:
1336 * - net: netns to check, cannot be NULL
1337 * - in_dev: only on this interface, NULL=any interface
1338 * - dst: only in the same subnet as dst, 0=any dst
1339 * - local: address, 0=autoselect the local address
1340 * - scope: maximum allowed scope value for the local address
1341 */
1342__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1343                         __be32 dst, __be32 local, int scope)
1344{
1345        __be32 addr = 0;
1346        struct net_device *dev;
1347
1348        if (in_dev)
1349                return confirm_addr_indev(in_dev, dst, local, scope);
1350
1351        rcu_read_lock();
1352        for_each_netdev_rcu(net, dev) {
1353                in_dev = __in_dev_get_rcu(dev);
1354                if (in_dev) {
1355                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1356                        if (addr)
1357                                break;
1358                }
1359        }
1360        rcu_read_unlock();
1361
1362        return addr;
1363}
1364EXPORT_SYMBOL(inet_confirm_addr);
1365
1366/*
1367 *      Device notifier
1368 */
1369
/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
1370int register_inetaddr_notifier(struct notifier_block *nb)
1371{
1372        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1373}
1374EXPORT_SYMBOL(register_inetaddr_notifier);
1375
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1376int unregister_inetaddr_notifier(struct notifier_block *nb)
1377{
1378        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1379}
1380EXPORT_SYMBOL(unregister_inetaddr_notifier);
1381
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_register().
 */
1382int register_inetaddr_validator_notifier(struct notifier_block *nb)
1383{
1384        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1385}
1386EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1387
/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns the result of blocking_notifier_chain_unregister().
 */
1388int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1389{
1390        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1391            nb);
1392}
1393EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1394
1395/* Rename ifa_labels for a device name change. Make some effort to preserve
1396 * existing alias numbering and to create unique labels if possible.
1397*/
1398static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1399{
1400        struct in_ifaddr *ifa;
1401        int named = 0;
1402
1403        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1404                char old[IFNAMSIZ], *dot;
1405
1406                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1407                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1408                if (named++ == 0)
1409                        goto skip;
1410                dot = strchr(old, ':');
1411                if (!dot) {
1412                        sprintf(old, ":%d", named);
1413                        dot = old;
1414                }
1415                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1416                        strcat(ifa->ifa_label, dot);
1417                else
1418                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1419skip:
1420                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1421        }
1422}
1423
1424static bool inetdev_valid_mtu(unsigned int mtu)
1425{
1426        return mtu >= IPV4_MIN_MTU;
1427}
1428
1429static void inetdev_send_gratuitous_arp(struct net_device *dev,
1430                                        struct in_device *in_dev)
1431
1432{
1433        struct in_ifaddr *ifa;
1434
1435        for (ifa = in_dev->ifa_list; ifa;
1436             ifa = ifa->ifa_next) {
1437                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1438                         ifa->ifa_local, dev,
1439                         ifa->ifa_local, NULL,
1440                         dev->dev_addr, NULL);
1441        }
1442}
1443
1444/* Called only under RTNL semaphore */
1445
/*
 * Netdevice notifier callback that keeps the IPv4 state (in_device) of a
 * device in step with device events.
 * @this:  notifier block (not used in the body)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * When the device has no in_device yet, only two events do anything:
 * NETDEV_REGISTER creates one with inetdev_init(), and NETDEV_CHANGEMTU
 * creates one if the new MTU passes inetdev_valid_mtu().
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER; that error is returned through notifier_from_errno().
 */
1446static int inetdev_event(struct notifier_block *this, unsigned long event,
1447                         void *ptr)
1448{
1449        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1450        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1451
1452        ASSERT_RTNL();
1453
1454        if (!in_dev) {
1455                if (event == NETDEV_REGISTER) {
1456                        in_dev = inetdev_init(dev);
1457                        if (IS_ERR(in_dev))
1458                                return notifier_from_errno(PTR_ERR(in_dev));
                        /* loopback devices get NOXFRM and NOPOLICY set */
1459                        if (dev->flags & IFF_LOOPBACK) {
1460                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1461                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1462                        }
1463                } else if (event == NETDEV_CHANGEMTU) {
1464                        /* Re-enabling IP */
1465                        if (inetdev_valid_mtu(dev->mtu))
1466                                in_dev = inetdev_init(dev);
1467                }
1468                goto out;
1469        }
1470
1471        switch (event) {
1472        case NETDEV_REGISTER:
                /* an in_device already exists at registration time:
                 * log it and clear dev->ip_ptr
                 */
1473                pr_debug("%s: bug\n", __func__);
1474                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1475                break;
1476        case NETDEV_UP:
1477                if (!inetdev_valid_mtu(dev->mtu))
1478                        break;
                /* a loopback device coming up gets INADDR_LOOPBACK/8 with
                 * host scope and infinite lifetimes; an allocation failure
                 * is silently tolerated
                 */
1479                if (dev->flags & IFF_LOOPBACK) {
1480                        struct in_ifaddr *ifa = inet_alloc_ifa();
1481
1482                        if (ifa) {
1483                                INIT_HLIST_NODE(&ifa->hash);
1484                                ifa->ifa_local =
1485                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1486                                ifa->ifa_prefixlen = 8;
1487                                ifa->ifa_mask = inet_make_mask(8);
1488                                in_dev_hold(in_dev);
1489                                ifa->ifa_dev = in_dev;
1490                                ifa->ifa_scope = RT_SCOPE_HOST;
1491                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1493                                                 INFINITY_LIFE_TIME);
1494                                ipv4_devconf_setall(in_dev);
1495                                neigh_parms_data_state_setall(in_dev->arp_parms);
1496                                inet_insert_ifa(ifa);
1497                        }
1498                }
1499                ip_mc_up(in_dev);
1500                /* fall through */
1501        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR only announce when ARP_NOTIFY is on */
1502                if (!IN_DEV_ARP_NOTIFY(in_dev))
1503                        break;
1504                /* fall through */
1505        case NETDEV_NOTIFY_PEERS:
1506                /* Send gratuitous ARP to notify of link change */
1507                inetdev_send_gratuitous_arp(dev, in_dev);
1508                break;
1509        case NETDEV_DOWN:
1510                ip_mc_down(in_dev);
1511                break;
1512        case NETDEV_PRE_TYPE_CHANGE:
1513                ip_mc_unmap(in_dev);
1514                break;
1515        case NETDEV_POST_TYPE_CHANGE:
1516                ip_mc_remap(in_dev);
1517                break;
1518        case NETDEV_CHANGEMTU:
1519                if (inetdev_valid_mtu(dev->mtu))
1520                        break;
1521                /* disable IP when MTU is not enough */
1522                /* fall through */
1523        case NETDEV_UNREGISTER:
1524                inetdev_destroy(in_dev);
1525                break;
1526        case NETDEV_CHANGENAME:
1527                /* Do not notify about label change, this event is
1528                 * not interesting to applications using netlink.
1529                 */
                /* NOTE(review): inetdev_changename() in this file calls
                 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which
                 * looks at odds with the comment above - confirm.
                 */
1530                inetdev_changename(dev, in_dev);
1531
                /* re-register the sysctl entries after the rename */
1532                devinet_sysctl_unregister(in_dev);
1533                devinet_sysctl_register(in_dev);
1534                break;
1535        }
1536out:
1537        return NOTIFY_DONE;
1538}
1539
/* Notifier block whose callback is inetdev_event(). */
1540static struct notifier_block ip_netdev_notifier = {
1541        .notifier_call = inetdev_event,
1542};
1543
1544static size_t inet_nlmsg_size(void)
1545{
1546        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1547               + nla_total_size(4) /* IFA_ADDRESS */
1548               + nla_total_size(4) /* IFA_LOCAL */
1549               + nla_total_size(4) /* IFA_BROADCAST */
1550               + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1551               + nla_total_size(4)  /* IFA_FLAGS */
1552               + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1553}
1554
1555static inline u32 cstamp_delta(unsigned long cstamp)
1556{
1557        return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1558}
1559
1560static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1561                         unsigned long tstamp, u32 preferred, u32 valid)
1562{
1563        struct ifa_cacheinfo ci;
1564
1565        ci.cstamp = cstamp_delta(cstamp);
1566        ci.tstamp = cstamp_delta(tstamp);
1567        ci.ifa_prefered = preferred;
1568        ci.ifa_valid = valid;
1569
1570        return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1571}
1572
1573static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1574                            u32 portid, u32 seq, int event, unsigned int flags)
1575{
1576        struct ifaddrmsg *ifm;
1577        struct nlmsghdr  *nlh;
1578        u32 preferred, valid;
1579
1580        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1581        if (!nlh)
1582                return -EMSGSIZE;
1583
1584        ifm = nlmsg_data(nlh);
1585        ifm->ifa_family = AF_INET;
1586        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1587        ifm->ifa_flags = ifa->ifa_flags;
1588        ifm->ifa_scope = ifa->ifa_scope;
1589        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1590
1591        if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1592                preferred = ifa->ifa_preferred_lft;
1593                valid = ifa->ifa_valid_lft;
1594                if (preferred != INFINITY_LIFE_TIME) {
1595                        long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1596
1597                        if (preferred > tval)
1598                                preferred -= tval;
1599                        else
1600                                preferred = 0;
1601                        if (valid != INFINITY_LIFE_TIME) {
1602                                if (valid > tval)
1603                                        valid -= tval;
1604                                else
1605                                        valid = 0;
1606                        }
1607                }
1608        } else {
1609                preferred = INFINITY_LIFE_TIME;
1610                valid = INFINITY_LIFE_TIME;
1611        }
1612        if ((ifa->ifa_address &&
1613             nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1614            (ifa->ifa_local &&
1615             nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1616            (ifa->ifa_broadcast &&
1617             nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1618            (ifa->ifa_label[0] &&
1619             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1620            nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1621            put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1622                          preferred, valid))
1623                goto nla_put_failure;
1624
1625        nlmsg_end(skb, nlh);
1626        return 0;
1627
1628nla_put_failure:
1629        nlmsg_cancel(skb, nlh);
1630        return -EMSGSIZE;
1631}
1632
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (flagged
 * NLM_F_MULTI) per IPv4 address of every device found in the device index
 * hash of the namespace that @skb's socket belongs to.
 * @skb: buffer the messages are appended to
 * @cb:  dump state; cb->args[] holds the position so that a later call
 *       can continue where this one stopped:
 *         args[0] - bucket index in net->dev_index_head
 *         args[1] - position of the device within that bucket
 *         args[2] - position of the address within that device's list
 *
 * Each bucket is walked under rcu_read_lock().  The walk stops as soon as
 * inet_fill_ifaddr() fails; the position reached is then saved in
 * cb->args[].
 *
 * Returns skb->len.
 */
1633static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1634{
1635        struct net *net = sock_net(skb->sk);
1636        int h, s_h;
1637        int idx, s_idx;
1638        int ip_idx, s_ip_idx;
1639        struct net_device *dev;
1640        struct in_device *in_dev;
1641        struct in_ifaddr *ifa;
1642        struct hlist_head *head;
1643
        /* resume position saved by the previous call (all 0 on the first) */
1644        s_h = cb->args[0];
1645        s_idx = idx = cb->args[1];
1646        s_ip_idx = ip_idx = cb->args[2];
1647
        /* s_idx only applies to the first bucket visited */
1648        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1649                idx = 0;
1650                head = &net->dev_index_head[h];
1651                rcu_read_lock();
                /* sequence value used by nl_dump_check_consistent() below */
1652                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1653                          net->dev_base_seq;
1654                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1655                        if (idx < s_idx)
1656                                goto cont;
                        /* past the device we resumed on: start every
                         * further device at its first address
                         */
1657                        if (h > s_h || idx > s_idx)
1658                                s_ip_idx = 0;
1659                        in_dev = __in_dev_get_rcu(dev);
1660                        if (!in_dev)
1661                                goto cont;
1662
1663                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1664                             ifa = ifa->ifa_next, ip_idx++) {
1665                                if (ip_idx < s_ip_idx)
1666                                        continue;
1667                                if (inet_fill_ifaddr(skb, ifa,
1668                                             NETLINK_CB(cb->skb).portid,
1669                                             cb->nlh->nlmsg_seq,
1670                                             RTM_NEWADDR, NLM_F_MULTI) < 0) {
1671                                        rcu_read_unlock();
1672                                        goto done;
1673                                }
1674                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1675                        }
1676cont:
1677                        idx++;
1678                }
1679                rcu_read_unlock();
1680        }
1681
1682done:
        /* remember where to continue on the next call */
1683        cb->args[0] = h;
1684        cb->args[1] = idx;
1685        cb->args[2] = ip_idx;
1686
1687        return skb->len;
1688}
1689
1690static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1691                      u32 portid)
1692{
1693        struct sk_buff *skb;
1694        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1695        int err = -ENOBUFS;
1696        struct net *net;
1697
1698        net = dev_net(ifa->ifa_dev->dev);
1699        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1700        if (!skb)
1701                goto errout;
1702
1703        err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1704        if (err < 0) {
1705                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1706                WARN_ON(err == -EMSGSIZE);
1707                kfree_skb(skb);
1708                goto errout;
1709        }
1710        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1711        return;
1712errout:
1713        if (err < 0)
1714                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1715}
1716
1717static size_t inet_get_link_af_size(const struct net_device *dev,
1718                                    u32 ext_filter_mask)
1719{
1720        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1721
1722        if (!in_dev)
1723                return 0;
1724
1725        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1726}
1727
1728static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1729                             u32 ext_filter_mask)
1730{
1731        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1732        struct nlattr *nla;
1733        int i;
1734
1735        if (!in_dev)
1736                return -ENODATA;
1737
1738        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1739        if (!nla)
1740                return -EMSGSIZE;
1741
1742        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1743                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1744
1745        return 0;
1746}
1747
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF is declared as a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1751
1752static int inet_validate_link_af(const struct net_device *dev,
1753                                 const struct nlattr *nla)
1754{
1755        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1756        int err, rem;
1757
1758        if (dev && !__in_dev_get_rcu(dev))
1759                return -EAFNOSUPPORT;
1760
1761        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1762        if (err < 0)
1763                return err;
1764
1765        if (tb[IFLA_INET_CONF]) {
1766                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1767                        int cfgid = nla_type(a);
1768
1769                        if (nla_len(a) < 4)
1770                                return -EINVAL;
1771
1772                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1773                                return -EINVAL;
1774                }
1775        }
1776
1777        return 0;
1778}
1779
1780static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1781{
1782        struct in_device *in_dev = __in_dev_get_rcu(dev);
1783        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1784        int rem;
1785
1786        if (!in_dev)
1787                return -EAFNOSUPPORT;
1788
1789        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1790                BUG();
1791
1792        if (tb[IFLA_INET_CONF]) {
1793                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1794                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1795        }
1796
1797        return 0;
1798}
1799
1800static int inet_netconf_msgsize_devconf(int type)
1801{
1802        int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1803                   + nla_total_size(4); /* NETCONFA_IFINDEX */
1804        bool all = false;
1805
1806        if (type == NETCONFA_ALL)
1807                all = true;
1808
1809        if (all || type == NETCONFA_FORWARDING)
1810                size += nla_total_size(4);
1811        if (all || type == NETCONFA_RP_FILTER)
1812                size += nla_total_size(4);
1813        if (all || type == NETCONFA_MC_FORWARDING)
1814                size += nla_total_size(4);
1815        if (all || type == NETCONFA_PROXY_NEIGH)
1816                size += nla_total_size(4);
1817        if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1818                size += nla_total_size(4);
1819
1820        return size;
1821}
1822
1823static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1824                                     struct ipv4_devconf *devconf, u32 portid,
1825                                     u32 seq, int event, unsigned int flags,
1826                                     int type)
1827{
1828        struct nlmsghdr  *nlh;
1829        struct netconfmsg *ncm;
1830        bool all = false;
1831
1832        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1833                        flags);
1834        if (!nlh)
1835                return -EMSGSIZE;
1836
1837        if (type == NETCONFA_ALL)
1838                all = true;
1839
1840        ncm = nlmsg_data(nlh);
1841        ncm->ncm_family = AF_INET;
1842
1843        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1844                goto nla_put_failure;
1845
1846        if (!devconf)
1847                goto out;
1848
1849        if ((all || type == NETCONFA_FORWARDING) &&
1850            nla_put_s32(skb, NETCONFA_FORWARDING,
1851                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1852                goto nla_put_failure;
1853        if ((all || type == NETCONFA_RP_FILTER) &&
1854            nla_put_s32(skb, NETCONFA_RP_FILTER,
1855                        IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1856                goto nla_put_failure;
1857        if ((all || type == NETCONFA_MC_FORWARDING) &&
1858            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1859                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1860                goto nla_put_failure;
1861        if ((all || type == NETCONFA_PROXY_NEIGH) &&
1862            nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1863                        IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1864                goto nla_put_failure;
1865        if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1866            nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1867                        IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1868                goto nla_put_failure;
1869
1870out:
1871        nlmsg_end(skb, nlh);
1872        return 0;
1873
1874nla_put_failure:
1875        nlmsg_cancel(skb, nlh);
1876        return -EMSGSIZE;
1877}
1878
1879void inet_netconf_notify_devconf(struct net *net, int event, int type,
1880                                 int ifindex, struct ipv4_devconf *devconf)
1881{
1882        struct sk_buff *skb;
1883        int err = -ENOBUFS;
1884
1885        skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1886        if (!skb)
1887                goto errout;
1888
1889        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1890                                        event, 0, type);
1891        if (err < 0) {
1892                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1893                WARN_ON(err == -EMSGSIZE);
1894                kfree_skb(skb);
1895                goto errout;
1896        }
1897        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1898        return;
1899errout:
1900        if (err < 0)
1901                rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1902}
1903
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each attribute listed here is declared with a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
};
1911
1912static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1913                                    struct nlmsghdr *nlh,
1914                                    struct netlink_ext_ack *extack)
1915{
1916        struct net *net = sock_net(in_skb->sk);
1917        struct nlattr *tb[NETCONFA_MAX+1];
1918        struct netconfmsg *ncm;
1919        struct sk_buff *skb;
1920        struct ipv4_devconf *devconf;
1921        struct in_device *in_dev;
1922        struct net_device *dev;
1923        int ifindex;
1924        int err;
1925
1926        err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1927                          devconf_ipv4_policy, extack);
1928        if (err < 0)
1929                goto errout;
1930
1931        err = -EINVAL;
1932        if (!tb[NETCONFA_IFINDEX])
1933                goto errout;
1934
1935        ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1936        switch (ifindex) {
1937        case NETCONFA_IFINDEX_ALL:
1938                devconf = net->ipv4.devconf_all;
1939                break;
1940        case NETCONFA_IFINDEX_DEFAULT:
1941                devconf = net->ipv4.devconf_dflt;
1942                break;
1943        default:
1944                dev = __dev_get_by_index(net, ifindex);
1945                if (!dev)
1946                        goto errout;
1947                in_dev = __in_dev_get_rtnl(dev);
1948                if (!in_dev)
1949                        goto errout;
1950                devconf = &in_dev->cnf;
1951                break;
1952        }
1953
1954        err = -ENOBUFS;
1955        skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1956        if (!skb)
1957                goto errout;
1958
1959        err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1960                                        NETLINK_CB(in_skb).portid,
1961                                        nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1962                                        NETCONFA_ALL);
1963        if (err < 0) {
1964                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1965                WARN_ON(err == -EMSGSIZE);
1966                kfree_skb(skb);
1967                goto errout;
1968        }
1969        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1970errout:
1971        return err;
1972}
1973
1974static int inet_netconf_dump_devconf(struct sk_buff *skb,
1975                                     struct netlink_callback *cb)
1976{
1977        struct net *net = sock_net(skb->sk);
1978        int h, s_h;
1979        int idx, s_idx;
1980        struct net_device *dev;
1981        struct in_device *in_dev;
1982        struct hlist_head *head;
1983
1984        s_h = cb->args[0];
1985        s_idx = idx = cb->args[1];
1986
1987        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1988                idx = 0;
1989                head = &net->dev_index_head[h];
1990                rcu_read_lock();
1991                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1992                          net->dev_base_seq;
1993                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1994                        if (idx < s_idx)
1995                                goto cont;
1996                        in_dev = __in_dev_get_rcu(dev);
1997                        if (!in_dev)
1998                                goto cont;
1999
2000                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
2001                                                      &in_dev->cnf,
2002                                                      NETLINK_CB(cb->skb).portid,
2003                                                      cb->nlh->nlmsg_seq,
2004                                                      RTM_NEWNETCONF,
2005                                                      NLM_F_MULTI,
2006                                                      NETCONFA_ALL) < 0) {
2007                                rcu_read_unlock();
2008                                goto done;
2009                        }
2010                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2011cont:
2012                        idx++;
2013                }
2014                rcu_read_unlock();
2015        }
2016        if (h == NETDEV_HASHENTRIES) {
2017                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2018                                              net->ipv4.devconf_all,
2019                                              NETLINK_CB(cb->skb).portid,
2020                                              cb->nlh->nlmsg_seq,
2021                                              RTM_NEWNETCONF, NLM_F_MULTI,
2022                                              NETCONFA_ALL) < 0)
2023                        goto done;
2024                else
2025                        h++;
2026        }
2027        if (h == NETDEV_HASHENTRIES + 1) {
2028                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2029                                              net->ipv4.devconf_dflt,
2030                                              NETLINK_CB(cb->skb).portid,
2031                                              cb->nlh->nlmsg_seq,
2032                                              RTM_NEWNETCONF, NLM_F_MULTI,
2033                                              NETCONFA_ALL) < 0)
2034                        goto done;
2035                else
2036                        h++;
2037        }
2038done:
2039        cb->args[0] = h;
2040        cb->args[1] = idx;
2041
2042        return skb->len;
2043}
2044
2045#ifdef CONFIG_SYSCTL
2046
2047static void devinet_copy_dflt_conf(struct net *net, int i)
2048{
2049        struct net_device *dev;
2050
2051        rcu_read_lock();
2052        for_each_netdev_rcu(net, dev) {
2053                struct in_device *in_dev;
2054
2055                in_dev = __in_dev_get_rcu(dev);
2056                if (in_dev && !test_bit(i, in_dev->cnf.state))
2057                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2058        }
2059        rcu_read_unlock();
2060}
2061
2062/* called with RTNL locked */
2063static void inet_forward_change(struct net *net)
2064{
2065        struct net_device *dev;
2066        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2067
2068        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2069        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2070        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071                                    NETCONFA_FORWARDING,
2072                                    NETCONFA_IFINDEX_ALL,
2073                                    net->ipv4.devconf_all);
2074        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2075                                    NETCONFA_FORWARDING,
2076                                    NETCONFA_IFINDEX_DEFAULT,
2077                                    net->ipv4.devconf_dflt);
2078
2079        for_each_netdev(net, dev) {
2080                struct in_device *in_dev;
2081
2082                if (on)
2083                        dev_disable_lro(dev);
2084
2085                in_dev = __in_dev_get_rtnl(dev);
2086                if (in_dev) {
2087                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2088                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2089                                                    NETCONFA_FORWARDING,
2090                                                    dev->ifindex, &in_dev->cnf);
2091                }
2092        }
2093}
2094
2095static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2096{
2097        if (cnf == net->ipv4.devconf_dflt)
2098                return NETCONFA_IFINDEX_DEFAULT;
2099        else if (cnf == net->ipv4.devconf_all)
2100                return NETCONFA_IFINDEX_ALL;
2101        else {
2102                struct in_device *idev
2103                        = container_of(cnf, struct in_device, cnf);
2104                return idev->dev->ifindex;
2105        }
2106}
2107
2108static int devinet_conf_proc(struct ctl_table *ctl, int write,
2109                             void __user *buffer,
2110                             size_t *lenp, loff_t *ppos)
2111{
2112        int old_value = *(int *)ctl->data;
2113        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2114        int new_value = *(int *)ctl->data;
2115
2116        if (write) {
2117                struct ipv4_devconf *cnf = ctl->extra1;
2118                struct net *net = ctl->extra2;
2119                int i = (int *)ctl->data - cnf->data;
2120                int ifindex;
2121
2122                set_bit(i, cnf->state);
2123
2124                if (cnf == net->ipv4.devconf_dflt)
2125                        devinet_copy_dflt_conf(net, i);
2126                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2127                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2128                        if ((new_value == 0) && (old_value != 0))
2129                                rt_cache_flush(net);
2130
2131                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2132                    new_value != old_value) {
2133                        ifindex = devinet_conf_ifindex(net, cnf);
2134                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2135                                                    NETCONFA_RP_FILTER,
2136                                                    ifindex, cnf);
2137                }
2138                if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2139                    new_value != old_value) {
2140                        ifindex = devinet_conf_ifindex(net, cnf);
2141                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2142                                                    NETCONFA_PROXY_NEIGH,
2143                                                    ifindex, cnf);
2144                }
2145                if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2146                    new_value != old_value) {
2147                        ifindex = devinet_conf_ifindex(net, cnf);
2148                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2149                                                    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2150                                                    ifindex, cnf);
2151                }
2152        }
2153
2154        return ret;
2155}
2156
2157static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2158                                  void __user *buffer,
2159                                  size_t *lenp, loff_t *ppos)
2160{
2161        int *valp = ctl->data;
2162        int val = *valp;
2163        loff_t pos = *ppos;
2164        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2165
2166        if (write && *valp != val) {
2167                struct net *net = ctl->extra2;
2168
2169                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2170                        if (!rtnl_trylock()) {
2171                                /* Restore the original values before restarting */
2172                                *valp = val;
2173                                *ppos = pos;
2174                                return restart_syscall();
2175                        }
2176                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2177                                inet_forward_change(net);
2178                        } else {
2179                                struct ipv4_devconf *cnf = ctl->extra1;
2180                                struct in_device *idev =
2181                                        container_of(cnf, struct in_device, cnf);
2182                                if (*valp)
2183                                        dev_disable_lro(idev->dev);
2184                                inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2185                                                            NETCONFA_FORWARDING,
2186                                                            idev->dev->ifindex,
2187                                                            cnf);
2188                        }
2189                        rtnl_unlock();
2190                        rt_cache_flush(net);
2191                } else
2192                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2193                                                    NETCONFA_FORWARDING,
2194                                                    NETCONFA_IFINDEX_DEFAULT,
2195                                                    net->ipv4.devconf_dflt);
2196        }
2197
2198        return ret;
2199}
2200
2201static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2202                                void __user *buffer,
2203                                size_t *lenp, loff_t *ppos)
2204{
2205        int *valp = ctl->data;
2206        int val = *valp;
2207        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2208        struct net *net = ctl->extra2;
2209
2210        if (write && *valp != val)
2211                rt_cache_flush(net);
2212
2213        return ret;
2214}
2215
/*
 * Build one ctl_table initializer for devconf attribute IPV4_DEVCONF_<attr>.
 * .data points at the attribute's slot in the global ipv4_devconf (slot
 * index is the attribute id minus one) and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2238
/*
 * Template for the per-configuration sysctl tables.
 *
 * Every entry is built by DEVINET_SYSCTL_ENTRY and therefore points into the
 * global ipv4_devconf.  __devinet_sysctl_register() duplicates this whole
 * structure with kmemdup() and rebases .data / .extra1 of the copy onto the
 * configuration block being registered.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
	.devinet_vars = {
		/* forwarding has its own handler */
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		/* the only mode 0444 entry */
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		/* mode 0644 entries handled by devinet_conf_proc() */
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
					"force_igmp_version"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
					"igmpv2_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
					"igmpv3_unsolicited_report_interval"),
		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
					"ignore_routes_with_linkdown"),
		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
					"drop_gratuitous_arp"),

		/* entries handled by ipv4_doint_and_flush() */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
					      "route_localnet"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
					      "drop_unicast_in_l2_multicast"),
	},
};
2289
2290static int __devinet_sysctl_register(struct net *net, char *dev_name,
2291                                     int ifindex, struct ipv4_devconf *p)
2292{
2293        int i;
2294        struct devinet_sysctl_table *t;
2295        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2296
2297        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2298        if (!t)
2299                goto out;
2300
2301        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2302                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2303                t->devinet_vars[i].extra1 = p;
2304                t->devinet_vars[i].extra2 = net;
2305        }
2306
2307        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2308
2309        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2310        if (!t->sysctl_header)
2311                goto free;
2312
2313        p->sysctl = t;
2314
2315        inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2316                                    ifindex, p);
2317        return 0;
2318
2319free:
2320        kfree(t);
2321out:
2322        return -ENOBUFS;
2323}
2324
2325static void __devinet_sysctl_unregister(struct net *net,
2326                                        struct ipv4_devconf *cnf, int ifindex)
2327{
2328        struct devinet_sysctl_table *t = cnf->sysctl;
2329
2330        if (t) {
2331                cnf->sysctl = NULL;
2332                unregister_net_sysctl_table(t->sysctl_header);
2333                kfree(t);
2334        }
2335
2336        inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2337}
2338
2339static int devinet_sysctl_register(struct in_device *idev)
2340{
2341        int err;
2342
2343        if (!sysctl_dev_name_is_allowed(idev->dev->name))
2344                return -EINVAL;
2345
2346        err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2347        if (err)
2348                return err;
2349        err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2350                                        idev->dev->ifindex, &idev->cnf);
2351        if (err)
2352                neigh_sysctl_unregister(idev->arp_parms);
2353        return err;
2354}
2355
2356static void devinet_sysctl_unregister(struct in_device *idev)
2357{
2358        struct net *net = dev_net(idev->dev);
2359
2360        __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2361        neigh_sysctl_unregister(idev->arp_parms);
2362}
2363
2364static struct ctl_table ctl_forward_entry[] = {
2365        {
2366                .procname       = "ip_forward",
2367                .data           = &ipv4_devconf.data[
2368                                        IPV4_DEVCONF_FORWARDING - 1],
2369                .maxlen         = sizeof(int),
2370                .mode           = 0644,
2371                .proc_handler   = devinet_sysctl_forward,
2372                .extra1         = &ipv4_devconf,
2373                .extra2         = &init_net,
2374        },
2375        { },
2376};
2377#endif
2378
2379static __net_init int devinet_init_net(struct net *net)
2380{
2381        int err;
2382        struct ipv4_devconf *all, *dflt;
2383#ifdef CONFIG_SYSCTL
2384        struct ctl_table *tbl = ctl_forward_entry;
2385        struct ctl_table_header *forw_hdr;
2386#endif
2387
2388        err = -ENOMEM;
2389        all = &ipv4_devconf;
2390        dflt = &ipv4_devconf_dflt;
2391
2392        if (!net_eq(net, &init_net)) {
2393                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2394                if (!all)
2395                        goto err_alloc_all;
2396
2397                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2398                if (!dflt)
2399                        goto err_alloc_dflt;
2400
2401#ifdef CONFIG_SYSCTL
2402                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2403                if (!tbl)
2404                        goto err_alloc_ctl;
2405
2406                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2407                tbl[0].extra1 = all;
2408                tbl[0].extra2 = net;
2409#endif
2410        }
2411
2412#ifdef CONFIG_SYSCTL
2413        err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2414        if (err < 0)
2415                goto err_reg_all;
2416
2417        err = __devinet_sysctl_register(net, "default",
2418                                        NETCONFA_IFINDEX_DEFAULT, dflt);
2419        if (err < 0)
2420                goto err_reg_dflt;
2421
2422        err = -ENOMEM;
2423        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2424        if (!forw_hdr)
2425                goto err_reg_ctl;
2426        net->ipv4.forw_hdr = forw_hdr;
2427#endif
2428
2429        net->ipv4.devconf_all = all;
2430        net->ipv4.devconf_dflt = dflt;
2431        return 0;
2432
2433#ifdef CONFIG_SYSCTL
2434err_reg_ctl:
2435        __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2436err_reg_dflt:
2437        __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2438err_reg_all:
2439        if (tbl != ctl_forward_entry)
2440                kfree(tbl);
2441err_alloc_ctl:
2442#endif
2443        if (dflt != &ipv4_devconf_dflt)
2444                kfree(dflt);
2445err_alloc_dflt:
2446        if (all != &ipv4_devconf)
2447                kfree(all);
2448err_alloc_all:
2449        return err;
2450}
2451
2452static __net_exit void devinet_exit_net(struct net *net)
2453{
2454#ifdef CONFIG_SYSCTL
2455        struct ctl_table *tbl;
2456
2457        tbl = net->ipv4.forw_hdr->ctl_table_arg;
2458        unregister_net_sysctl_table(net->ipv4.forw_hdr);
2459        __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2460                                    NETCONFA_IFINDEX_DEFAULT);
2461        __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2462                                    NETCONFA_IFINDEX_ALL);
2463        kfree(tbl);
2464#endif
2465        kfree(net->ipv4.devconf_dflt);
2466        kfree(net->ipv4.devconf_all);
2467}
2468
2469static __net_initdata struct pernet_operations devinet_ops = {
2470        .init = devinet_init_net,
2471        .exit = devinet_exit_net,
2472};
2473
2474static struct rtnl_af_ops inet_af_ops __read_mostly = {
2475        .family           = AF_INET,
2476        .fill_link_af     = inet_fill_link_af,
2477        .get_link_af_size = inet_get_link_af_size,
2478        .validate_link_af = inet_validate_link_af,
2479        .set_link_af      = inet_set_link_af,
2480};
2481
2482void __init devinet_init(void)
2483{
2484        int i;
2485
2486        for (i = 0; i < IN4_ADDR_HSIZE; i++)
2487                INIT_HLIST_HEAD(&inet_addr_lst[i]);
2488
2489        register_pernet_subsys(&devinet_ops);
2490
2491        register_gifconf(PF_INET, inet_gifconf);
2492        register_netdevice_notifier(&ip_netdev_notifier);
2493
2494        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2495
2496        rtnl_af_register(&inet_af_ops);
2497
2498        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2499        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2500        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2501        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2502                      inet_netconf_dump_devconf, 0);
2503}
2504