linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <linux/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/sched/signal.h>
  36#include <linux/string.h>
  37#include <linux/mm.h>
  38#include <linux/socket.h>
  39#include <linux/sockios.h>
  40#include <linux/in.h>
  41#include <linux/errno.h>
  42#include <linux/interrupt.h>
  43#include <linux/if_addr.h>
  44#include <linux/if_ether.h>
  45#include <linux/inet.h>
  46#include <linux/netdevice.h>
  47#include <linux/etherdevice.h>
  48#include <linux/skbuff.h>
  49#include <linux/init.h>
  50#include <linux/notifier.h>
  51#include <linux/inetdevice.h>
  52#include <linux/igmp.h>
  53#include <linux/slab.h>
  54#include <linux/hash.h>
  55#ifdef CONFIG_SYSCTL
  56#include <linux/sysctl.h>
  57#endif
  58#include <linux/kmod.h>
  59#include <linux/netconf.h>
  60
  61#include <net/arp.h>
  62#include <net/ip.h>
  63#include <net/route.h>
  64#include <net/ip_fib.h>
  65#include <net/rtnetlink.h>
  66#include <net/net_namespace.h>
  67#include <net/addrconf.h>
  68
  69static struct ipv4_devconf ipv4_devconf = {
  70        .data = {
  71                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  72                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  74                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  75                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  76                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  77        },
  78};
  79
  80static struct ipv4_devconf ipv4_devconf_dflt = {
  81        .data = {
  82                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  83                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  84                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  85                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  86                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  87                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  88                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  89        },
  90};
  91
  92#define IPV4_DEVCONF_DFLT(net, attr) \
  93        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  94
  95static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  96        [IFA_LOCAL]             = { .type = NLA_U32 },
  97        [IFA_ADDRESS]           = { .type = NLA_U32 },
  98        [IFA_BROADCAST]         = { .type = NLA_U32 },
  99        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 100        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
 101        [IFA_FLAGS]             = { .type = NLA_U32 },
 102        [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
 103};
 104
  /* The IPv4 address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
  105#define IN4_ADDR_HSIZE_SHIFT    8
  106#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
  107
  /* Hash buckets chaining in_ifaddr entries through their 'hash' member;
   * the bucket index is produced by inet_addr_hash().
   */
  108static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 109
 110static u32 inet_addr_hash(const struct net *net, __be32 addr)
 111{
 112        u32 val = (__force u32) addr ^ net_hash_mix(net);
 113
 114        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 115}
 116
 117static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 118{
 119        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 120
 121        ASSERT_RTNL();
 122        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 123}
 124
 125static void inet_hash_remove(struct in_ifaddr *ifa)
 126{
 127        ASSERT_RTNL();
 128        hlist_del_init_rcu(&ifa->hash);
 129}
 130
 131/**
 132 * __ip_dev_find - find the first device with a given source address.
 133 * @net: the net namespace
 134 * @addr: the source address
 135 * @devref: if true, take a reference on the found device
 136 *
 137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 138 */
 139struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 140{
 141        struct net_device *result = NULL;
 142        struct in_ifaddr *ifa;
 143
 144        rcu_read_lock();
 145        ifa = inet_lookup_ifaddr_rcu(net, addr);
 146        if (!ifa) {
 147                struct flowi4 fl4 = { .daddr = addr };
 148                struct fib_result res = { 0 };
 149                struct fib_table *local;
 150
 151                /* Fallback to FIB local table so that communication
 152                 * over loopback subnets work.
 153                 */
 154                local = fib_get_table(net, RT_TABLE_LOCAL);
 155                if (local &&
 156                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 157                    res.type == RTN_LOCAL)
 158                        result = FIB_RES_DEV(res);
 159        } else {
 160                result = ifa->ifa_dev->dev;
 161        }
 162        if (result && devref)
 163                dev_hold(result);
 164        rcu_read_unlock();
 165        return result;
 166}
 167EXPORT_SYMBOL(__ip_dev_find);
 168
 169/* called under RCU lock */
 170struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
 171{
 172        u32 hash = inet_addr_hash(net, addr);
 173        struct in_ifaddr *ifa;
 174
 175        hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
 176                if (ifa->ifa_local == addr &&
 177                    net_eq(dev_net(ifa->ifa_dev->dev), net))
 178                        return ifa;
 179
 180        return NULL;
 181}
 182
 183static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 184
 185static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 186static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
 187static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 188                         int destroy);
 189#ifdef CONFIG_SYSCTL
 190static int devinet_sysctl_register(struct in_device *idev);
 191static void devinet_sysctl_unregister(struct in_device *idev);
 192#else
 193static int devinet_sysctl_register(struct in_device *idev)
 194{
 195        return 0;
 196}
 197static void devinet_sysctl_unregister(struct in_device *idev)
 198{
 199}
 200#endif
 201
 202/* Locks all the inet devices. */
 203
 204static struct in_ifaddr *inet_alloc_ifa(void)
 205{
 206        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 207}
 208
 209static void inet_rcu_free_ifa(struct rcu_head *head)
 210{
 211        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 212        if (ifa->ifa_dev)
 213                in_dev_put(ifa->ifa_dev);
 214        kfree(ifa);
 215}
 216
 217static void inet_free_ifa(struct in_ifaddr *ifa)
 218{
 219        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 220}
 221
 222void in_dev_finish_destroy(struct in_device *idev)
 223{
 224        struct net_device *dev = idev->dev;
 225
 226        WARN_ON(idev->ifa_list);
 227        WARN_ON(idev->mc_list);
 228        kfree(rcu_dereference_protected(idev->mc_hash, 1));
 229#ifdef NET_REFCNT_DEBUG
 230        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 231#endif
 232        dev_put(dev);
 233        if (!idev->dead)
 234                pr_err("Freeing alive in_device %p\n", idev);
 235        else
 236                kfree(idev);
 237}
 238EXPORT_SYMBOL(in_dev_finish_destroy);
 239
 240static struct in_device *inetdev_init(struct net_device *dev)
 241{
 242        struct in_device *in_dev;
 243        int err = -ENOMEM;
 244
 245        ASSERT_RTNL();
 246
 247        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 248        if (!in_dev)
 249                goto out;
 250        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 251                        sizeof(in_dev->cnf));
 252        in_dev->cnf.sysctl = NULL;
 253        in_dev->dev = dev;
 254        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 255        if (!in_dev->arp_parms)
 256                goto out_kfree;
 257        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 258                dev_disable_lro(dev);
 259        /* Reference in_dev->dev */
 260        dev_hold(dev);
 261        /* Account for reference dev->ip_ptr (below) */
 262        refcount_set(&in_dev->refcnt, 1);
 263
 264        err = devinet_sysctl_register(in_dev);
 265        if (err) {
 266                in_dev->dead = 1;
 267                in_dev_put(in_dev);
 268                in_dev = NULL;
 269                goto out;
 270        }
 271        ip_mc_init_dev(in_dev);
 272        if (dev->flags & IFF_UP)
 273                ip_mc_up(in_dev);
 274
 275        /* we can receive as soon as ip_ptr is set -- do this last */
 276        rcu_assign_pointer(dev->ip_ptr, in_dev);
 277out:
 278        return in_dev ?: ERR_PTR(err);
 279out_kfree:
 280        kfree(in_dev);
 281        in_dev = NULL;
 282        goto out;
 283}
 284
 285static void in_dev_rcu_put(struct rcu_head *head)
 286{
 287        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 288        in_dev_put(idev);
 289}
 290
 291static void inetdev_destroy(struct in_device *in_dev)
 292{
 293        struct in_ifaddr *ifa;
 294        struct net_device *dev;
 295
 296        ASSERT_RTNL();
 297
 298        dev = in_dev->dev;
 299
 300        in_dev->dead = 1;
 301
 302        ip_mc_destroy_dev(in_dev);
 303
 304        while ((ifa = in_dev->ifa_list) != NULL) {
 305                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 306                inet_free_ifa(ifa);
 307        }
 308
 309        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 310
 311        devinet_sysctl_unregister(in_dev);
 312        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 313        arp_ifdown(dev);
 314
 315        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 316}
 317
 318int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 319{
 320        rcu_read_lock();
 321        for_primary_ifa(in_dev) {
 322                if (inet_ifa_match(a, ifa)) {
 323                        if (!b || inet_ifa_match(b, ifa)) {
 324                                rcu_read_unlock();
 325                                return 1;
 326                        }
 327                }
 328        } endfor_ifa(in_dev);
 329        rcu_read_unlock();
 330        return 0;
 331}
 332
  /*
   * Remove the address *ifap from in_dev's address list.  RTNL must be held.
   *
   * in_dev:  device state that owns the address list
   * ifap:    link (list head or a predecessor's ifa_next) pointing at the
   *          address to delete
   * destroy: when non-zero the deleted address is also released with
   *          inet_free_ifa()
   * nlh, portid: passed through to rtmsg_ifa() for the netlink notifications
   *
   * When a primary address is deleted on a live device, the secondaries with
   * the same mask that match its subnet are either deleted as well or, if
   * IN_DEV_PROMOTE_SECONDARIES() is set, the first of them is promoted to
   * primary.  Each deleted address is announced with RTM_DELADDR and a
   * NETDEV_DOWN notifier call; a promoted address with RTM_NEWADDR and
   * NETDEV_UP.  All of this is skipped when in_dev is already marked dead.
   */
  333static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
  334                         int destroy, struct nlmsghdr *nlh, u32 portid)
  335{
  336        struct in_ifaddr *promote = NULL;
  337        struct in_ifaddr *ifa, *ifa1 = *ifap;
  338        struct in_ifaddr *last_prim = in_dev->ifa_list;
  339        struct in_ifaddr *prev_prom = NULL;
  340        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
  341
  342        ASSERT_RTNL();
  343
  344        if (in_dev->dead)
  345                goto no_promotions;
  346
  347        /* 1. Deleting primary ifaddr forces deletion all secondaries
  348         * unless alias promotion is set
  349         **/
  350
  351        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
  352                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
  353
  354                while ((ifa = *ifap1) != NULL) {
  355                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
  356                            ifa1->ifa_scope <= ifa->ifa_scope)
  357                                last_prim = ifa;
  358
  359                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
  360                            ifa1->ifa_mask != ifa->ifa_mask ||
  361                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
  362                                ifap1 = &ifa->ifa_next;
  363                                prev_prom = ifa;
  364                                continue;
  365                        }
  366
  367                        if (!do_promote) {
  368                                inet_hash_remove(ifa);
  369                                *ifap1 = ifa->ifa_next;
  370
  371                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
  372                                blocking_notifier_call_chain(&inetaddr_chain,
  373                                                NETDEV_DOWN, ifa);
  374                                inet_free_ifa(ifa);
  375                        } else {
  376                                promote = ifa;
  377                                break;
  378                        }
  379                }
  380        }
  381
  382        /* On promotion all secondaries from subnet are changing
  383         * the primary IP, we must remove all their routes silently
  384         * and later to add them back with new prefsrc. Do this
  385         * while all addresses are on the device list.
  386         */
  387        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
  388                if (ifa1->ifa_mask == ifa->ifa_mask &&
  389                    inet_ifa_match(ifa1->ifa_address, ifa))
  390                        fib_del_ifaddr(ifa, ifa1);
  391        }
  392
  393no_promotions:
  394        /* 2. Unlink it */
  395
  396        *ifap = ifa1->ifa_next;
  397        inet_hash_remove(ifa1);
  398
  399        /* 3. Announce address deletion */
  400
  401        /* Send message first, then call notifier.
  402           At first sight, FIB update triggered by notifier
  403           will refer to already deleted ifaddr, that could confuse
  404           netlink listeners. It is not true: look, gated sees
  405           that route deleted and if it still thinks that ifaddr
  406           is valid, it will try to restore deleted routes... Grr.
  407           So that, this order is correct.
  408         */
  409        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
  410        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
  411
  412        if (promote) {
  413                struct in_ifaddr *next_sec = promote->ifa_next;
  414
                /* Unlink 'promote' from behind prev_prom and re-insert it
                 * directly after last_prim.
                 */
  415                if (prev_prom) {
  416                        prev_prom->ifa_next = promote->ifa_next;
  417                        promote->ifa_next = last_prim->ifa_next;
  418                        last_prim->ifa_next = promote;
  419                }
  420
  421                promote->ifa_flags &= ~IFA_F_SECONDARY;
  422                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
  423                blocking_notifier_call_chain(&inetaddr_chain,
  424                                NETDEV_UP, promote);
                /* Re-add the routes of the remaining secondaries of this
                 * subnet (those that followed 'promote' before the move).
                 */
  425                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
  426                        if (ifa1->ifa_mask != ifa->ifa_mask ||
  427                            !inet_ifa_match(ifa1->ifa_address, ifa))
  428                                        continue;
  429                        fib_add_ifaddr(ifa);
  430                }
  431
  432        }
  433        if (destroy)
  434                inet_free_ifa(ifa1);
  435}
 436
 437static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 438                         int destroy)
 439{
 440        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 441}
 442
  /* Forward declaration: check_lifetime() is the handler of the delayed work
   * item declared just below, which __inet_insert_ifa() re-queues.
   */
  443static void check_lifetime(struct work_struct *work);
  444
  445static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 446
 447static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 448                             u32 portid, struct netlink_ext_ack *extack)
 449{
 450        struct in_device *in_dev = ifa->ifa_dev;
 451        struct in_ifaddr *ifa1, **ifap, **last_primary;
 452        struct in_validator_info ivi;
 453        int ret;
 454
 455        ASSERT_RTNL();
 456
 457        if (!ifa->ifa_local) {
 458                inet_free_ifa(ifa);
 459                return 0;
 460        }
 461
 462        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 463        last_primary = &in_dev->ifa_list;
 464
 465        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 466             ifap = &ifa1->ifa_next) {
 467                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 468                    ifa->ifa_scope <= ifa1->ifa_scope)
 469                        last_primary = &ifa1->ifa_next;
 470                if (ifa1->ifa_mask == ifa->ifa_mask &&
 471                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 472                        if (ifa1->ifa_local == ifa->ifa_local) {
 473                                inet_free_ifa(ifa);
 474                                return -EEXIST;
 475                        }
 476                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 477                                inet_free_ifa(ifa);
 478                                return -EINVAL;
 479                        }
 480                        ifa->ifa_flags |= IFA_F_SECONDARY;
 481                }
 482        }
 483
 484        /* Allow any devices that wish to register ifaddr validtors to weigh
 485         * in now, before changes are committed.  The rntl lock is serializing
 486         * access here, so the state should not change between a validator call
 487         * and a final notify on commit.  This isn't invoked on promotion under
 488         * the assumption that validators are checking the address itself, and
 489         * not the flags.
 490         */
 491        ivi.ivi_addr = ifa->ifa_address;
 492        ivi.ivi_dev = ifa->ifa_dev;
 493        ivi.extack = extack;
 494        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
 495                                           NETDEV_UP, &ivi);
 496        ret = notifier_to_errno(ret);
 497        if (ret) {
 498                inet_free_ifa(ifa);
 499                return ret;
 500        }
 501
 502        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 503                prandom_seed((__force u32) ifa->ifa_local);
 504                ifap = last_primary;
 505        }
 506
 507        ifa->ifa_next = *ifap;
 508        *ifap = ifa;
 509
 510        inet_hash_insert(dev_net(in_dev->dev), ifa);
 511
 512        cancel_delayed_work(&check_lifetime_work);
 513        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 514
 515        /* Send message first, then call notifier.
 516           Notifier will trigger FIB update, so that
 517           listeners of netlink will know about new ifaddr */
 518        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 519        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 520
 521        return 0;
 522}
 523
 524static int inet_insert_ifa(struct in_ifaddr *ifa)
 525{
 526        return __inet_insert_ifa(ifa, NULL, 0, NULL);
 527}
 528
 529static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 530{
 531        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 532
 533        ASSERT_RTNL();
 534
 535        if (!in_dev) {
 536                inet_free_ifa(ifa);
 537                return -ENOBUFS;
 538        }
 539        ipv4_devconf_setall(in_dev);
 540        neigh_parms_data_state_setall(in_dev->arp_parms);
 541        if (ifa->ifa_dev != in_dev) {
 542                WARN_ON(ifa->ifa_dev);
 543                in_dev_hold(in_dev);
 544                ifa->ifa_dev = in_dev;
 545        }
 546        if (ipv4_is_loopback(ifa->ifa_local))
 547                ifa->ifa_scope = RT_SCOPE_HOST;
 548        return inet_insert_ifa(ifa);
 549}
 550
 551/* Caller must hold RCU or RTNL :
 552 * We dont take a reference on found in_device
 553 */
 554struct in_device *inetdev_by_index(struct net *net, int ifindex)
 555{
 556        struct net_device *dev;
 557        struct in_device *in_dev = NULL;
 558
 559        rcu_read_lock();
 560        dev = dev_get_by_index_rcu(net, ifindex);
 561        if (dev)
 562                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 563        rcu_read_unlock();
 564        return in_dev;
 565}
 566EXPORT_SYMBOL(inetdev_by_index);
 567
 568/* Called only from RTNL semaphored context. No locks. */
 569
 570struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 571                                    __be32 mask)
 572{
 573        ASSERT_RTNL();
 574
 575        for_primary_ifa(in_dev) {
 576                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 577                        return ifa;
 578        } endfor_ifa(in_dev);
 579        return NULL;
 580}
 581
 582static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 583{
 584        struct ip_mreqn mreq = {
 585                .imr_multiaddr.s_addr = ifa->ifa_address,
 586                .imr_ifindex = ifa->ifa_dev->dev->ifindex,
 587        };
 588        int ret;
 589
 590        ASSERT_RTNL();
 591
 592        lock_sock(sk);
 593        if (join)
 594                ret = ip_mc_join_group(sk, &mreq);
 595        else
 596                ret = ip_mc_leave_group(sk, &mreq);
 597        release_sock(sk);
 598
 599        return ret;
 600}
 601
 602static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 603                            struct netlink_ext_ack *extack)
 604{
 605        struct net *net = sock_net(skb->sk);
 606        struct nlattr *tb[IFA_MAX+1];
 607        struct in_device *in_dev;
 608        struct ifaddrmsg *ifm;
 609        struct in_ifaddr *ifa, **ifap;
 610        int err = -EINVAL;
 611
 612        ASSERT_RTNL();
 613
 614        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 615                          extack);
 616        if (err < 0)
 617                goto errout;
 618
 619        ifm = nlmsg_data(nlh);
 620        in_dev = inetdev_by_index(net, ifm->ifa_index);
 621        if (!in_dev) {
 622                err = -ENODEV;
 623                goto errout;
 624        }
 625
 626        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 627             ifap = &ifa->ifa_next) {
 628                if (tb[IFA_LOCAL] &&
 629                    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 630                        continue;
 631
 632                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 633                        continue;
 634
 635                if (tb[IFA_ADDRESS] &&
 636                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 637                    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 638                        continue;
 639
 640                if (ipv4_is_multicast(ifa->ifa_address))
 641                        ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 642                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 643                return 0;
 644        }
 645
 646        err = -EADDRNOTAVAIL;
 647errout:
 648        return err;
 649}
 650
  /* Lifetime value that stands for "never expires". */
  651#define INFINITY_LIFE_TIME      0xFFFFFFFF
  652
  /*
   * Delayed-work handler that enforces address lifetimes.
   *
   * For every bucket of inet_addr_lst[] it makes up to two passes:
   *  - a read-only pass under rcu_read_lock() that decides whether anything
   *    in the bucket has to change and that pulls 'next' forward to the
   *    earliest upcoming expiry;
   *  - only if a change is needed, a pass under rtnl_lock() that deletes
   *    addresses whose valid lifetime has run out and flags addresses whose
   *    preferred lifetime has run out as IFA_F_DEPRECATED (announced with
   *    RTM_NEWADDR).
   * Addresses flagged IFA_F_PERMANENT are skipped in both passes.
   * Finally the work re-queues itself for the computed time, never sooner
   * than ADDRCONF_TIMER_FUZZ_MAX from now.
   */
  653static void check_lifetime(struct work_struct *work)
  654{
  655        unsigned long now, next, next_sec, next_sched;
  656        struct in_ifaddr *ifa;
  657        struct hlist_node *n;
  658        int i;
  659
  660        now = jiffies;
  661        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
  662
  663        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
  664                bool change_needed = false;
  665
                /* Pass 1: look only, under RCU. */
  666                rcu_read_lock();
  667                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
  668                        unsigned long age;
  669
  670                        if (ifa->ifa_flags & IFA_F_PERMANENT)
  671                                continue;
  672
  673                        /* We try to batch several events at once. */
  674                        age = (now - ifa->ifa_tstamp +
  675                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
  676
  677                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
  678                            age >= ifa->ifa_valid_lft) {
  679                                change_needed = true;
  680                        } else if (ifa->ifa_preferred_lft ==
  681                                   INFINITY_LIFE_TIME) {
  682                                continue;
  683                        } else if (age >= ifa->ifa_preferred_lft) {
  684                                if (time_before(ifa->ifa_tstamp +
  685                                                ifa->ifa_valid_lft * HZ, next))
  686                                        next = ifa->ifa_tstamp +
  687                                               ifa->ifa_valid_lft * HZ;
  688
  689                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
  690                                        change_needed = true;
  691                        } else if (time_before(ifa->ifa_tstamp +
  692                                               ifa->ifa_preferred_lft * HZ,
  693                                               next)) {
  694                                next = ifa->ifa_tstamp +
  695                                       ifa->ifa_preferred_lft * HZ;
  696                        }
  697                }
  698                rcu_read_unlock();
  699                if (!change_needed)
  700                        continue;
                /* Pass 2: apply the changes, under RTNL. */
  701                rtnl_lock();
  702                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
  703                        unsigned long age;
  704
  705                        if (ifa->ifa_flags & IFA_F_PERMANENT)
  706                                continue;
  707
  708                        /* We try to batch several events at once. */
  709                        age = (now - ifa->ifa_tstamp +
  710                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
  711
  712                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
  713                            age >= ifa->ifa_valid_lft) {
  714                                struct in_ifaddr **ifap;
  715
                                /* Find the link that points at ifa, as
                                 * inet_del_ifa() needs it for unlinking.
                                 */
  716                                for (ifap = &ifa->ifa_dev->ifa_list;
  717                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
  718                                        if (*ifap == ifa) {
  719                                                inet_del_ifa(ifa->ifa_dev,
  720                                                             ifap, 1);
  721                                                break;
  722                                        }
  723                                }
  724                        } else if (ifa->ifa_preferred_lft !=
  725                                   INFINITY_LIFE_TIME &&
  726                                   age >= ifa->ifa_preferred_lft &&
  727                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
  728                                ifa->ifa_flags |= IFA_F_DEPRECATED;
  729                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
  730                        }
  731                }
  732                rtnl_unlock();
  733        }
  734
  735        next_sec = round_jiffies_up(next);
  736        next_sched = next;
  737
  738        /* If rounded timeout is accurate enough, accept it. */
  739        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
  740                next_sched = next_sec;
  741
  742        now = jiffies;
  743        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
  744        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
  745                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
  746
  747        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
  748                        next_sched - now);
  749}
 750
 751static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 752                             __u32 prefered_lft)
 753{
 754        unsigned long timeout;
 755
 756        ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 757
 758        timeout = addrconf_timeout_fixup(valid_lft, HZ);
 759        if (addrconf_finite_timeout(timeout))
 760                ifa->ifa_valid_lft = timeout;
 761        else
 762                ifa->ifa_flags |= IFA_F_PERMANENT;
 763
 764        timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 765        if (addrconf_finite_timeout(timeout)) {
 766                if (timeout == 0)
 767                        ifa->ifa_flags |= IFA_F_DEPRECATED;
 768                ifa->ifa_preferred_lft = timeout;
 769        }
 770        ifa->ifa_tstamp = jiffies;
 771        if (!ifa->ifa_cstamp)
 772                ifa->ifa_cstamp = ifa->ifa_tstamp;
 773}
 774
 775static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 776                                       __u32 *pvalid_lft, __u32 *pprefered_lft)
 777{
 778        struct nlattr *tb[IFA_MAX+1];
 779        struct in_ifaddr *ifa;
 780        struct ifaddrmsg *ifm;
 781        struct net_device *dev;
 782        struct in_device *in_dev;
 783        int err;
 784
 785        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 786                          NULL);
 787        if (err < 0)
 788                goto errout;
 789
 790        ifm = nlmsg_data(nlh);
 791        err = -EINVAL;
 792        if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
 793                goto errout;
 794
 795        dev = __dev_get_by_index(net, ifm->ifa_index);
 796        err = -ENODEV;
 797        if (!dev)
 798                goto errout;
 799
 800        in_dev = __in_dev_get_rtnl(dev);
 801        err = -ENOBUFS;
 802        if (!in_dev)
 803                goto errout;
 804
 805        ifa = inet_alloc_ifa();
 806        if (!ifa)
 807                /*
 808                 * A potential indev allocation can be left alive, it stays
 809                 * assigned to its device and is destroy with it.
 810                 */
 811                goto errout;
 812
 813        ipv4_devconf_setall(in_dev);
 814        neigh_parms_data_state_setall(in_dev->arp_parms);
 815        in_dev_hold(in_dev);
 816
 817        if (!tb[IFA_ADDRESS])
 818                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 819
 820        INIT_HLIST_NODE(&ifa->hash);
 821        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 822        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 823        ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
 824                                         ifm->ifa_flags;
 825        ifa->ifa_scope = ifm->ifa_scope;
 826        ifa->ifa_dev = in_dev;
 827
 828        ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
 829        ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 830
 831        if (tb[IFA_BROADCAST])
 832                ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 833
 834        if (tb[IFA_LABEL])
 835                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 836        else
 837                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 838
 839        if (tb[IFA_RT_PRIORITY])
 840                ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
 841
 842        if (tb[IFA_CACHEINFO]) {
 843                struct ifa_cacheinfo *ci;
 844
 845                ci = nla_data(tb[IFA_CACHEINFO]);
 846                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 847                        err = -EINVAL;
 848                        goto errout_free;
 849                }
 850                *pvalid_lft = ci->ifa_valid;
 851                *pprefered_lft = ci->ifa_prefered;
 852        }
 853
 854        return ifa;
 855
 856errout_free:
 857        inet_free_ifa(ifa);
 858errout:
 859        return ERR_PTR(err);
 860}
 861
 862static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 863{
 864        struct in_device *in_dev = ifa->ifa_dev;
 865        struct in_ifaddr *ifa1, **ifap;
 866
 867        if (!ifa->ifa_local)
 868                return NULL;
 869
 870        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 871             ifap = &ifa1->ifa_next) {
 872                if (ifa1->ifa_mask == ifa->ifa_mask &&
 873                    inet_ifa_match(ifa1->ifa_address, ifa) &&
 874                    ifa1->ifa_local == ifa->ifa_local)
 875                        return ifa1;
 876        }
 877        return NULL;
 878}
 879
 880static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 881                            struct netlink_ext_ack *extack)
 882{
 883        struct net *net = sock_net(skb->sk);
 884        struct in_ifaddr *ifa;
 885        struct in_ifaddr *ifa_existing;
 886        __u32 valid_lft = INFINITY_LIFE_TIME;
 887        __u32 prefered_lft = INFINITY_LIFE_TIME;
 888
 889        ASSERT_RTNL();
 890
 891        ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 892        if (IS_ERR(ifa))
 893                return PTR_ERR(ifa);
 894
 895        ifa_existing = find_matching_ifa(ifa);
 896        if (!ifa_existing) {
 897                /* It would be best to check for !NLM_F_CREATE here but
 898                 * userspace already relies on not having to provide this.
 899                 */
 900                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 901                if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
 902                        int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
 903                                               true, ifa);
 904
 905                        if (ret < 0) {
 906                                inet_free_ifa(ifa);
 907                                return ret;
 908                        }
 909                }
 910                return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
 911                                         extack);
 912        } else {
 913                u32 new_metric = ifa->ifa_rt_priority;
 914
 915                inet_free_ifa(ifa);
 916
 917                if (nlh->nlmsg_flags & NLM_F_EXCL ||
 918                    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 919                        return -EEXIST;
 920                ifa = ifa_existing;
 921
 922                if (ifa->ifa_rt_priority != new_metric) {
 923                        fib_modify_prefix_metric(ifa, new_metric);
 924                        ifa->ifa_rt_priority = new_metric;
 925                }
 926
 927                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 928                cancel_delayed_work(&check_lifetime_work);
 929                queue_delayed_work(system_power_efficient_wq,
 930                                &check_lifetime_work, 0);
 931                rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 932        }
 933        return 0;
 934}
 935
 936/*
 937 *      Determine a default network mask, based on the IP address.
 938 */
 939
 940static int inet_abc_len(__be32 addr)
 941{
 942        int rc = -1;    /* Something else, probably a multicast. */
 943
 944        if (ipv4_is_zeronet(addr))
 945                rc = 0;
 946        else {
 947                __u32 haddr = ntohl(addr);
 948
 949                if (IN_CLASSA(haddr))
 950                        rc = 8;
 951                else if (IN_CLASSB(haddr))
 952                        rc = 16;
 953                else if (IN_CLASSC(haddr))
 954                        rc = 24;
 955        }
 956
 957        return rc;
 958}
 959
 960
 961int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
 962{
 963        struct sockaddr_in sin_orig;
 964        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
 965        struct in_device *in_dev;
 966        struct in_ifaddr **ifap = NULL;
 967        struct in_ifaddr *ifa = NULL;
 968        struct net_device *dev;
 969        char *colon;
 970        int ret = -EFAULT;
 971        int tryaddrmatch = 0;
 972
 973        ifr->ifr_name[IFNAMSIZ - 1] = 0;
 974
 975        /* save original address for comparison */
 976        memcpy(&sin_orig, sin, sizeof(*sin));
 977
 978        colon = strchr(ifr->ifr_name, ':');
 979        if (colon)
 980                *colon = 0;
 981
 982        dev_load(net, ifr->ifr_name);
 983
 984        switch (cmd) {
 985        case SIOCGIFADDR:       /* Get interface address */
 986        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 987        case SIOCGIFDSTADDR:    /* Get the destination address */
 988        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 989                /* Note that these ioctls will not sleep,
 990                   so that we do not impose a lock.
 991                   One day we will be forced to put shlock here (I mean SMP)
 992                 */
 993                tryaddrmatch = (sin_orig.sin_family == AF_INET);
 994                memset(sin, 0, sizeof(*sin));
 995                sin->sin_family = AF_INET;
 996                break;
 997
 998        case SIOCSIFFLAGS:
 999                ret = -EPERM;
1000                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001                        goto out;
1002                break;
1003        case SIOCSIFADDR:       /* Set interface address (and family) */
1004        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1005        case SIOCSIFDSTADDR:    /* Set the destination address */
1006        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1007                ret = -EPERM;
1008                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1009                        goto out;
1010                ret = -EINVAL;
1011                if (sin->sin_family != AF_INET)
1012                        goto out;
1013                break;
1014        default:
1015                ret = -EINVAL;
1016                goto out;
1017        }
1018
1019        rtnl_lock();
1020
1021        ret = -ENODEV;
1022        dev = __dev_get_by_name(net, ifr->ifr_name);
1023        if (!dev)
1024                goto done;
1025
1026        if (colon)
1027                *colon = ':';
1028
1029        in_dev = __in_dev_get_rtnl(dev);
1030        if (in_dev) {
1031                if (tryaddrmatch) {
1032                        /* Matthias Andree */
1033                        /* compare label and address (4.4BSD style) */
1034                        /* note: we only do this for a limited set of ioctls
1035                           and only if the original address family was AF_INET.
1036                           This is checked above. */
1037                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038                             ifap = &ifa->ifa_next) {
1039                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1040                                    sin_orig.sin_addr.s_addr ==
1041                                                        ifa->ifa_local) {
1042                                        break; /* found */
1043                                }
1044                        }
1045                }
1046                /* we didn't get a match, maybe the application is
1047                   4.3BSD-style and passed in junk so we fall back to
1048                   comparing just the label */
1049                if (!ifa) {
1050                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1051                             ifap = &ifa->ifa_next)
1052                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1053                                        break;
1054                }
1055        }
1056
1057        ret = -EADDRNOTAVAIL;
1058        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1059                goto done;
1060
1061        switch (cmd) {
1062        case SIOCGIFADDR:       /* Get interface address */
1063                ret = 0;
1064                sin->sin_addr.s_addr = ifa->ifa_local;
1065                break;
1066
1067        case SIOCGIFBRDADDR:    /* Get the broadcast address */
1068                ret = 0;
1069                sin->sin_addr.s_addr = ifa->ifa_broadcast;
1070                break;
1071
1072        case SIOCGIFDSTADDR:    /* Get the destination address */
1073                ret = 0;
1074                sin->sin_addr.s_addr = ifa->ifa_address;
1075                break;
1076
1077        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1078                ret = 0;
1079                sin->sin_addr.s_addr = ifa->ifa_mask;
1080                break;
1081
1082        case SIOCSIFFLAGS:
1083                if (colon) {
1084                        ret = -EADDRNOTAVAIL;
1085                        if (!ifa)
1086                                break;
1087                        ret = 0;
1088                        if (!(ifr->ifr_flags & IFF_UP))
1089                                inet_del_ifa(in_dev, ifap, 1);
1090                        break;
1091                }
1092                ret = dev_change_flags(dev, ifr->ifr_flags);
1093                break;
1094
1095        case SIOCSIFADDR:       /* Set interface address (and family) */
1096                ret = -EINVAL;
1097                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1098                        break;
1099
1100                if (!ifa) {
1101                        ret = -ENOBUFS;
1102                        ifa = inet_alloc_ifa();
1103                        if (!ifa)
1104                                break;
1105                        INIT_HLIST_NODE(&ifa->hash);
1106                        if (colon)
1107                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1108                        else
1109                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1110                } else {
1111                        ret = 0;
1112                        if (ifa->ifa_local == sin->sin_addr.s_addr)
1113                                break;
1114                        inet_del_ifa(in_dev, ifap, 0);
1115                        ifa->ifa_broadcast = 0;
1116                        ifa->ifa_scope = 0;
1117                }
1118
1119                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1120
1121                if (!(dev->flags & IFF_POINTOPOINT)) {
1122                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1123                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1124                        if ((dev->flags & IFF_BROADCAST) &&
1125                            ifa->ifa_prefixlen < 31)
1126                                ifa->ifa_broadcast = ifa->ifa_address |
1127                                                     ~ifa->ifa_mask;
1128                } else {
1129                        ifa->ifa_prefixlen = 32;
1130                        ifa->ifa_mask = inet_make_mask(32);
1131                }
1132                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1133                ret = inet_set_ifa(dev, ifa);
1134                break;
1135
1136        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1137                ret = 0;
1138                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1139                        inet_del_ifa(in_dev, ifap, 0);
1140                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
1141                        inet_insert_ifa(ifa);
1142                }
1143                break;
1144
1145        case SIOCSIFDSTADDR:    /* Set the destination address */
1146                ret = 0;
1147                if (ifa->ifa_address == sin->sin_addr.s_addr)
1148                        break;
1149                ret = -EINVAL;
1150                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1151                        break;
1152                ret = 0;
1153                inet_del_ifa(in_dev, ifap, 0);
1154                ifa->ifa_address = sin->sin_addr.s_addr;
1155                inet_insert_ifa(ifa);
1156                break;
1157
1158        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1159
1160                /*
1161                 *      The mask we set must be legal.
1162                 */
1163                ret = -EINVAL;
1164                if (bad_mask(sin->sin_addr.s_addr, 0))
1165                        break;
1166                ret = 0;
1167                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1168                        __be32 old_mask = ifa->ifa_mask;
1169                        inet_del_ifa(in_dev, ifap, 0);
1170                        ifa->ifa_mask = sin->sin_addr.s_addr;
1171                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1172
1173                        /* See if current broadcast address matches
1174                         * with current netmask, then recalculate
1175                         * the broadcast address. Otherwise it's a
1176                         * funny address, so don't touch it since
1177                         * the user seems to know what (s)he's doing...
1178                         */
1179                        if ((dev->flags & IFF_BROADCAST) &&
1180                            (ifa->ifa_prefixlen < 31) &&
1181                            (ifa->ifa_broadcast ==
1182                             (ifa->ifa_local|~old_mask))) {
1183                                ifa->ifa_broadcast = (ifa->ifa_local |
1184                                                      ~sin->sin_addr.s_addr);
1185                        }
1186                        inet_insert_ifa(ifa);
1187                }
1188                break;
1189        }
1190done:
1191        rtnl_unlock();
1192out:
1193        return ret;
1194}
1195
1196static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1197{
1198        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1199        struct in_ifaddr *ifa;
1200        struct ifreq ifr;
1201        int done = 0;
1202
1203        if (WARN_ON(size > sizeof(struct ifreq)))
1204                goto out;
1205
1206        if (!in_dev)
1207                goto out;
1208
1209        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1210                if (!buf) {
1211                        done += size;
1212                        continue;
1213                }
1214                if (len < size)
1215                        break;
1216                memset(&ifr, 0, sizeof(struct ifreq));
1217                strcpy(ifr.ifr_name, ifa->ifa_label);
1218
1219                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1220                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1221                                                                ifa->ifa_local;
1222
1223                if (copy_to_user(buf + done, &ifr, size)) {
1224                        done = -EFAULT;
1225                        break;
1226                }
1227                len  -= size;
1228                done += size;
1229        }
1230out:
1231        return done;
1232}
1233
1234static __be32 in_dev_select_addr(const struct in_device *in_dev,
1235                                 int scope)
1236{
1237        for_primary_ifa(in_dev) {
1238                if (ifa->ifa_scope != RT_SCOPE_LINK &&
1239                    ifa->ifa_scope <= scope)
1240                        return ifa->ifa_local;
1241        } endfor_ifa(in_dev);
1242
1243        return 0;
1244}
1245
1246__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1247{
1248        __be32 addr = 0;
1249        struct in_device *in_dev;
1250        struct net *net = dev_net(dev);
1251        int master_idx;
1252
1253        rcu_read_lock();
1254        in_dev = __in_dev_get_rcu(dev);
1255        if (!in_dev)
1256                goto no_in_dev;
1257
1258        for_primary_ifa(in_dev) {
1259                if (ifa->ifa_scope > scope)
1260                        continue;
1261                if (!dst || inet_ifa_match(dst, ifa)) {
1262                        addr = ifa->ifa_local;
1263                        break;
1264                }
1265                if (!addr)
1266                        addr = ifa->ifa_local;
1267        } endfor_ifa(in_dev);
1268
1269        if (addr)
1270                goto out_unlock;
1271no_in_dev:
1272        master_idx = l3mdev_master_ifindex_rcu(dev);
1273
1274        /* For VRFs, the VRF device takes the place of the loopback device,
1275         * with addresses on it being preferred.  Note in such cases the
1276         * loopback device will be among the devices that fail the master_idx
1277         * equality check in the loop below.
1278         */
1279        if (master_idx &&
1280            (dev = dev_get_by_index_rcu(net, master_idx)) &&
1281            (in_dev = __in_dev_get_rcu(dev))) {
1282                addr = in_dev_select_addr(in_dev, scope);
1283                if (addr)
1284                        goto out_unlock;
1285        }
1286
1287        /* Not loopback addresses on loopback should be preferred
1288           in this case. It is important that lo is the first interface
1289           in dev_base list.
1290         */
1291        for_each_netdev_rcu(net, dev) {
1292                if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1293                        continue;
1294
1295                in_dev = __in_dev_get_rcu(dev);
1296                if (!in_dev)
1297                        continue;
1298
1299                addr = in_dev_select_addr(in_dev, scope);
1300                if (addr)
1301                        goto out_unlock;
1302        }
1303out_unlock:
1304        rcu_read_unlock();
1305        return addr;
1306}
1307EXPORT_SYMBOL(inet_select_addr);
1308
1309static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1310                              __be32 local, int scope)
1311{
1312        int same = 0;
1313        __be32 addr = 0;
1314
1315        for_ifa(in_dev) {
1316                if (!addr &&
1317                    (local == ifa->ifa_local || !local) &&
1318                    ifa->ifa_scope <= scope) {
1319                        addr = ifa->ifa_local;
1320                        if (same)
1321                                break;
1322                }
1323                if (!same) {
1324                        same = (!local || inet_ifa_match(local, ifa)) &&
1325                                (!dst || inet_ifa_match(dst, ifa));
1326                        if (same && addr) {
1327                                if (local || !dst)
1328                                        break;
1329                                /* Is the selected addr into dst subnet? */
1330                                if (inet_ifa_match(addr, ifa))
1331                                        break;
1332                                /* No, then can we use new local src? */
1333                                if (ifa->ifa_scope <= scope) {
1334                                        addr = ifa->ifa_local;
1335                                        break;
1336                                }
1337                                /* search for large dst subnet for addr */
1338                                same = 0;
1339                        }
1340                }
1341        } endfor_ifa(in_dev);
1342
1343        return same ? addr : 0;
1344}
1345
1346/*
1347 * Confirm that local IP address exists using wildcards:
1348 * - net: netns to check, cannot be NULL
1349 * - in_dev: only on this interface, NULL=any interface
1350 * - dst: only in the same subnet as dst, 0=any dst
1351 * - local: address, 0=autoselect the local address
1352 * - scope: maximum allowed scope value for the local address
1353 */
1354__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1355                         __be32 dst, __be32 local, int scope)
1356{
1357        __be32 addr = 0;
1358        struct net_device *dev;
1359
1360        if (in_dev)
1361                return confirm_addr_indev(in_dev, dst, local, scope);
1362
1363        rcu_read_lock();
1364        for_each_netdev_rcu(net, dev) {
1365                in_dev = __in_dev_get_rcu(dev);
1366                if (in_dev) {
1367                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1368                        if (addr)
1369                                break;
1370                }
1371        }
1372        rcu_read_unlock();
1373
1374        return addr;
1375}
1376EXPORT_SYMBOL(inet_confirm_addr);
1377
1378/*
1379 *      Device notifier
1380 */
1381
1382int register_inetaddr_notifier(struct notifier_block *nb)
1383{
1384        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1385}
1386EXPORT_SYMBOL(register_inetaddr_notifier);
1387
1388int unregister_inetaddr_notifier(struct notifier_block *nb)
1389{
1390        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1391}
1392EXPORT_SYMBOL(unregister_inetaddr_notifier);
1393
1394int register_inetaddr_validator_notifier(struct notifier_block *nb)
1395{
1396        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1397}
1398EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1399
1400int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1401{
1402        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1403            nb);
1404}
1405EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1406
1407/* Rename ifa_labels for a device name change. Make some effort to preserve
1408 * existing alias numbering and to create unique labels if possible.
1409*/
1410static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1411{
1412        struct in_ifaddr *ifa;
1413        int named = 0;
1414
1415        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1416                char old[IFNAMSIZ], *dot;
1417
1418                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1419                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1420                if (named++ == 0)
1421                        goto skip;
1422                dot = strchr(old, ':');
1423                if (!dot) {
1424                        sprintf(old, ":%d", named);
1425                        dot = old;
1426                }
1427                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1428                        strcat(ifa->ifa_label, dot);
1429                else
1430                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1431skip:
1432                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1433        }
1434}
1435
1436static bool inetdev_valid_mtu(unsigned int mtu)
1437{
1438        return mtu >= IPV4_MIN_MTU;
1439}
1440
1441static void inetdev_send_gratuitous_arp(struct net_device *dev,
1442                                        struct in_device *in_dev)
1443
1444{
1445        struct in_ifaddr *ifa;
1446
1447        for (ifa = in_dev->ifa_list; ifa;
1448             ifa = ifa->ifa_next) {
1449                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1450                         ifa->ifa_local, dev,
1451                         ifa->ifa_local, NULL,
1452                         dev->dev_addr, NULL);
1453        }
1454}
1455
1456/* Called only under RTNL semaphore */
1457
1458static int inetdev_event(struct notifier_block *this, unsigned long event,
1459                         void *ptr)
1460{
1461        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1462        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1463
1464        ASSERT_RTNL();
1465
1466        if (!in_dev) {
1467                if (event == NETDEV_REGISTER) {
1468                        in_dev = inetdev_init(dev);
1469                        if (IS_ERR(in_dev))
1470                                return notifier_from_errno(PTR_ERR(in_dev));
1471                        if (dev->flags & IFF_LOOPBACK) {
1472                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1473                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1474                        }
1475                } else if (event == NETDEV_CHANGEMTU) {
1476                        /* Re-enabling IP */
1477                        if (inetdev_valid_mtu(dev->mtu))
1478                                in_dev = inetdev_init(dev);
1479                }
1480                goto out;
1481        }
1482
1483        switch (event) {
1484        case NETDEV_REGISTER:
1485                pr_debug("%s: bug\n", __func__);
1486                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1487                break;
1488        case NETDEV_UP:
1489                if (!inetdev_valid_mtu(dev->mtu))
1490                        break;
1491                if (dev->flags & IFF_LOOPBACK) {
1492                        struct in_ifaddr *ifa = inet_alloc_ifa();
1493
1494                        if (ifa) {
1495                                INIT_HLIST_NODE(&ifa->hash);
1496                                ifa->ifa_local =
1497                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1498                                ifa->ifa_prefixlen = 8;
1499                                ifa->ifa_mask = inet_make_mask(8);
1500                                in_dev_hold(in_dev);
1501                                ifa->ifa_dev = in_dev;
1502                                ifa->ifa_scope = RT_SCOPE_HOST;
1503                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1504                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1505                                                 INFINITY_LIFE_TIME);
1506                                ipv4_devconf_setall(in_dev);
1507                                neigh_parms_data_state_setall(in_dev->arp_parms);
1508                                inet_insert_ifa(ifa);
1509                        }
1510                }
1511                ip_mc_up(in_dev);
1512                /* fall through */
1513        case NETDEV_CHANGEADDR:
1514                if (!IN_DEV_ARP_NOTIFY(in_dev))
1515                        break;
1516                /* fall through */
1517        case NETDEV_NOTIFY_PEERS:
1518                /* Send gratuitous ARP to notify of link change */
1519                inetdev_send_gratuitous_arp(dev, in_dev);
1520                break;
1521        case NETDEV_DOWN:
1522                ip_mc_down(in_dev);
1523                break;
1524        case NETDEV_PRE_TYPE_CHANGE:
1525                ip_mc_unmap(in_dev);
1526                break;
1527        case NETDEV_POST_TYPE_CHANGE:
1528                ip_mc_remap(in_dev);
1529                break;
1530        case NETDEV_CHANGEMTU:
1531                if (inetdev_valid_mtu(dev->mtu))
1532                        break;
1533                /* disable IP when MTU is not enough */
1534                /* fall through */
1535        case NETDEV_UNREGISTER:
1536                inetdev_destroy(in_dev);
1537                break;
1538        case NETDEV_CHANGENAME:
1539                /* Do not notify about label change, this event is
1540                 * not interesting to applications using netlink.
1541                 */
1542                inetdev_changename(dev, in_dev);
1543
1544                devinet_sysctl_unregister(in_dev);
1545                devinet_sysctl_register(in_dev);
1546                break;
1547        }
1548out:
1549        return NOTIFY_DONE;
1550}
1551
1552static struct notifier_block ip_netdev_notifier = {
1553        .notifier_call = inetdev_event,
1554};
1555
1556static size_t inet_nlmsg_size(void)
1557{
1558        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1559               + nla_total_size(4) /* IFA_ADDRESS */
1560               + nla_total_size(4) /* IFA_LOCAL */
1561               + nla_total_size(4) /* IFA_BROADCAST */
1562               + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1563               + nla_total_size(4)  /* IFA_FLAGS */
1564               + nla_total_size(4)  /* IFA_RT_PRIORITY */
1565               + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1566}
1567
1568static inline u32 cstamp_delta(unsigned long cstamp)
1569{
1570        return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1571}
1572
1573static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1574                         unsigned long tstamp, u32 preferred, u32 valid)
1575{
1576        struct ifa_cacheinfo ci;
1577
1578        ci.cstamp = cstamp_delta(cstamp);
1579        ci.tstamp = cstamp_delta(tstamp);
1580        ci.ifa_prefered = preferred;
1581        ci.ifa_valid = valid;
1582
1583        return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1584}
1585
1586static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1587                            u32 portid, u32 seq, int event, unsigned int flags)
1588{
1589        struct ifaddrmsg *ifm;
1590        struct nlmsghdr  *nlh;
1591        u32 preferred, valid;
1592
1593        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1594        if (!nlh)
1595                return -EMSGSIZE;
1596
1597        ifm = nlmsg_data(nlh);
1598        ifm->ifa_family = AF_INET;
1599        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1600        ifm->ifa_flags = ifa->ifa_flags;
1601        ifm->ifa_scope = ifa->ifa_scope;
1602        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1603
1604        if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1605                preferred = ifa->ifa_preferred_lft;
1606                valid = ifa->ifa_valid_lft;
1607                if (preferred != INFINITY_LIFE_TIME) {
1608                        long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1609
1610                        if (preferred > tval)
1611                                preferred -= tval;
1612                        else
1613                                preferred = 0;
1614                        if (valid != INFINITY_LIFE_TIME) {
1615                                if (valid > tval)
1616                                        valid -= tval;
1617                                else
1618                                        valid = 0;
1619                        }
1620                }
1621        } else {
1622                preferred = INFINITY_LIFE_TIME;
1623                valid = INFINITY_LIFE_TIME;
1624        }
1625        if ((ifa->ifa_address &&
1626             nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1627            (ifa->ifa_local &&
1628             nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1629            (ifa->ifa_broadcast &&
1630             nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1631            (ifa->ifa_label[0] &&
1632             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1633            nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1634            (ifa->ifa_rt_priority &&
1635             nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1636            put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1637                          preferred, valid))
1638                goto nla_put_failure;
1639
1640        nlmsg_end(skb, nlh);
1641        return 0;
1642
1643nla_put_failure:
1644        nlmsg_cancel(skb, nlh);
1645        return -EMSGSIZE;
1646}
1647
/*
 * Netlink dump callback for IPv4 addresses.
 *
 * Walks every bucket of net->dev_index_head[], every device in each bucket
 * and every entry of that device's ifa_list, and appends one RTM_NEWADDR
 * message flagged NLM_F_MULTI per address to @skb via inet_fill_ifaddr().
 *
 * The walk position lives in cb->args[]: it is read on entry as the
 * starting point and written back on exit.
 *   args[0] - hash bucket index
 *   args[1] - device index inside that bucket
 *   args[2] - address index inside that device's ifa_list
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;

        /* Starting position: the values stored in cb->args[] on exit. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited; it is zeroed after. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                /* Fold the address generation id and device base sequence. */
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        /* Devices before the starting index are skipped. */
                        if (idx < s_idx)
                                goto cont;
                        /* Beyond the starting device: begin at address 0. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                /* Addresses before the starting index are skipped. */
                                if (ip_idx < s_ip_idx)
                                        continue;
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).portid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) < 0) {
                                        /* Fill failed: stop here and save h/idx/ip_idx. */
                                        rcu_read_unlock();
                                        goto done;
                                }
                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Record where this pass stopped. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1704
1705static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1706                      u32 portid)
1707{
1708        struct sk_buff *skb;
1709        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1710        int err = -ENOBUFS;
1711        struct net *net;
1712
1713        net = dev_net(ifa->ifa_dev->dev);
1714        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1715        if (!skb)
1716                goto errout;
1717
1718        err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1719        if (err < 0) {
1720                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1721                WARN_ON(err == -EMSGSIZE);
1722                kfree_skb(skb);
1723                goto errout;
1724        }
1725        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1726        return;
1727errout:
1728        if (err < 0)
1729                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1730}
1731
1732static size_t inet_get_link_af_size(const struct net_device *dev,
1733                                    u32 ext_filter_mask)
1734{
1735        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1736
1737        if (!in_dev)
1738                return 0;
1739
1740        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1741}
1742
1743static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1744                             u32 ext_filter_mask)
1745{
1746        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1747        struct nlattr *nla;
1748        int i;
1749
1750        if (!in_dev)
1751                return -ENODATA;
1752
1753        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1754        if (!nla)
1755                return -EMSGSIZE;
1756
1757        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1758                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1759
1760        return 0;
1761}
1762
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF is declared as a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1766
1767static int inet_validate_link_af(const struct net_device *dev,
1768                                 const struct nlattr *nla)
1769{
1770        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1771        int err, rem;
1772
1773        if (dev && !__in_dev_get_rcu(dev))
1774                return -EAFNOSUPPORT;
1775
1776        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1777        if (err < 0)
1778                return err;
1779
1780        if (tb[IFLA_INET_CONF]) {
1781                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1782                        int cfgid = nla_type(a);
1783
1784                        if (nla_len(a) < 4)
1785                                return -EINVAL;
1786
1787                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1788                                return -EINVAL;
1789                }
1790        }
1791
1792        return 0;
1793}
1794
1795static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1796{
1797        struct in_device *in_dev = __in_dev_get_rcu(dev);
1798        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1799        int rem;
1800
1801        if (!in_dev)
1802                return -EAFNOSUPPORT;
1803
1804        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1805                BUG();
1806
1807        if (tb[IFLA_INET_CONF]) {
1808                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1809                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1810        }
1811
1812        return 0;
1813}
1814
1815static int inet_netconf_msgsize_devconf(int type)
1816{
1817        int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1818                   + nla_total_size(4); /* NETCONFA_IFINDEX */
1819        bool all = false;
1820
1821        if (type == NETCONFA_ALL)
1822                all = true;
1823
1824        if (all || type == NETCONFA_FORWARDING)
1825                size += nla_total_size(4);
1826        if (all || type == NETCONFA_RP_FILTER)
1827                size += nla_total_size(4);
1828        if (all || type == NETCONFA_MC_FORWARDING)
1829                size += nla_total_size(4);
1830        if (all || type == NETCONFA_PROXY_NEIGH)
1831                size += nla_total_size(4);
1832        if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1833                size += nla_total_size(4);
1834
1835        return size;
1836}
1837
1838static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1839                                     struct ipv4_devconf *devconf, u32 portid,
1840                                     u32 seq, int event, unsigned int flags,
1841                                     int type)
1842{
1843        struct nlmsghdr  *nlh;
1844        struct netconfmsg *ncm;
1845        bool all = false;
1846
1847        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1848                        flags);
1849        if (!nlh)
1850                return -EMSGSIZE;
1851
1852        if (type == NETCONFA_ALL)
1853                all = true;
1854
1855        ncm = nlmsg_data(nlh);
1856        ncm->ncm_family = AF_INET;
1857
1858        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1859                goto nla_put_failure;
1860
1861        if (!devconf)
1862                goto out;
1863
1864        if ((all || type == NETCONFA_FORWARDING) &&
1865            nla_put_s32(skb, NETCONFA_FORWARDING,
1866                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1867                goto nla_put_failure;
1868        if ((all || type == NETCONFA_RP_FILTER) &&
1869            nla_put_s32(skb, NETCONFA_RP_FILTER,
1870                        IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1871                goto nla_put_failure;
1872        if ((all || type == NETCONFA_MC_FORWARDING) &&
1873            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1874                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1875                goto nla_put_failure;
1876        if ((all || type == NETCONFA_PROXY_NEIGH) &&
1877            nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1878                        IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1879                goto nla_put_failure;
1880        if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1881            nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1882                        IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1883                goto nla_put_failure;
1884
1885out:
1886        nlmsg_end(skb, nlh);
1887        return 0;
1888
1889nla_put_failure:
1890        nlmsg_cancel(skb, nlh);
1891        return -EMSGSIZE;
1892}
1893
1894void inet_netconf_notify_devconf(struct net *net, int event, int type,
1895                                 int ifindex, struct ipv4_devconf *devconf)
1896{
1897        struct sk_buff *skb;
1898        int err = -ENOBUFS;
1899
1900        skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1901        if (!skb)
1902                goto errout;
1903
1904        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1905                                        event, 0, type);
1906        if (err < 0) {
1907                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1908                WARN_ON(err == -EMSGSIZE);
1909                kfree_skb(skb);
1910                goto errout;
1911        }
1912        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1913        return;
1914errout:
1915        if (err < 0)
1916                rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1917}
1918
/*
 * Attribute policy passed to nlmsg_parse() by inet_netconf_get_devconf().
 * Every listed attribute is declared with a length of sizeof(int).
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
1926
1927static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1928                                    struct nlmsghdr *nlh,
1929                                    struct netlink_ext_ack *extack)
1930{
1931        struct net *net = sock_net(in_skb->sk);
1932        struct nlattr *tb[NETCONFA_MAX+1];
1933        struct netconfmsg *ncm;
1934        struct sk_buff *skb;
1935        struct ipv4_devconf *devconf;
1936        struct in_device *in_dev;
1937        struct net_device *dev;
1938        int ifindex;
1939        int err;
1940
1941        err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1942                          devconf_ipv4_policy, extack);
1943        if (err < 0)
1944                goto errout;
1945
1946        err = -EINVAL;
1947        if (!tb[NETCONFA_IFINDEX])
1948                goto errout;
1949
1950        ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1951        switch (ifindex) {
1952        case NETCONFA_IFINDEX_ALL:
1953                devconf = net->ipv4.devconf_all;
1954                break;
1955        case NETCONFA_IFINDEX_DEFAULT:
1956                devconf = net->ipv4.devconf_dflt;
1957                break;
1958        default:
1959                dev = __dev_get_by_index(net, ifindex);
1960                if (!dev)
1961                        goto errout;
1962                in_dev = __in_dev_get_rtnl(dev);
1963                if (!in_dev)
1964                        goto errout;
1965                devconf = &in_dev->cnf;
1966                break;
1967        }
1968
1969        err = -ENOBUFS;
1970        skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1971        if (!skb)
1972                goto errout;
1973
1974        err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1975                                        NETLINK_CB(in_skb).portid,
1976                                        nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1977                                        NETCONFA_ALL);
1978        if (err < 0) {
1979                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1980                WARN_ON(err == -EMSGSIZE);
1981                kfree_skb(skb);
1982                goto errout;
1983        }
1984        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1985errout:
1986        return err;
1987}
1988
/*
 * Netlink dump callback for IPv4 netconf.
 *
 * Emits one RTM_NEWNETCONF message (NLM_F_MULTI, all attributes) for every
 * device with an in_device found in net->dev_index_head[], followed by one
 * message for the per-net "all" table and one for the "default" table.
 *
 * The walk position lives in cb->args[]: it is read on entry as the
 * starting point and written back on exit.
 *   args[0] - hash bucket index; the values NETDEV_HASHENTRIES and
 *             NETDEV_HASHENTRIES + 1 stand for the "all" and "default" steps
 *   args[1] - device index inside the bucket
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
                                     struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct hlist_head *head;

        /* Starting position: the values stored in cb->args[] on exit. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];

        /* s_idx only applies to the first bucket visited; it is zeroed after. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                /* Fold the address generation id and device base sequence. */
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        /* Devices before the starting index are skipped. */
                        if (idx < s_idx)
                                goto cont;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
                                                      &in_dev->cnf,
                                                      NETLINK_CB(cb->skb).portid,
                                                      cb->nlh->nlmsg_seq,
                                                      RTM_NEWNETCONF,
                                                      NLM_F_MULTI,
                                                      NETCONFA_ALL) < 0) {
                                /* Fill failed: stop here and save h/idx. */
                                rcu_read_unlock();
                                goto done;
                        }
                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
                        idx++;
                }
                rcu_read_unlock();
        }
        /* All buckets done: emit the "all" entry, then advance h. */
        if (h == NETDEV_HASHENTRIES) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
                                              net->ipv4.devconf_all,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              NETCONFA_ALL) < 0)
                        goto done;
                else
                        h++;
        }
        /* "all" entry done: emit the "default" entry, then advance h. */
        if (h == NETDEV_HASHENTRIES + 1) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
                                              net->ipv4.devconf_dflt,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              NETCONFA_ALL) < 0)
                        goto done;
                else
                        h++;
        }
done:
        /* Record where this pass stopped. */
        cb->args[0] = h;
        cb->args[1] = idx;

        return skb->len;
}
2059
2060#ifdef CONFIG_SYSCTL
2061
2062static void devinet_copy_dflt_conf(struct net *net, int i)
2063{
2064        struct net_device *dev;
2065
2066        rcu_read_lock();
2067        for_each_netdev_rcu(net, dev) {
2068                struct in_device *in_dev;
2069
2070                in_dev = __in_dev_get_rcu(dev);
2071                if (in_dev && !test_bit(i, in_dev->cnf.state))
2072                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2073        }
2074        rcu_read_unlock();
2075}
2076
2077/* called with RTNL locked */
2078static void inet_forward_change(struct net *net)
2079{
2080        struct net_device *dev;
2081        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2082
2083        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2084        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2085        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2086                                    NETCONFA_FORWARDING,
2087                                    NETCONFA_IFINDEX_ALL,
2088                                    net->ipv4.devconf_all);
2089        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2090                                    NETCONFA_FORWARDING,
2091                                    NETCONFA_IFINDEX_DEFAULT,
2092                                    net->ipv4.devconf_dflt);
2093
2094        for_each_netdev(net, dev) {
2095                struct in_device *in_dev;
2096
2097                if (on)
2098                        dev_disable_lro(dev);
2099
2100                in_dev = __in_dev_get_rtnl(dev);
2101                if (in_dev) {
2102                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2103                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2104                                                    NETCONFA_FORWARDING,
2105                                                    dev->ifindex, &in_dev->cnf);
2106                }
2107        }
2108}
2109
2110static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2111{
2112        if (cnf == net->ipv4.devconf_dflt)
2113                return NETCONFA_IFINDEX_DEFAULT;
2114        else if (cnf == net->ipv4.devconf_all)
2115                return NETCONFA_IFINDEX_ALL;
2116        else {
2117                struct in_device *idev
2118                        = container_of(cnf, struct in_device, cnf);
2119                return idev->dev->ifindex;
2120        }
2121}
2122
2123static int devinet_conf_proc(struct ctl_table *ctl, int write,
2124                             void __user *buffer,
2125                             size_t *lenp, loff_t *ppos)
2126{
2127        int old_value = *(int *)ctl->data;
2128        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2129        int new_value = *(int *)ctl->data;
2130
2131        if (write) {
2132                struct ipv4_devconf *cnf = ctl->extra1;
2133                struct net *net = ctl->extra2;
2134                int i = (int *)ctl->data - cnf->data;
2135                int ifindex;
2136
2137                set_bit(i, cnf->state);
2138
2139                if (cnf == net->ipv4.devconf_dflt)
2140                        devinet_copy_dflt_conf(net, i);
2141                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2142                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2143                        if ((new_value == 0) && (old_value != 0))
2144                                rt_cache_flush(net);
2145
2146                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2147                    new_value != old_value) {
2148                        ifindex = devinet_conf_ifindex(net, cnf);
2149                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2150                                                    NETCONFA_RP_FILTER,
2151                                                    ifindex, cnf);
2152                }
2153                if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2154                    new_value != old_value) {
2155                        ifindex = devinet_conf_ifindex(net, cnf);
2156                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2157                                                    NETCONFA_PROXY_NEIGH,
2158                                                    ifindex, cnf);
2159                }
2160                if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2161                    new_value != old_value) {
2162                        ifindex = devinet_conf_ifindex(net, cnf);
2163                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2164                                                    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2165                                                    ifindex, cnf);
2166                }
2167        }
2168
2169        return ret;
2170}
2171
2172static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2173                                  void __user *buffer,
2174                                  size_t *lenp, loff_t *ppos)
2175{
2176        int *valp = ctl->data;
2177        int val = *valp;
2178        loff_t pos = *ppos;
2179        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2180
2181        if (write && *valp != val) {
2182                struct net *net = ctl->extra2;
2183
2184                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2185                        if (!rtnl_trylock()) {
2186                                /* Restore the original values before restarting */
2187                                *valp = val;
2188                                *ppos = pos;
2189                                return restart_syscall();
2190                        }
2191                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2192                                inet_forward_change(net);
2193                        } else {
2194                                struct ipv4_devconf *cnf = ctl->extra1;
2195                                struct in_device *idev =
2196                                        container_of(cnf, struct in_device, cnf);
2197                                if (*valp)
2198                                        dev_disable_lro(idev->dev);
2199                                inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2200                                                            NETCONFA_FORWARDING,
2201                                                            idev->dev->ifindex,
2202                                                            cnf);
2203                        }
2204                        rtnl_unlock();
2205                        rt_cache_flush(net);
2206                } else
2207                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2208                                                    NETCONFA_FORWARDING,
2209                                                    NETCONFA_IFINDEX_DEFAULT,
2210                                                    net->ipv4.devconf_dflt);
2211        }
2212
2213        return ret;
2214}
2215
2216static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2217                                void __user *buffer,
2218                                size_t *lenp, loff_t *ppos)
2219{
2220        int *valp = ctl->data;
2221        int val = *valp;
2222        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2223        struct net *net = ctl->extra2;
2224
2225        if (write && *valp != val)
2226                rt_cache_flush(net);
2227
2228        return ret;
2229}
2230
/*
 * Expand to one ctl_table initializer for devconf attribute
 * IPV4_DEVCONF_<attr>: @name is the procname, @mval the file mode and @proc
 * the handler. .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2253
/*
 * Template for the sysctl table of one devconf instance.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * rebases each entry's .data/.extra1 from the global ipv4_devconf onto the
 * target devconf. devinet_vars[] has __IPV4_DEVCONF_MAX slots; the entries
 * not initialized below are left zeroed.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* Forwarding has its own handler; mc_forwarding is read-only. */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* Plain read/write entries handled by devinet_conf_proc(). */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
                                        "force_igmp_version"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv2_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv3_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
                                        "ignore_routes_with_linkdown"),
                DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
                                        "drop_gratuitous_arp"),

                /* Entries handled by ipv4_doint_and_flush(). */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
                                              "drop_unicast_in_l2_multicast"),
        },
};
2304
2305static int __devinet_sysctl_register(struct net *net, char *dev_name,
2306                                     int ifindex, struct ipv4_devconf *p)
2307{
2308        int i;
2309        struct devinet_sysctl_table *t;
2310        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2311
2312        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2313        if (!t)
2314                goto out;
2315
2316        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2317                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2318                t->devinet_vars[i].extra1 = p;
2319                t->devinet_vars[i].extra2 = net;
2320        }
2321
2322        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2323
2324        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2325        if (!t->sysctl_header)
2326                goto free;
2327
2328        p->sysctl = t;
2329
2330        inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2331                                    ifindex, p);
2332        return 0;
2333
2334free:
2335        kfree(t);
2336out:
2337        return -ENOBUFS;
2338}
2339
2340static void __devinet_sysctl_unregister(struct net *net,
2341                                        struct ipv4_devconf *cnf, int ifindex)
2342{
2343        struct devinet_sysctl_table *t = cnf->sysctl;
2344
2345        if (t) {
2346                cnf->sysctl = NULL;
2347                unregister_net_sysctl_table(t->sysctl_header);
2348                kfree(t);
2349        }
2350
2351        inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2352}
2353
2354static int devinet_sysctl_register(struct in_device *idev)
2355{
2356        int err;
2357
2358        if (!sysctl_dev_name_is_allowed(idev->dev->name))
2359                return -EINVAL;
2360
2361        err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2362        if (err)
2363                return err;
2364        err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2365                                        idev->dev->ifindex, &idev->cnf);
2366        if (err)
2367                neigh_sysctl_unregister(idev->arp_parms);
2368        return err;
2369}
2370
2371static void devinet_sysctl_unregister(struct in_device *idev)
2372{
2373        struct net *net = dev_net(idev->dev);
2374
2375        __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2376        neigh_sysctl_unregister(idev->arp_parms);
2377}
2378
/*
 * sysctl table holding the single "ip_forward" entry, which
 * devinet_init_net() registers under "net/ipv4".
 *
 * As written it points at the FORWARDING slot of the global ipv4_devconf
 * and at init_net. For other namespaces devinet_init_net() duplicates the
 * table and re-points .data, .extra1 and .extra2 at that namespace's copy.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2392#endif
2393
/*
 * Per-namespace initialisation of the IPv4 "all" and "default" devconf
 * tables.
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt objects and
 * (with CONFIG_SYSCTL) the static ctl_forward_entry table directly; every
 * other namespace gets kmemdup()'d private copies. With CONFIG_SYSCTL the
 * "all" and "default" conf directories and the "net/ipv4" forwarding table
 * are then registered.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * so far is undone in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (!all)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (!dflt)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (!tbl)
                        goto err_alloc_ctl;

                /* Re-point the copied entry at this namespace's "all" table. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default",
                                        NETCONFA_IFINDEX_DEFAULT, dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (!forw_hdr)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Error unwinding: each label undoes the step before the failure. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
err_reg_dflt:
        __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
err_reg_all:
        /* Only free tbl when it is a private copy, not the static table. */
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        /* Likewise, never free the global devconf objects. */
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2466
2467static __net_exit void devinet_exit_net(struct net *net)
2468{
2469#ifdef CONFIG_SYSCTL
2470        struct ctl_table *tbl;
2471
2472        tbl = net->ipv4.forw_hdr->ctl_table_arg;
2473        unregister_net_sysctl_table(net->ipv4.forw_hdr);
2474        __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2475                                    NETCONFA_IFINDEX_DEFAULT);
2476        __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2477                                    NETCONFA_IFINDEX_ALL);
2478        kfree(tbl);
2479#endif
2480        kfree(net->ipv4.devconf_dflt);
2481        kfree(net->ipv4.devconf_all);
2482}
2483
2484static __net_initdata struct pernet_operations devinet_ops = {
2485        .init = devinet_init_net,
2486        .exit = devinet_exit_net,
2487};
2488
2489static struct rtnl_af_ops inet_af_ops __read_mostly = {
2490        .family           = AF_INET,
2491        .fill_link_af     = inet_fill_link_af,
2492        .get_link_af_size = inet_get_link_af_size,
2493        .validate_link_af = inet_validate_link_af,
2494        .set_link_af      = inet_set_link_af,
2495};
2496
2497void __init devinet_init(void)
2498{
2499        int i;
2500
2501        BUILD_BUG_ON(__IPV4_DEVCONF_MAX > RH_KABI_IPV4_DEVCONF_STORAGE);
2502
2503        for (i = 0; i < IN4_ADDR_HSIZE; i++)
2504                INIT_HLIST_HEAD(&inet_addr_lst[i]);
2505
2506        register_pernet_subsys(&devinet_ops);
2507
2508        register_gifconf(PF_INET, inet_gifconf);
2509        register_netdevice_notifier(&ip_netdev_notifier);
2510
2511        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2512
2513        rtnl_af_register(&inet_af_ops);
2514
2515        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2516        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2517        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2518        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2519                      inet_netconf_dump_devconf, 0);
2520}
2521