linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <linux/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/sched/signal.h>
  36#include <linux/string.h>
  37#include <linux/mm.h>
  38#include <linux/socket.h>
  39#include <linux/sockios.h>
  40#include <linux/in.h>
  41#include <linux/errno.h>
  42#include <linux/interrupt.h>
  43#include <linux/if_addr.h>
  44#include <linux/if_ether.h>
  45#include <linux/inet.h>
  46#include <linux/netdevice.h>
  47#include <linux/etherdevice.h>
  48#include <linux/skbuff.h>
  49#include <linux/init.h>
  50#include <linux/notifier.h>
  51#include <linux/inetdevice.h>
  52#include <linux/igmp.h>
  53#include <linux/slab.h>
  54#include <linux/hash.h>
  55#ifdef CONFIG_SYSCTL
  56#include <linux/sysctl.h>
  57#endif
  58#include <linux/kmod.h>
  59#include <linux/netconf.h>
  60
  61#include <net/arp.h>
  62#include <net/ip.h>
  63#include <net/route.h>
  64#include <net/ip_fib.h>
  65#include <net/rtnetlink.h>
  66#include <net/net_namespace.h>
  67#include <net/addrconf.h>
  68
  69static struct ipv4_devconf ipv4_devconf = {
  70        .data = {
  71                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  72                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  74                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  75                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  76                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  77        },
  78};
  79
  80static struct ipv4_devconf ipv4_devconf_dflt = {
  81        .data = {
  82                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  83                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  84                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  85                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  86                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  87                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  88                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  89        },
  90};
  91
  92#define IPV4_DEVCONF_DFLT(net, attr) \
  93        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  94
  95static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  96        [IFA_LOCAL]             = { .type = NLA_U32 },
  97        [IFA_ADDRESS]           = { .type = NLA_U32 },
  98        [IFA_BROADCAST]         = { .type = NLA_U32 },
  99        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 100        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
 101        [IFA_FLAGS]             = { .type = NLA_U32 },
 102        [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
 103        [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
 104};
 105
 106struct inet_fill_args {
 107        u32 portid;
 108        u32 seq;
 109        int event;
 110        unsigned int flags;
 111        int netnsid;
 112        int ifindex;
 113};
 114
 115#define IN4_ADDR_HSIZE_SHIFT    8
 116#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
 117
 118static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 119
 120static u32 inet_addr_hash(const struct net *net, __be32 addr)
 121{
 122        u32 val = (__force u32) addr ^ net_hash_mix(net);
 123
 124        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 125}
 126
 127static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 128{
 129        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 130
 131        ASSERT_RTNL();
 132        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 133}
 134
 135static void inet_hash_remove(struct in_ifaddr *ifa)
 136{
 137        ASSERT_RTNL();
 138        hlist_del_init_rcu(&ifa->hash);
 139}
 140
 141/**
 142 * __ip_dev_find - find the first device with a given source address.
 143 * @net: the net namespace
 144 * @addr: the source address
 145 * @devref: if true, take a reference on the found device
 146 *
 147 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 148 */
 149struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 150{
 151        struct net_device *result = NULL;
 152        struct in_ifaddr *ifa;
 153
 154        rcu_read_lock();
 155        ifa = inet_lookup_ifaddr_rcu(net, addr);
 156        if (!ifa) {
 157                struct flowi4 fl4 = { .daddr = addr };
 158                struct fib_result res = { 0 };
 159                struct fib_table *local;
 160
 161                /* Fallback to FIB local table so that communication
 162                 * over loopback subnets work.
 163                 */
 164                local = fib_get_table(net, RT_TABLE_LOCAL);
 165                if (local &&
 166                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 167                    res.type == RTN_LOCAL)
 168                        result = FIB_RES_DEV(res);
 169        } else {
 170                result = ifa->ifa_dev->dev;
 171        }
 172        if (result && devref)
 173                dev_hold(result);
 174        rcu_read_unlock();
 175        return result;
 176}
 177EXPORT_SYMBOL(__ip_dev_find);
 178
 179/* called under RCU lock */
 180struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
 181{
 182        u32 hash = inet_addr_hash(net, addr);
 183        struct in_ifaddr *ifa;
 184
 185        hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
 186                if (ifa->ifa_local == addr &&
 187                    net_eq(dev_net(ifa->ifa_dev->dev), net))
 188                        return ifa;
 189
 190        return NULL;
 191}
 192
 193static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 194
 195static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 196static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
 197static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 198                         int destroy);
 199#ifdef CONFIG_SYSCTL
 200static int devinet_sysctl_register(struct in_device *idev);
 201static void devinet_sysctl_unregister(struct in_device *idev);
 202#else
 203static int devinet_sysctl_register(struct in_device *idev)
 204{
 205        return 0;
 206}
 207static void devinet_sysctl_unregister(struct in_device *idev)
 208{
 209}
 210#endif
 211
 212/* Locks all the inet devices. */
 213
 214static struct in_ifaddr *inet_alloc_ifa(void)
 215{
 216        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 217}
 218
 219static void inet_rcu_free_ifa(struct rcu_head *head)
 220{
 221        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 222        if (ifa->ifa_dev)
 223                in_dev_put(ifa->ifa_dev);
 224        kfree(ifa);
 225}
 226
 227static void inet_free_ifa(struct in_ifaddr *ifa)
 228{
 229        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 230}
 231
 232void in_dev_finish_destroy(struct in_device *idev)
 233{
 234        struct net_device *dev = idev->dev;
 235
 236        WARN_ON(idev->ifa_list);
 237        WARN_ON(idev->mc_list);
 238        kfree(rcu_dereference_protected(idev->mc_hash, 1));
 239#ifdef NET_REFCNT_DEBUG
 240        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 241#endif
 242        dev_put(dev);
 243        if (!idev->dead)
 244                pr_err("Freeing alive in_device %p\n", idev);
 245        else
 246                kfree(idev);
 247}
 248EXPORT_SYMBOL(in_dev_finish_destroy);
 249
 250static struct in_device *inetdev_init(struct net_device *dev)
 251{
 252        struct in_device *in_dev;
 253        int err = -ENOMEM;
 254
 255        ASSERT_RTNL();
 256
 257        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 258        if (!in_dev)
 259                goto out;
 260        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 261                        sizeof(in_dev->cnf));
 262        in_dev->cnf.sysctl = NULL;
 263        in_dev->dev = dev;
 264        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 265        if (!in_dev->arp_parms)
 266                goto out_kfree;
 267        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 268                dev_disable_lro(dev);
 269        /* Reference in_dev->dev */
 270        dev_hold(dev);
 271        /* Account for reference dev->ip_ptr (below) */
 272        refcount_set(&in_dev->refcnt, 1);
 273
 274        err = devinet_sysctl_register(in_dev);
 275        if (err) {
 276                in_dev->dead = 1;
 277                in_dev_put(in_dev);
 278                in_dev = NULL;
 279                goto out;
 280        }
 281        ip_mc_init_dev(in_dev);
 282        if (dev->flags & IFF_UP)
 283                ip_mc_up(in_dev);
 284
 285        /* we can receive as soon as ip_ptr is set -- do this last */
 286        rcu_assign_pointer(dev->ip_ptr, in_dev);
 287out:
 288        return in_dev ?: ERR_PTR(err);
 289out_kfree:
 290        kfree(in_dev);
 291        in_dev = NULL;
 292        goto out;
 293}
 294
 295static void in_dev_rcu_put(struct rcu_head *head)
 296{
 297        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 298        in_dev_put(idev);
 299}
 300
 301static void inetdev_destroy(struct in_device *in_dev)
 302{
 303        struct in_ifaddr *ifa;
 304        struct net_device *dev;
 305
 306        ASSERT_RTNL();
 307
 308        dev = in_dev->dev;
 309
 310        in_dev->dead = 1;
 311
 312        ip_mc_destroy_dev(in_dev);
 313
 314        while ((ifa = in_dev->ifa_list) != NULL) {
 315                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 316                inet_free_ifa(ifa);
 317        }
 318
 319        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 320
 321        devinet_sysctl_unregister(in_dev);
 322        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 323        arp_ifdown(dev);
 324
 325        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 326}
 327
 328int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 329{
 330        rcu_read_lock();
 331        for_primary_ifa(in_dev) {
 332                if (inet_ifa_match(a, ifa)) {
 333                        if (!b || inet_ifa_match(b, ifa)) {
 334                                rcu_read_unlock();
 335                                return 1;
 336                        }
 337                }
 338        } endfor_ifa(in_dev);
 339        rcu_read_unlock();
 340        return 0;
 341}
 342
 343static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 344                         int destroy, struct nlmsghdr *nlh, u32 portid)
 345{
 346        struct in_ifaddr *promote = NULL;
 347        struct in_ifaddr *ifa, *ifa1 = *ifap;
 348        struct in_ifaddr *last_prim = in_dev->ifa_list;
 349        struct in_ifaddr *prev_prom = NULL;
 350        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 351
 352        ASSERT_RTNL();
 353
 354        if (in_dev->dead)
 355                goto no_promotions;
 356
 357        /* 1. Deleting primary ifaddr forces deletion all secondaries
 358         * unless alias promotion is set
 359         **/
 360
 361        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 362                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 363
 364                while ((ifa = *ifap1) != NULL) {
 365                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 366                            ifa1->ifa_scope <= ifa->ifa_scope)
 367                                last_prim = ifa;
 368
 369                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 370                            ifa1->ifa_mask != ifa->ifa_mask ||
 371                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 372                                ifap1 = &ifa->ifa_next;
 373                                prev_prom = ifa;
 374                                continue;
 375                        }
 376
 377                        if (!do_promote) {
 378                                inet_hash_remove(ifa);
 379                                *ifap1 = ifa->ifa_next;
 380
 381                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 382                                blocking_notifier_call_chain(&inetaddr_chain,
 383                                                NETDEV_DOWN, ifa);
 384                                inet_free_ifa(ifa);
 385                        } else {
 386                                promote = ifa;
 387                                break;
 388                        }
 389                }
 390        }
 391
 392        /* On promotion all secondaries from subnet are changing
 393         * the primary IP, we must remove all their routes silently
 394         * and later to add them back with new prefsrc. Do this
 395         * while all addresses are on the device list.
 396         */
 397        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 398                if (ifa1->ifa_mask == ifa->ifa_mask &&
 399                    inet_ifa_match(ifa1->ifa_address, ifa))
 400                        fib_del_ifaddr(ifa, ifa1);
 401        }
 402
 403no_promotions:
 404        /* 2. Unlink it */
 405
 406        *ifap = ifa1->ifa_next;
 407        inet_hash_remove(ifa1);
 408
 409        /* 3. Announce address deletion */
 410
 411        /* Send message first, then call notifier.
 412           At first sight, FIB update triggered by notifier
 413           will refer to already deleted ifaddr, that could confuse
 414           netlink listeners. It is not true: look, gated sees
 415           that route deleted and if it still thinks that ifaddr
 416           is valid, it will try to restore deleted routes... Grr.
 417           So that, this order is correct.
 418         */
 419        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
 420        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 421
 422        if (promote) {
 423                struct in_ifaddr *next_sec = promote->ifa_next;
 424
 425                if (prev_prom) {
 426                        prev_prom->ifa_next = promote->ifa_next;
 427                        promote->ifa_next = last_prim->ifa_next;
 428                        last_prim->ifa_next = promote;
 429                }
 430
 431                promote->ifa_flags &= ~IFA_F_SECONDARY;
 432                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 433                blocking_notifier_call_chain(&inetaddr_chain,
 434                                NETDEV_UP, promote);
 435                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 436                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 437                            !inet_ifa_match(ifa1->ifa_address, ifa))
 438                                        continue;
 439                        fib_add_ifaddr(ifa);
 440                }
 441
 442        }
 443        if (destroy)
 444                inet_free_ifa(ifa1);
 445}
 446
 447static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 448                         int destroy)
 449{
 450        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 451}
 452
 453static void check_lifetime(struct work_struct *work);
 454
 455static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 456
 457static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 458                             u32 portid, struct netlink_ext_ack *extack)
 459{
 460        struct in_device *in_dev = ifa->ifa_dev;
 461        struct in_ifaddr *ifa1, **ifap, **last_primary;
 462        struct in_validator_info ivi;
 463        int ret;
 464
 465        ASSERT_RTNL();
 466
 467        if (!ifa->ifa_local) {
 468                inet_free_ifa(ifa);
 469                return 0;
 470        }
 471
 472        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 473        last_primary = &in_dev->ifa_list;
 474
 475        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 476             ifap = &ifa1->ifa_next) {
 477                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 478                    ifa->ifa_scope <= ifa1->ifa_scope)
 479                        last_primary = &ifa1->ifa_next;
 480                if (ifa1->ifa_mask == ifa->ifa_mask &&
 481                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 482                        if (ifa1->ifa_local == ifa->ifa_local) {
 483                                inet_free_ifa(ifa);
 484                                return -EEXIST;
 485                        }
 486                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 487                                inet_free_ifa(ifa);
 488                                return -EINVAL;
 489                        }
 490                        ifa->ifa_flags |= IFA_F_SECONDARY;
 491                }
 492        }
 493
 494        /* Allow any devices that wish to register ifaddr validtors to weigh
 495         * in now, before changes are committed.  The rntl lock is serializing
 496         * access here, so the state should not change between a validator call
 497         * and a final notify on commit.  This isn't invoked on promotion under
 498         * the assumption that validators are checking the address itself, and
 499         * not the flags.
 500         */
 501        ivi.ivi_addr = ifa->ifa_address;
 502        ivi.ivi_dev = ifa->ifa_dev;
 503        ivi.extack = extack;
 504        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
 505                                           NETDEV_UP, &ivi);
 506        ret = notifier_to_errno(ret);
 507        if (ret) {
 508                inet_free_ifa(ifa);
 509                return ret;
 510        }
 511
 512        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 513                prandom_seed((__force u32) ifa->ifa_local);
 514                ifap = last_primary;
 515        }
 516
 517        ifa->ifa_next = *ifap;
 518        *ifap = ifa;
 519
 520        inet_hash_insert(dev_net(in_dev->dev), ifa);
 521
 522        cancel_delayed_work(&check_lifetime_work);
 523        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 524
 525        /* Send message first, then call notifier.
 526           Notifier will trigger FIB update, so that
 527           listeners of netlink will know about new ifaddr */
 528        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 529        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 530
 531        return 0;
 532}
 533
 534static int inet_insert_ifa(struct in_ifaddr *ifa)
 535{
 536        return __inet_insert_ifa(ifa, NULL, 0, NULL);
 537}
 538
 539static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 540{
 541        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 542
 543        ASSERT_RTNL();
 544
 545        if (!in_dev) {
 546                inet_free_ifa(ifa);
 547                return -ENOBUFS;
 548        }
 549        ipv4_devconf_setall(in_dev);
 550        neigh_parms_data_state_setall(in_dev->arp_parms);
 551        if (ifa->ifa_dev != in_dev) {
 552                WARN_ON(ifa->ifa_dev);
 553                in_dev_hold(in_dev);
 554                ifa->ifa_dev = in_dev;
 555        }
 556        if (ipv4_is_loopback(ifa->ifa_local))
 557                ifa->ifa_scope = RT_SCOPE_HOST;
 558        return inet_insert_ifa(ifa);
 559}
 560
 561/* Caller must hold RCU or RTNL :
 562 * We dont take a reference on found in_device
 563 */
 564struct in_device *inetdev_by_index(struct net *net, int ifindex)
 565{
 566        struct net_device *dev;
 567        struct in_device *in_dev = NULL;
 568
 569        rcu_read_lock();
 570        dev = dev_get_by_index_rcu(net, ifindex);
 571        if (dev)
 572                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 573        rcu_read_unlock();
 574        return in_dev;
 575}
 576EXPORT_SYMBOL(inetdev_by_index);
 577
 578/* Called only from RTNL semaphored context. No locks. */
 579
 580struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 581                                    __be32 mask)
 582{
 583        ASSERT_RTNL();
 584
 585        for_primary_ifa(in_dev) {
 586                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 587                        return ifa;
 588        } endfor_ifa(in_dev);
 589        return NULL;
 590}
 591
 592static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 593{
 594        struct ip_mreqn mreq = {
 595                .imr_multiaddr.s_addr = ifa->ifa_address,
 596                .imr_ifindex = ifa->ifa_dev->dev->ifindex,
 597        };
 598        int ret;
 599
 600        ASSERT_RTNL();
 601
 602        lock_sock(sk);
 603        if (join)
 604                ret = ip_mc_join_group(sk, &mreq);
 605        else
 606                ret = ip_mc_leave_group(sk, &mreq);
 607        release_sock(sk);
 608
 609        return ret;
 610}
 611
 612static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 613                            struct netlink_ext_ack *extack)
 614{
 615        struct net *net = sock_net(skb->sk);
 616        struct nlattr *tb[IFA_MAX+1];
 617        struct in_device *in_dev;
 618        struct ifaddrmsg *ifm;
 619        struct in_ifaddr *ifa, **ifap;
 620        int err = -EINVAL;
 621
 622        ASSERT_RTNL();
 623
 624        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 625                          extack);
 626        if (err < 0)
 627                goto errout;
 628
 629        ifm = nlmsg_data(nlh);
 630        in_dev = inetdev_by_index(net, ifm->ifa_index);
 631        if (!in_dev) {
 632                err = -ENODEV;
 633                goto errout;
 634        }
 635
 636        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 637             ifap = &ifa->ifa_next) {
 638                if (tb[IFA_LOCAL] &&
 639                    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 640                        continue;
 641
 642                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 643                        continue;
 644
 645                if (tb[IFA_ADDRESS] &&
 646                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 647                    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 648                        continue;
 649
 650                if (ipv4_is_multicast(ifa->ifa_address))
 651                        ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 652                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 653                return 0;
 654        }
 655
 656        err = -EADDRNOTAVAIL;
 657errout:
 658        return err;
 659}
 660
 661#define INFINITY_LIFE_TIME      0xFFFFFFFF
 662
 663static void check_lifetime(struct work_struct *work)
 664{
 665        unsigned long now, next, next_sec, next_sched;
 666        struct in_ifaddr *ifa;
 667        struct hlist_node *n;
 668        int i;
 669
 670        now = jiffies;
 671        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 672
 673        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
 674                bool change_needed = false;
 675
 676                rcu_read_lock();
 677                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
 678                        unsigned long age;
 679
 680                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 681                                continue;
 682
 683                        /* We try to batch several events at once. */
 684                        age = (now - ifa->ifa_tstamp +
 685                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 686
 687                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 688                            age >= ifa->ifa_valid_lft) {
 689                                change_needed = true;
 690                        } else if (ifa->ifa_preferred_lft ==
 691                                   INFINITY_LIFE_TIME) {
 692                                continue;
 693                        } else if (age >= ifa->ifa_preferred_lft) {
 694                                if (time_before(ifa->ifa_tstamp +
 695                                                ifa->ifa_valid_lft * HZ, next))
 696                                        next = ifa->ifa_tstamp +
 697                                               ifa->ifa_valid_lft * HZ;
 698
 699                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
 700                                        change_needed = true;
 701                        } else if (time_before(ifa->ifa_tstamp +
 702                                               ifa->ifa_preferred_lft * HZ,
 703                                               next)) {
 704                                next = ifa->ifa_tstamp +
 705                                       ifa->ifa_preferred_lft * HZ;
 706                        }
 707                }
 708                rcu_read_unlock();
 709                if (!change_needed)
 710                        continue;
 711                rtnl_lock();
 712                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
 713                        unsigned long age;
 714
 715                        if (ifa->ifa_flags & IFA_F_PERMANENT)
 716                                continue;
 717
 718                        /* We try to batch several events at once. */
 719                        age = (now - ifa->ifa_tstamp +
 720                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 721
 722                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 723                            age >= ifa->ifa_valid_lft) {
 724                                struct in_ifaddr **ifap;
 725
 726                                for (ifap = &ifa->ifa_dev->ifa_list;
 727                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
 728                                        if (*ifap == ifa) {
 729                                                inet_del_ifa(ifa->ifa_dev,
 730                                                             ifap, 1);
 731                                                break;
 732                                        }
 733                                }
 734                        } else if (ifa->ifa_preferred_lft !=
 735                                   INFINITY_LIFE_TIME &&
 736                                   age >= ifa->ifa_preferred_lft &&
 737                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
 738                                ifa->ifa_flags |= IFA_F_DEPRECATED;
 739                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 740                        }
 741                }
 742                rtnl_unlock();
 743        }
 744
 745        next_sec = round_jiffies_up(next);
 746        next_sched = next;
 747
 748        /* If rounded timeout is accurate enough, accept it. */
 749        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
 750                next_sched = next_sec;
 751
 752        now = jiffies;
 753        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
 754        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
 755                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
 756
 757        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
 758                        next_sched - now);
 759}
 760
 761static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 762                             __u32 prefered_lft)
 763{
 764        unsigned long timeout;
 765
 766        ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 767
 768        timeout = addrconf_timeout_fixup(valid_lft, HZ);
 769        if (addrconf_finite_timeout(timeout))
 770                ifa->ifa_valid_lft = timeout;
 771        else
 772                ifa->ifa_flags |= IFA_F_PERMANENT;
 773
 774        timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 775        if (addrconf_finite_timeout(timeout)) {
 776                if (timeout == 0)
 777                        ifa->ifa_flags |= IFA_F_DEPRECATED;
 778                ifa->ifa_preferred_lft = timeout;
 779        }
 780        ifa->ifa_tstamp = jiffies;
 781        if (!ifa->ifa_cstamp)
 782                ifa->ifa_cstamp = ifa->ifa_tstamp;
 783}
 784
 785static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 786                                       __u32 *pvalid_lft, __u32 *pprefered_lft,
 787                                       struct netlink_ext_ack *extack)
 788{
 789        struct nlattr *tb[IFA_MAX+1];
 790        struct in_ifaddr *ifa;
 791        struct ifaddrmsg *ifm;
 792        struct net_device *dev;
 793        struct in_device *in_dev;
 794        int err;
 795
 796        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
 797                          extack);
 798        if (err < 0)
 799                goto errout;
 800
 801        ifm = nlmsg_data(nlh);
 802        err = -EINVAL;
 803        if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
 804                goto errout;
 805
 806        dev = __dev_get_by_index(net, ifm->ifa_index);
 807        err = -ENODEV;
 808        if (!dev)
 809                goto errout;
 810
 811        in_dev = __in_dev_get_rtnl(dev);
 812        err = -ENOBUFS;
 813        if (!in_dev)
 814                goto errout;
 815
 816        ifa = inet_alloc_ifa();
 817        if (!ifa)
 818                /*
 819                 * A potential indev allocation can be left alive, it stays
 820                 * assigned to its device and is destroy with it.
 821                 */
 822                goto errout;
 823
 824        ipv4_devconf_setall(in_dev);
 825        neigh_parms_data_state_setall(in_dev->arp_parms);
 826        in_dev_hold(in_dev);
 827
 828        if (!tb[IFA_ADDRESS])
 829                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 830
 831        INIT_HLIST_NODE(&ifa->hash);
 832        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 833        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 834        ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
 835                                         ifm->ifa_flags;
 836        ifa->ifa_scope = ifm->ifa_scope;
 837        ifa->ifa_dev = in_dev;
 838
 839        ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
 840        ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 841
 842        if (tb[IFA_BROADCAST])
 843                ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 844
 845        if (tb[IFA_LABEL])
 846                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 847        else
 848                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 849
 850        if (tb[IFA_RT_PRIORITY])
 851                ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
 852
 853        if (tb[IFA_CACHEINFO]) {
 854                struct ifa_cacheinfo *ci;
 855
 856                ci = nla_data(tb[IFA_CACHEINFO]);
 857                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 858                        err = -EINVAL;
 859                        goto errout_free;
 860                }
 861                *pvalid_lft = ci->ifa_valid;
 862                *pprefered_lft = ci->ifa_prefered;
 863        }
 864
 865        return ifa;
 866
 867errout_free:
 868        inet_free_ifa(ifa);
 869errout:
 870        return ERR_PTR(err);
 871}
 872
 873static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 874{
 875        struct in_device *in_dev = ifa->ifa_dev;
 876        struct in_ifaddr *ifa1, **ifap;
 877
 878        if (!ifa->ifa_local)
 879                return NULL;
 880
 881        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 882             ifap = &ifa1->ifa_next) {
 883                if (ifa1->ifa_mask == ifa->ifa_mask &&
 884                    inet_ifa_match(ifa1->ifa_address, ifa) &&
 885                    ifa1->ifa_local == ifa->ifa_local)
 886                        return ifa1;
 887        }
 888        return NULL;
 889}
 890
 891static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 892                            struct netlink_ext_ack *extack)
 893{
 894        struct net *net = sock_net(skb->sk);
 895        struct in_ifaddr *ifa;
 896        struct in_ifaddr *ifa_existing;
 897        __u32 valid_lft = INFINITY_LIFE_TIME;
 898        __u32 prefered_lft = INFINITY_LIFE_TIME;
 899
 900        ASSERT_RTNL();
 901
 902        ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
 903        if (IS_ERR(ifa))
 904                return PTR_ERR(ifa);
 905
 906        ifa_existing = find_matching_ifa(ifa);
 907        if (!ifa_existing) {
 908                /* It would be best to check for !NLM_F_CREATE here but
 909                 * userspace already relies on not having to provide this.
 910                 */
 911                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 912                if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
 913                        int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
 914                                               true, ifa);
 915
 916                        if (ret < 0) {
 917                                inet_free_ifa(ifa);
 918                                return ret;
 919                        }
 920                }
 921                return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
 922                                         extack);
 923        } else {
 924                u32 new_metric = ifa->ifa_rt_priority;
 925
 926                inet_free_ifa(ifa);
 927
 928                if (nlh->nlmsg_flags & NLM_F_EXCL ||
 929                    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 930                        return -EEXIST;
 931                ifa = ifa_existing;
 932
 933                if (ifa->ifa_rt_priority != new_metric) {
 934                        fib_modify_prefix_metric(ifa, new_metric);
 935                        ifa->ifa_rt_priority = new_metric;
 936                }
 937
 938                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 939                cancel_delayed_work(&check_lifetime_work);
 940                queue_delayed_work(system_power_efficient_wq,
 941                                &check_lifetime_work, 0);
 942                rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 943        }
 944        return 0;
 945}
 946
 947/*
 948 *      Determine a default network mask, based on the IP address.
 949 */
 950
 951static int inet_abc_len(__be32 addr)
 952{
 953        int rc = -1;    /* Something else, probably a multicast. */
 954
 955        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 956                rc = 0;
 957        else {
 958                __u32 haddr = ntohl(addr);
 959                if (IN_CLASSA(haddr))
 960                        rc = 8;
 961                else if (IN_CLASSB(haddr))
 962                        rc = 16;
 963                else if (IN_CLASSC(haddr))
 964                        rc = 24;
 965                else if (IN_CLASSE(haddr))
 966                        rc = 32;
 967        }
 968
 969        return rc;
 970}
 971
 972
    /*
     * devinet_ioctl - handle the IPv4 address ioctls for one interface.
     * @net: namespace used for the device lookup and the capability check
     * @cmd: one of the SIOC[GS]IF{ADDR,BRDADDR,DSTADDR,NETMASK} requests or
     *       SIOCSIFFLAGS; any other value yields -EINVAL
     * @ifr: request block; ifr_name selects the device (optionally
     *       "name:alias"), ifr_addr carries the address in or out
     *
     * The SIOCS* requests need CAP_NET_ADMIN in @net's user namespace
     * (-EPERM otherwise) and, except SIOCSIFFLAGS, an AF_INET address
     * (-EINVAL otherwise).  The device and address lookups and all list
     * updates happen between rtnl_lock() and rtnl_unlock().
     *
     * Returns 0 on success or a negative errno: -ENODEV when no device is
     * named ifr_name, -EADDRNOTAVAIL when no matching address exists for a
     * request that needs one, otherwise the error of the individual
     * operation.
     */
 973int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
 974{
 975        struct sockaddr_in sin_orig;
 976        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
 977        struct in_device *in_dev;
 978        struct in_ifaddr **ifap = NULL;
 979        struct in_ifaddr *ifa = NULL;
 980        struct net_device *dev;
 981        char *colon;
 982        int ret = -EFAULT;
 983        int tryaddrmatch = 0;
 984
            /* Force NUL termination before the string functions below. */
 985        ifr->ifr_name[IFNAMSIZ - 1] = 0;
 986
 987        /* save original address for comparison */
 988        memcpy(&sin_orig, sin, sizeof(*sin));
 989
            /* Temporarily cut an alias suffix ("name:N") so that only the
             * device name is used for dev_load() and the device lookup; the
             * colon is put back after a successful lookup.
             */
 990        colon = strchr(ifr->ifr_name, ':');
 991        if (colon)
 992                *colon = 0;
 993
 994        dev_load(net, ifr->ifr_name);
 995
 996        switch (cmd) {
 997        case SIOCGIFADDR:       /* Get interface address */
 998        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 999        case SIOCGIFDSTADDR:    /* Get the destination address */
1000        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1001                /* Note that these ioctls will not sleep,
1002                   so that we do not impose a lock.
1003                   One day we will be forced to put shlock here (I mean SMP)
1004                 */
                    /* Remember whether the caller passed an AF_INET address,
                     * then clear the area that will hold the reply.
                     */
1005                tryaddrmatch = (sin_orig.sin_family == AF_INET);
1006                memset(sin, 0, sizeof(*sin));
1007                sin->sin_family = AF_INET;
1008                break;
1009
1010        case SIOCSIFFLAGS:
1011                ret = -EPERM;
1012                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1013                        goto out;
1014                break;
1015        case SIOCSIFADDR:       /* Set interface address (and family) */
1016        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1017        case SIOCSIFDSTADDR:    /* Set the destination address */
1018        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1019                ret = -EPERM;
1020                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1021                        goto out;
1022                ret = -EINVAL;
1023                if (sin->sin_family != AF_INET)
1024                        goto out;
1025                break;
1026        default:
1027                ret = -EINVAL;
1028                goto out;
1029        }
1030
1031        rtnl_lock();
1032
1033        ret = -ENODEV;
1034        dev = __dev_get_by_name(net, ifr->ifr_name);
1035        if (!dev)
1036                goto done;
1037
            /* Restore the full label for the comparisons against ifa_label. */
1038        if (colon)
1039                *colon = ':';
1040
1041        in_dev = __in_dev_get_rtnl(dev);
1042        if (in_dev) {
1043                if (tryaddrmatch) {
1044                        /* Matthias Andree */
1045                        /* compare label and address (4.4BSD style) */
1046                        /* note: we only do this for a limited set of ioctls
1047                           and only if the original address family was AF_INET.
1048                           This is checked above. */
1049                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1050                             ifap = &ifa->ifa_next) {
1051                                if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1052                                    sin_orig.sin_addr.s_addr ==
1053                                                        ifa->ifa_local) {
1054                                        break; /* found */
1055                                }
1056                        }
1057                }
1058                /* we didn't get a match, maybe the application is
1059                   4.3BSD-style and passed in junk so we fall back to
1060                   comparing just the label */
1061                if (!ifa) {
1062                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1063                             ifap = &ifa->ifa_next)
1064                                if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1065                                        break;
1066                }
1067        }
1068
            /* Only SIOCSIFADDR (which can create the address) and
             * SIOCSIFFLAGS may proceed without an existing address.
             */
1069        ret = -EADDRNOTAVAIL;
1070        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1071                goto done;
1072
1073        switch (cmd) {
1074        case SIOCGIFADDR:       /* Get interface address */
1075                ret = 0;
1076                sin->sin_addr.s_addr = ifa->ifa_local;
1077                break;
1078
1079        case SIOCGIFBRDADDR:    /* Get the broadcast address */
1080                ret = 0;
1081                sin->sin_addr.s_addr = ifa->ifa_broadcast;
1082                break;
1083
1084        case SIOCGIFDSTADDR:    /* Get the destination address */
1085                ret = 0;
1086                sin->sin_addr.s_addr = ifa->ifa_address;
1087                break;
1088
1089        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1090                ret = 0;
1091                sin->sin_addr.s_addr = ifa->ifa_mask;
1092                break;
1093
1094        case SIOCSIFFLAGS:
                    /* With an alias label, clearing IFF_UP deletes that one
                     * address and the device flags are left untouched;
                     * without an alias the flags go to dev_change_flags().
                     */
1095                if (colon) {
1096                        ret = -EADDRNOTAVAIL;
1097                        if (!ifa)
1098                                break;
1099                        ret = 0;
1100                        if (!(ifr->ifr_flags & IFF_UP))
1101                                inet_del_ifa(in_dev, ifap, 1);
1102                        break;
1103                }
1104                ret = dev_change_flags(dev, ifr->ifr_flags);
1105                break;
1106
1107        case SIOCSIFADDR:       /* Set interface address (and family) */
1108                ret = -EINVAL;
1109                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110                        break;
1111
1112                if (!ifa) {
                            /* No address under this label yet: allocate one
                             * and label it with the alias or the device name.
                             */
1113                        ret = -ENOBUFS;
1114                        ifa = inet_alloc_ifa();
1115                        if (!ifa)
1116                                break;
1117                        INIT_HLIST_NODE(&ifa->hash);
1118                        if (colon)
1119                                memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1120                        else
1121                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1122                } else {
                            /* Existing address: nothing to do if unchanged,
                             * otherwise take it off the list and reuse the
                             * structure (last argument 0 - presumably "do
                             * not free"; confirm against inet_del_ifa()).
                             */
1123                        ret = 0;
1124                        if (ifa->ifa_local == sin->sin_addr.s_addr)
1125                                break;
1126                        inet_del_ifa(in_dev, ifap, 0);
1127                        ifa->ifa_broadcast = 0;
1128                        ifa->ifa_scope = 0;
1129                }
1130
1131                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1132
                    /* Non point-to-point devices get the class-based prefix
                     * and, if broadcast capable, a derived broadcast address;
                     * point-to-point devices get a /32.
                     */
1133                if (!(dev->flags & IFF_POINTOPOINT)) {
1134                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1135                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1136                        if ((dev->flags & IFF_BROADCAST) &&
1137                            ifa->ifa_prefixlen < 31)
1138                                ifa->ifa_broadcast = ifa->ifa_address |
1139                                                     ~ifa->ifa_mask;
1140                } else {
1141                        ifa->ifa_prefixlen = 32;
1142                        ifa->ifa_mask = inet_make_mask(32);
1143                }
1144                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1145                ret = inet_set_ifa(dev, ifa);
1146                break;
1147
1148        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1149                ret = 0;
1150                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1151                        inet_del_ifa(in_dev, ifap, 0);
1152                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
1153                        inet_insert_ifa(ifa);
1154                }
1155                break;
1156
1157        case SIOCSIFDSTADDR:    /* Set the destination address */
1158                ret = 0;
1159                if (ifa->ifa_address == sin->sin_addr.s_addr)
1160                        break;
1161                ret = -EINVAL;
1162                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1163                        break;
1164                ret = 0;
1165                inet_del_ifa(in_dev, ifap, 0);
1166                ifa->ifa_address = sin->sin_addr.s_addr;
1167                inet_insert_ifa(ifa);
1168                break;
1169
1170        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1171
1172                /*
1173                 *      The mask we set must be legal.
1174                 */
1175                ret = -EINVAL;
1176                if (bad_mask(sin->sin_addr.s_addr, 0))
1177                        break;
1178                ret = 0;
1179                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1180                        __be32 old_mask = ifa->ifa_mask;
1181                        inet_del_ifa(in_dev, ifap, 0);
1182                        ifa->ifa_mask = sin->sin_addr.s_addr;
1183                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1184
1185                        /* See if current broadcast address matches
1186                         * with current netmask, then recalculate
1187                         * the broadcast address. Otherwise it's a
1188                         * funny address, so don't touch it since
1189                         * the user seems to know what (s)he's doing...
1190                         */
1191                        if ((dev->flags & IFF_BROADCAST) &&
1192                            (ifa->ifa_prefixlen < 31) &&
1193                            (ifa->ifa_broadcast ==
1194                             (ifa->ifa_local|~old_mask))) {
1195                                ifa->ifa_broadcast = (ifa->ifa_local |
1196                                                      ~sin->sin_addr.s_addr);
1197                        }
1198                        inet_insert_ifa(ifa);
1199                }
1200                break;
1201        }
1202done:
1203        rtnl_unlock();
1204out:
1205        return ret;
1206}
1207
1208static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1209{
1210        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1211        struct in_ifaddr *ifa;
1212        struct ifreq ifr;
1213        int done = 0;
1214
1215        if (WARN_ON(size > sizeof(struct ifreq)))
1216                goto out;
1217
1218        if (!in_dev)
1219                goto out;
1220
1221        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1222                if (!buf) {
1223                        done += size;
1224                        continue;
1225                }
1226                if (len < size)
1227                        break;
1228                memset(&ifr, 0, sizeof(struct ifreq));
1229                strcpy(ifr.ifr_name, ifa->ifa_label);
1230
1231                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1232                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1233                                                                ifa->ifa_local;
1234
1235                if (copy_to_user(buf + done, &ifr, size)) {
1236                        done = -EFAULT;
1237                        break;
1238                }
1239                len  -= size;
1240                done += size;
1241        }
1242out:
1243        return done;
1244}
1245
1246static __be32 in_dev_select_addr(const struct in_device *in_dev,
1247                                 int scope)
1248{
1249        for_primary_ifa(in_dev) {
1250                if (ifa->ifa_scope != RT_SCOPE_LINK &&
1251                    ifa->ifa_scope <= scope)
1252                        return ifa->ifa_local;
1253        } endfor_ifa(in_dev);
1254
1255        return 0;
1256}
1257
1258__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1259{
1260        __be32 addr = 0;
1261        struct in_device *in_dev;
1262        struct net *net = dev_net(dev);
1263        int master_idx;
1264
1265        rcu_read_lock();
1266        in_dev = __in_dev_get_rcu(dev);
1267        if (!in_dev)
1268                goto no_in_dev;
1269
1270        for_primary_ifa(in_dev) {
1271                if (ifa->ifa_scope > scope)
1272                        continue;
1273                if (!dst || inet_ifa_match(dst, ifa)) {
1274                        addr = ifa->ifa_local;
1275                        break;
1276                }
1277                if (!addr)
1278                        addr = ifa->ifa_local;
1279        } endfor_ifa(in_dev);
1280
1281        if (addr)
1282                goto out_unlock;
1283no_in_dev:
1284        master_idx = l3mdev_master_ifindex_rcu(dev);
1285
1286        /* For VRFs, the VRF device takes the place of the loopback device,
1287         * with addresses on it being preferred.  Note in such cases the
1288         * loopback device will be among the devices that fail the master_idx
1289         * equality check in the loop below.
1290         */
1291        if (master_idx &&
1292            (dev = dev_get_by_index_rcu(net, master_idx)) &&
1293            (in_dev = __in_dev_get_rcu(dev))) {
1294                addr = in_dev_select_addr(in_dev, scope);
1295                if (addr)
1296                        goto out_unlock;
1297        }
1298
1299        /* Not loopback addresses on loopback should be preferred
1300           in this case. It is important that lo is the first interface
1301           in dev_base list.
1302         */
1303        for_each_netdev_rcu(net, dev) {
1304                if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1305                        continue;
1306
1307                in_dev = __in_dev_get_rcu(dev);
1308                if (!in_dev)
1309                        continue;
1310
1311                addr = in_dev_select_addr(in_dev, scope);
1312                if (addr)
1313                        goto out_unlock;
1314        }
1315out_unlock:
1316        rcu_read_unlock();
1317        return addr;
1318}
1319EXPORT_SYMBOL(inet_select_addr);
1320
/*
 * confirm_addr_indev - look for a usable local address on one in_device.
 * @in_dev: device whose address list is walked
 * @dst:    if non-zero, some address's subnet must contain it
 * @local:  if non-zero, the address to confirm; 0 accepts any address
 * @scope:  highest ifa_scope value accepted for the returned address
 *
 * Two things are tracked while walking the list: "addr", a candidate local
 * address (ifa_local equal to @local, or any when @local is 0, with
 * ifa_scope <= @scope), and "same", set once an entry's subnet matches both
 * @local and @dst (each test is skipped when the value is 0).
 *
 * Returns the candidate address if a subnet match was also found, else 0.
 */
1321static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1322                              __be32 local, int scope)
1323{
1324        int same = 0;
1325        __be32 addr = 0;
1326
1327        for_ifa(in_dev) {
                    /* First acceptable candidate; done if the subnet
                     * condition was already satisfied by an earlier entry.
                     */
1328                if (!addr &&
1329                    (local == ifa->ifa_local || !local) &&
1330                    ifa->ifa_scope <= scope) {
1331                        addr = ifa->ifa_local;
1332                        if (same)
1333                                break;
1334                }
1335                if (!same) {
1336                        same = (!local || inet_ifa_match(local, ifa)) &&
1337                                (!dst || inet_ifa_match(dst, ifa));
1338                        if (same && addr) {
                                    /* Both conditions hold; with an explicit
                                     * @local or no @dst there is nothing left
                                     * to refine.
                                     */
1339                                if (local || !dst)
1340                                        break;
1341                                /* Is the selected addr into dst subnet? */
1342                                if (inet_ifa_match(addr, ifa))
1343                                        break;
1344                                /* No, then can we use new local src? */
1345                                if (ifa->ifa_scope <= scope) {
1346                                        addr = ifa->ifa_local;
1347                                        break;
1348                                }
1349                                /* search for large dst subnet for addr */
1350                                same = 0;
1351                        }
1352                }
1353        } endfor_ifa(in_dev);
1354
1355        return same ? addr : 0;
1356}
1357
1358/*
1359 * Confirm that local IP address exists using wildcards:
1360 * - net: netns to check, cannot be NULL
1361 * - in_dev: only on this interface, NULL=any interface
1362 * - dst: only in the same subnet as dst, 0=any dst
1363 * - local: address, 0=autoselect the local address
1364 * - scope: maximum allowed scope value for the local address
1365 */
1366__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1367                         __be32 dst, __be32 local, int scope)
1368{
1369        __be32 addr = 0;
1370        struct net_device *dev;
1371
1372        if (in_dev)
1373                return confirm_addr_indev(in_dev, dst, local, scope);
1374
1375        rcu_read_lock();
1376        for_each_netdev_rcu(net, dev) {
1377                in_dev = __in_dev_get_rcu(dev);
1378                if (in_dev) {
1379                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1380                        if (addr)
1381                                break;
1382                }
1383        }
1384        rcu_read_unlock();
1385
1386        return addr;
1387}
1388EXPORT_SYMBOL(inet_confirm_addr);
1389
1390/*
1391 *      Device notifier
1392 */
1393
/*
 * Add @nb to the blocking notifier chain inetaddr_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1394int register_inetaddr_notifier(struct notifier_block *nb)
1395{
1396        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1397}
1398EXPORT_SYMBOL(register_inetaddr_notifier);
1399
/*
 * Remove @nb from the blocking notifier chain inetaddr_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1400int unregister_inetaddr_notifier(struct notifier_block *nb)
1401{
1402        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1403}
1404EXPORT_SYMBOL(unregister_inetaddr_notifier);
1405
/*
 * Add @nb to the blocking notifier chain inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1406int register_inetaddr_validator_notifier(struct notifier_block *nb)
1407{
1408        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1409}
1410EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1411
/*
 * Remove @nb from the blocking notifier chain inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1412int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1413{
1414        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1415            nb);
1416}
1417EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1418
1419/* Rename ifa_labels for a device name change. Make some effort to preserve
1420 * existing alias numbering and to create unique labels if possible.
1421*/
1422static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1423{
1424        struct in_ifaddr *ifa;
1425        int named = 0;
1426
1427        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1428                char old[IFNAMSIZ], *dot;
1429
1430                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1431                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1432                if (named++ == 0)
1433                        goto skip;
1434                dot = strchr(old, ':');
1435                if (!dot) {
1436                        sprintf(old, ":%d", named);
1437                        dot = old;
1438                }
1439                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1440                        strcat(ifa->ifa_label, dot);
1441                else
1442                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1443skip:
1444                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1445        }
1446}
1447
1448static bool inetdev_valid_mtu(unsigned int mtu)
1449{
1450        return mtu >= IPV4_MIN_MTU;
1451}
1452
1453static void inetdev_send_gratuitous_arp(struct net_device *dev,
1454                                        struct in_device *in_dev)
1455
1456{
1457        struct in_ifaddr *ifa;
1458
1459        for (ifa = in_dev->ifa_list; ifa;
1460             ifa = ifa->ifa_next) {
1461                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1462                         ifa->ifa_local, dev,
1463                         ifa->ifa_local, NULL,
1464                         dev->dev_addr, NULL);
1465        }
1466}
1467
1468/* Called only under RTNL semaphore */
1469
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this:  notifier block (not used in the body)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails for
 * NETDEV_REGISTER, in which case the error is passed back through
 * notifier_from_errno().
 */
1470static int inetdev_event(struct notifier_block *this, unsigned long event,
1471                         void *ptr)
1472{
1473        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1474        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1475
1476        ASSERT_RTNL();
1477
            /* No IPv4 state attached to the device: create it on
             * registration, or once the MTU has become valid again; every
             * other event is ignored.
             */
1478        if (!in_dev) {
1479                if (event == NETDEV_REGISTER) {
1480                        in_dev = inetdev_init(dev);
1481                        if (IS_ERR(in_dev))
1482                                return notifier_from_errno(PTR_ERR(in_dev));
1483                        if (dev->flags & IFF_LOOPBACK) {
1484                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1485                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1486                        }
1487                } else if (event == NETDEV_CHANGEMTU) {
1488                        /* Re-enabling IP */
1489                        if (inetdev_valid_mtu(dev->mtu))
1490                                in_dev = inetdev_init(dev);
1491                }
1492                goto out;
1493        }
1494
1495        switch (event) {
1496        case NETDEV_REGISTER:
                    /* Registration with IPv4 state already present is
                     * treated as a bug; the device's ip_ptr is cleared.
                     */
1497                pr_debug("%s: bug\n", __func__);
1498                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1499                break;
1500        case NETDEV_UP:
1501                if (!inetdev_valid_mtu(dev->mtu))
1502                        break;
                    /* A loopback device coming up gets INADDR_LOOPBACK/8
                     * with host scope and infinite lifetimes; a failed
                     * allocation is silently tolerated.
                     */
1503                if (dev->flags & IFF_LOOPBACK) {
1504                        struct in_ifaddr *ifa = inet_alloc_ifa();
1505
1506                        if (ifa) {
1507                                INIT_HLIST_NODE(&ifa->hash);
1508                                ifa->ifa_local =
1509                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1510                                ifa->ifa_prefixlen = 8;
1511                                ifa->ifa_mask = inet_make_mask(8);
1512                                in_dev_hold(in_dev);
1513                                ifa->ifa_dev = in_dev;
1514                                ifa->ifa_scope = RT_SCOPE_HOST;
1515                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1516                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1517                                                 INFINITY_LIFE_TIME);
1518                                ipv4_devconf_setall(in_dev);
1519                                neigh_parms_data_state_setall(in_dev->arp_parms);
1520                                inet_insert_ifa(ifa);
1521                        }
1522                }
1523                ip_mc_up(in_dev);
1524                /* fall through */
1525        case NETDEV_CHANGEADDR:
                    /* UP and CHANGEADDR announce themselves via ARP only
                     * when IN_DEV_ARP_NOTIFY() is true for the device.
                     */
1526                if (!IN_DEV_ARP_NOTIFY(in_dev))
1527                        break;
1528                /* fall through */
1529        case NETDEV_NOTIFY_PEERS:
1530                /* Send gratuitous ARP to notify of link change */
1531                inetdev_send_gratuitous_arp(dev, in_dev);
1532                break;
1533        case NETDEV_DOWN:
1534                ip_mc_down(in_dev);
1535                break;
1536        case NETDEV_PRE_TYPE_CHANGE:
1537                ip_mc_unmap(in_dev);
1538                break;
1539        case NETDEV_POST_TYPE_CHANGE:
1540                ip_mc_remap(in_dev);
1541                break;
1542        case NETDEV_CHANGEMTU:
1543                if (inetdev_valid_mtu(dev->mtu))
1544                        break;
1545                /* disable IP when MTU is not enough */
1546                /* fall through */
1547        case NETDEV_UNREGISTER:
1548                inetdev_destroy(in_dev);
1549                break;
1550        case NETDEV_CHANGENAME:
1551                /* Do not notify about label change, this event is
1552                 * not interesting to applications using netlink.
1553                 */
                    /* NOTE(review): inetdev_changename() in this file does
                     * call rtmsg_ifa(RTM_NEWADDR, ...) for every address,
                     * which seems at odds with the comment above - confirm.
                     */
1554                inetdev_changename(dev, in_dev);
1555
                    /* Re-register the sysctl entries after the rename. */
1556                devinet_sysctl_unregister(in_dev);
1557                devinet_sysctl_register(in_dev);
1558                break;
1559        }
1560out:
1561        return NOTIFY_DONE;
1562}
1563
/*
 * Notifier block whose callback is inetdev_event().
 * NOTE(review): where it gets registered is not visible in this part of
 * the file.
 */
1564static struct notifier_block ip_netdev_notifier = {
1565        .notifier_call = inetdev_event,
1566};
1567
1568static size_t inet_nlmsg_size(void)
1569{
1570        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1571               + nla_total_size(4) /* IFA_ADDRESS */
1572               + nla_total_size(4) /* IFA_LOCAL */
1573               + nla_total_size(4) /* IFA_BROADCAST */
1574               + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1575               + nla_total_size(4)  /* IFA_FLAGS */
1576               + nla_total_size(4)  /* IFA_RT_PRIORITY */
1577               + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1578}
1579
1580static inline u32 cstamp_delta(unsigned long cstamp)
1581{
1582        return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1583}
1584
1585static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1586                         unsigned long tstamp, u32 preferred, u32 valid)
1587{
1588        struct ifa_cacheinfo ci;
1589
1590        ci.cstamp = cstamp_delta(cstamp);
1591        ci.tstamp = cstamp_delta(tstamp);
1592        ci.ifa_prefered = preferred;
1593        ci.ifa_valid = valid;
1594
1595        return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1596}
1597
1598static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1599                            struct inet_fill_args *args)
1600{
1601        struct ifaddrmsg *ifm;
1602        struct nlmsghdr  *nlh;
1603        u32 preferred, valid;
1604
1605        nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1606                        args->flags);
1607        if (!nlh)
1608                return -EMSGSIZE;
1609
1610        ifm = nlmsg_data(nlh);
1611        ifm->ifa_family = AF_INET;
1612        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1613        ifm->ifa_flags = ifa->ifa_flags;
1614        ifm->ifa_scope = ifa->ifa_scope;
1615        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1616
1617        if (args->netnsid >= 0 &&
1618            nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1619                goto nla_put_failure;
1620
1621        if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1622                preferred = ifa->ifa_preferred_lft;
1623                valid = ifa->ifa_valid_lft;
1624                if (preferred != INFINITY_LIFE_TIME) {
1625                        long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1626
1627                        if (preferred > tval)
1628                                preferred -= tval;
1629                        else
1630                                preferred = 0;
1631                        if (valid != INFINITY_LIFE_TIME) {
1632                                if (valid > tval)
1633                                        valid -= tval;
1634                                else
1635                                        valid = 0;
1636                        }
1637                }
1638        } else {
1639                preferred = INFINITY_LIFE_TIME;
1640                valid = INFINITY_LIFE_TIME;
1641        }
1642        if ((ifa->ifa_address &&
1643             nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1644            (ifa->ifa_local &&
1645             nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1646            (ifa->ifa_broadcast &&
1647             nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1648            (ifa->ifa_label[0] &&
1649             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1650            nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1651            (ifa->ifa_rt_priority &&
1652             nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1653            put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1654                          preferred, valid))
1655                goto nla_put_failure;
1656
1657        nlmsg_end(skb, nlh);
1658        return 0;
1659
1660nla_put_failure:
1661        nlmsg_cancel(skb, nlh);
1662        return -EMSGSIZE;
1663}
1664
1665static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1666                                      struct inet_fill_args *fillargs,
1667                                      struct net **tgt_net, struct sock *sk,
1668                                      struct netlink_callback *cb)
1669{
1670        struct netlink_ext_ack *extack = cb->extack;
1671        struct nlattr *tb[IFA_MAX+1];
1672        struct ifaddrmsg *ifm;
1673        int err, i;
1674
1675        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1676                NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1677                return -EINVAL;
1678        }
1679
1680        ifm = nlmsg_data(nlh);
1681        if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1682                NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1683                return -EINVAL;
1684        }
1685
1686        fillargs->ifindex = ifm->ifa_index;
1687        if (fillargs->ifindex) {
1688                cb->answer_flags |= NLM_F_DUMP_FILTERED;
1689                fillargs->flags |= NLM_F_DUMP_FILTERED;
1690        }
1691
1692        err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1693                                 ifa_ipv4_policy, extack);
1694        if (err < 0)
1695                return err;
1696
1697        for (i = 0; i <= IFA_MAX; ++i) {
1698                if (!tb[i])
1699                        continue;
1700
1701                if (i == IFA_TARGET_NETNSID) {
1702                        struct net *net;
1703
1704                        fillargs->netnsid = nla_get_s32(tb[i]);
1705
1706                        net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1707                        if (IS_ERR(net)) {
1708                                fillargs->netnsid = -1;
1709                                NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1710                                return PTR_ERR(net);
1711                        }
1712                        *tgt_net = net;
1713                } else {
1714                        NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1715                        return -EINVAL;
1716                }
1717        }
1718
1719        return 0;
1720}
1721
1722static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1723                            struct netlink_callback *cb, int s_ip_idx,
1724                            struct inet_fill_args *fillargs)
1725{
1726        struct in_ifaddr *ifa;
1727        int ip_idx = 0;
1728        int err;
1729
1730        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1731                if (ip_idx < s_ip_idx)
1732                        continue;
1733
1734                err = inet_fill_ifaddr(skb, ifa, fillargs);
1735                if (err < 0)
1736                        goto done;
1737
1738                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1739        }
1740        err = 0;
1741
1742done:
1743        cb->args[2] = ip_idx;
1744
1745        return err;
1746}
1747
1748static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1749{
1750        const struct nlmsghdr *nlh = cb->nlh;
1751        struct inet_fill_args fillargs = {
1752                .portid = NETLINK_CB(cb->skb).portid,
1753                .seq = nlh->nlmsg_seq,
1754                .event = RTM_NEWADDR,
1755                .flags = NLM_F_MULTI,
1756                .netnsid = -1,
1757        };
1758        struct net *net = sock_net(skb->sk);
1759        struct net *tgt_net = net;
1760        int h, s_h;
1761        int idx, s_idx;
1762        int s_ip_idx;
1763        struct net_device *dev;
1764        struct in_device *in_dev;
1765        struct hlist_head *head;
1766        int err = 0;
1767
1768        s_h = cb->args[0];
1769        s_idx = idx = cb->args[1];
1770        s_ip_idx = cb->args[2];
1771
1772        if (cb->strict_check) {
1773                err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1774                                                 skb->sk, cb);
1775                if (err < 0)
1776                        goto put_tgt_net;
1777
1778                err = 0;
1779                if (fillargs.ifindex) {
1780                        dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1781                        if (!dev) {
1782                                err = -ENODEV;
1783                                goto put_tgt_net;
1784                        }
1785
1786                        in_dev = __in_dev_get_rtnl(dev);
1787                        if (in_dev) {
1788                                err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1789                                                       &fillargs);
1790                        }
1791                        goto put_tgt_net;
1792                }
1793        }
1794
1795        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1796                idx = 0;
1797                head = &tgt_net->dev_index_head[h];
1798                rcu_read_lock();
1799                cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1800                          tgt_net->dev_base_seq;
1801                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1802                        if (idx < s_idx)
1803                                goto cont;
1804                        if (h > s_h || idx > s_idx)
1805                                s_ip_idx = 0;
1806                        in_dev = __in_dev_get_rcu(dev);
1807                        if (!in_dev)
1808                                goto cont;
1809
1810                        err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1811                                               &fillargs);
1812                        if (err < 0) {
1813                                rcu_read_unlock();
1814                                goto done;
1815                        }
1816cont:
1817                        idx++;
1818                }
1819                rcu_read_unlock();
1820        }
1821
1822done:
1823        cb->args[0] = h;
1824        cb->args[1] = idx;
1825put_tgt_net:
1826        if (fillargs.netnsid >= 0)
1827                put_net(tgt_net);
1828
1829        return err < 0 ? err : skb->len;
1830}
1831
1832static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1833                      u32 portid)
1834{
1835        struct inet_fill_args fillargs = {
1836                .portid = portid,
1837                .seq = nlh ? nlh->nlmsg_seq : 0,
1838                .event = event,
1839                .flags = 0,
1840                .netnsid = -1,
1841        };
1842        struct sk_buff *skb;
1843        int err = -ENOBUFS;
1844        struct net *net;
1845
1846        net = dev_net(ifa->ifa_dev->dev);
1847        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1848        if (!skb)
1849                goto errout;
1850
1851        err = inet_fill_ifaddr(skb, ifa, &fillargs);
1852        if (err < 0) {
1853                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1854                WARN_ON(err == -EMSGSIZE);
1855                kfree_skb(skb);
1856                goto errout;
1857        }
1858        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1859        return;
1860errout:
1861        if (err < 0)
1862                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1863}
1864
1865static size_t inet_get_link_af_size(const struct net_device *dev,
1866                                    u32 ext_filter_mask)
1867{
1868        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1869
1870        if (!in_dev)
1871                return 0;
1872
1873        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1874}
1875
1876static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1877                             u32 ext_filter_mask)
1878{
1879        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1880        struct nlattr *nla;
1881        int i;
1882
1883        if (!in_dev)
1884                return -ENODATA;
1885
1886        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1887        if (!nla)
1888                return -EMSGSIZE;
1889
1890        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1891                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1892
1893        return 0;
1894}
1895
1896static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1897        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1898};
1899
1900static int inet_validate_link_af(const struct net_device *dev,
1901                                 const struct nlattr *nla)
1902{
1903        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1904        int err, rem;
1905
1906        if (dev && !__in_dev_get_rcu(dev))
1907                return -EAFNOSUPPORT;
1908
1909        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1910        if (err < 0)
1911                return err;
1912
1913        if (tb[IFLA_INET_CONF]) {
1914                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1915                        int cfgid = nla_type(a);
1916
1917                        if (nla_len(a) < 4)
1918                                return -EINVAL;
1919
1920                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1921                                return -EINVAL;
1922                }
1923        }
1924
1925        return 0;
1926}
1927
1928static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1929{
1930        struct in_device *in_dev = __in_dev_get_rcu(dev);
1931        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1932        int rem;
1933
1934        if (!in_dev)
1935                return -EAFNOSUPPORT;
1936
1937        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1938                BUG();
1939
1940        if (tb[IFLA_INET_CONF]) {
1941                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1942                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1943        }
1944
1945        return 0;
1946}
1947
1948static int inet_netconf_msgsize_devconf(int type)
1949{
1950        int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1951                   + nla_total_size(4); /* NETCONFA_IFINDEX */
1952        bool all = false;
1953
1954        if (type == NETCONFA_ALL)
1955                all = true;
1956
1957        if (all || type == NETCONFA_FORWARDING)
1958                size += nla_total_size(4);
1959        if (all || type == NETCONFA_RP_FILTER)
1960                size += nla_total_size(4);
1961        if (all || type == NETCONFA_MC_FORWARDING)
1962                size += nla_total_size(4);
1963        if (all || type == NETCONFA_BC_FORWARDING)
1964                size += nla_total_size(4);
1965        if (all || type == NETCONFA_PROXY_NEIGH)
1966                size += nla_total_size(4);
1967        if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1968                size += nla_total_size(4);
1969
1970        return size;
1971}
1972
1973static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1974                                     struct ipv4_devconf *devconf, u32 portid,
1975                                     u32 seq, int event, unsigned int flags,
1976                                     int type)
1977{
1978        struct nlmsghdr  *nlh;
1979        struct netconfmsg *ncm;
1980        bool all = false;
1981
1982        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1983                        flags);
1984        if (!nlh)
1985                return -EMSGSIZE;
1986
1987        if (type == NETCONFA_ALL)
1988                all = true;
1989
1990        ncm = nlmsg_data(nlh);
1991        ncm->ncm_family = AF_INET;
1992
1993        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1994                goto nla_put_failure;
1995
1996        if (!devconf)
1997                goto out;
1998
1999        if ((all || type == NETCONFA_FORWARDING) &&
2000            nla_put_s32(skb, NETCONFA_FORWARDING,
2001                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2002                goto nla_put_failure;
2003        if ((all || type == NETCONFA_RP_FILTER) &&
2004            nla_put_s32(skb, NETCONFA_RP_FILTER,
2005                        IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2006                goto nla_put_failure;
2007        if ((all || type == NETCONFA_MC_FORWARDING) &&
2008            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2009                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2010                goto nla_put_failure;
2011        if ((all || type == NETCONFA_BC_FORWARDING) &&
2012            nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2013                        IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2014                goto nla_put_failure;
2015        if ((all || type == NETCONFA_PROXY_NEIGH) &&
2016            nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2017                        IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2018                goto nla_put_failure;
2019        if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2020            nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2021                        IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2022                goto nla_put_failure;
2023
2024out:
2025        nlmsg_end(skb, nlh);
2026        return 0;
2027
2028nla_put_failure:
2029        nlmsg_cancel(skb, nlh);
2030        return -EMSGSIZE;
2031}
2032
2033void inet_netconf_notify_devconf(struct net *net, int event, int type,
2034                                 int ifindex, struct ipv4_devconf *devconf)
2035{
2036        struct sk_buff *skb;
2037        int err = -ENOBUFS;
2038
2039        skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2040        if (!skb)
2041                goto errout;
2042
2043        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2044                                        event, 0, type);
2045        if (err < 0) {
2046                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2047                WARN_ON(err == -EMSGSIZE);
2048                kfree_skb(skb);
2049                goto errout;
2050        }
2051        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2052        return;
2053errout:
2054        if (err < 0)
2055                rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2056}
2057
2058static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2059        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2060        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2061        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2062        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2063        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2064};
2065
2066static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2067                                    struct nlmsghdr *nlh,
2068                                    struct netlink_ext_ack *extack)
2069{
2070        struct net *net = sock_net(in_skb->sk);
2071        struct nlattr *tb[NETCONFA_MAX+1];
2072        struct netconfmsg *ncm;
2073        struct sk_buff *skb;
2074        struct ipv4_devconf *devconf;
2075        struct in_device *in_dev;
2076        struct net_device *dev;
2077        int ifindex;
2078        int err;
2079
2080        err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2081                          devconf_ipv4_policy, extack);
2082        if (err < 0)
2083                goto errout;
2084
2085        err = -EINVAL;
2086        if (!tb[NETCONFA_IFINDEX])
2087                goto errout;
2088
2089        ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2090        switch (ifindex) {
2091        case NETCONFA_IFINDEX_ALL:
2092                devconf = net->ipv4.devconf_all;
2093                break;
2094        case NETCONFA_IFINDEX_DEFAULT:
2095                devconf = net->ipv4.devconf_dflt;
2096                break;
2097        default:
2098                dev = __dev_get_by_index(net, ifindex);
2099                if (!dev)
2100                        goto errout;
2101                in_dev = __in_dev_get_rtnl(dev);
2102                if (!in_dev)
2103                        goto errout;
2104                devconf = &in_dev->cnf;
2105                break;
2106        }
2107
2108        err = -ENOBUFS;
2109        skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2110        if (!skb)
2111                goto errout;
2112
2113        err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2114                                        NETLINK_CB(in_skb).portid,
2115                                        nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2116                                        NETCONFA_ALL);
2117        if (err < 0) {
2118                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2119                WARN_ON(err == -EMSGSIZE);
2120                kfree_skb(skb);
2121                goto errout;
2122        }
2123        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2124errout:
2125        return err;
2126}
2127
2128static int inet_netconf_dump_devconf(struct sk_buff *skb,
2129                                     struct netlink_callback *cb)
2130{
2131        const struct nlmsghdr *nlh = cb->nlh;
2132        struct net *net = sock_net(skb->sk);
2133        int h, s_h;
2134        int idx, s_idx;
2135        struct net_device *dev;
2136        struct in_device *in_dev;
2137        struct hlist_head *head;
2138
2139        if (cb->strict_check) {
2140                struct netlink_ext_ack *extack = cb->extack;
2141                struct netconfmsg *ncm;
2142
2143                if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2144                        NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2145                        return -EINVAL;
2146                }
2147
2148                if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2149                        NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2150                        return -EINVAL;
2151                }
2152        }
2153
2154        s_h = cb->args[0];
2155        s_idx = idx = cb->args[1];
2156
2157        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2158                idx = 0;
2159                head = &net->dev_index_head[h];
2160                rcu_read_lock();
2161                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2162                          net->dev_base_seq;
2163                hlist_for_each_entry_rcu(dev, head, index_hlist) {
2164                        if (idx < s_idx)
2165                                goto cont;
2166                        in_dev = __in_dev_get_rcu(dev);
2167                        if (!in_dev)
2168                                goto cont;
2169
2170                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
2171                                                      &in_dev->cnf,
2172                                                      NETLINK_CB(cb->skb).portid,
2173                                                      nlh->nlmsg_seq,
2174                                                      RTM_NEWNETCONF,
2175                                                      NLM_F_MULTI,
2176                                                      NETCONFA_ALL) < 0) {
2177                                rcu_read_unlock();
2178                                goto done;
2179                        }
2180                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2181cont:
2182                        idx++;
2183                }
2184                rcu_read_unlock();
2185        }
2186        if (h == NETDEV_HASHENTRIES) {
2187                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2188                                              net->ipv4.devconf_all,
2189                                              NETLINK_CB(cb->skb).portid,
2190                                              nlh->nlmsg_seq,
2191                                              RTM_NEWNETCONF, NLM_F_MULTI,
2192                                              NETCONFA_ALL) < 0)
2193                        goto done;
2194                else
2195                        h++;
2196        }
2197        if (h == NETDEV_HASHENTRIES + 1) {
2198                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2199                                              net->ipv4.devconf_dflt,
2200                                              NETLINK_CB(cb->skb).portid,
2201                                              nlh->nlmsg_seq,
2202                                              RTM_NEWNETCONF, NLM_F_MULTI,
2203                                              NETCONFA_ALL) < 0)
2204                        goto done;
2205                else
2206                        h++;
2207        }
2208done:
2209        cb->args[0] = h;
2210        cb->args[1] = idx;
2211
2212        return skb->len;
2213}
2214
2215#ifdef CONFIG_SYSCTL
2216
2217static void devinet_copy_dflt_conf(struct net *net, int i)
2218{
2219        struct net_device *dev;
2220
2221        rcu_read_lock();
2222        for_each_netdev_rcu(net, dev) {
2223                struct in_device *in_dev;
2224
2225                in_dev = __in_dev_get_rcu(dev);
2226                if (in_dev && !test_bit(i, in_dev->cnf.state))
2227                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2228        }
2229        rcu_read_unlock();
2230}
2231
2232/* called with RTNL locked */
2233static void inet_forward_change(struct net *net)
2234{
2235        struct net_device *dev;
2236        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2237
2238        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2239        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2240        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2241                                    NETCONFA_FORWARDING,
2242                                    NETCONFA_IFINDEX_ALL,
2243                                    net->ipv4.devconf_all);
2244        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2245                                    NETCONFA_FORWARDING,
2246                                    NETCONFA_IFINDEX_DEFAULT,
2247                                    net->ipv4.devconf_dflt);
2248
2249        for_each_netdev(net, dev) {
2250                struct in_device *in_dev;
2251
2252                if (on)
2253                        dev_disable_lro(dev);
2254
2255                in_dev = __in_dev_get_rtnl(dev);
2256                if (in_dev) {
2257                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2258                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2259                                                    NETCONFA_FORWARDING,
2260                                                    dev->ifindex, &in_dev->cnf);
2261                }
2262        }
2263}
2264
2265static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2266{
2267        if (cnf == net->ipv4.devconf_dflt)
2268                return NETCONFA_IFINDEX_DEFAULT;
2269        else if (cnf == net->ipv4.devconf_all)
2270                return NETCONFA_IFINDEX_ALL;
2271        else {
2272                struct in_device *idev
2273                        = container_of(cnf, struct in_device, cnf);
2274                return idev->dev->ifindex;
2275        }
2276}
2277
2278static int devinet_conf_proc(struct ctl_table *ctl, int write,
2279                             void __user *buffer,
2280                             size_t *lenp, loff_t *ppos)
2281{
2282        int old_value = *(int *)ctl->data;
2283        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2284        int new_value = *(int *)ctl->data;
2285
2286        if (write) {
2287                struct ipv4_devconf *cnf = ctl->extra1;
2288                struct net *net = ctl->extra2;
2289                int i = (int *)ctl->data - cnf->data;
2290                int ifindex;
2291
2292                set_bit(i, cnf->state);
2293
2294                if (cnf == net->ipv4.devconf_dflt)
2295                        devinet_copy_dflt_conf(net, i);
2296                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2297                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2298                        if ((new_value == 0) && (old_value != 0))
2299                                rt_cache_flush(net);
2300
2301                if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2302                    new_value != old_value)
2303                        rt_cache_flush(net);
2304
2305                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2306                    new_value != old_value) {
2307                        ifindex = devinet_conf_ifindex(net, cnf);
2308                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2309                                                    NETCONFA_RP_FILTER,
2310                                                    ifindex, cnf);
2311                }
2312                if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2313                    new_value != old_value) {
2314                        ifindex = devinet_conf_ifindex(net, cnf);
2315                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2316                                                    NETCONFA_PROXY_NEIGH,
2317                                                    ifindex, cnf);
2318                }
2319                if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2320                    new_value != old_value) {
2321                        ifindex = devinet_conf_ifindex(net, cnf);
2322                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2323                                                    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2324                                                    ifindex, cnf);
2325                }
2326        }
2327
2328        return ret;
2329}
2330
2331static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2332                                  void __user *buffer,
2333                                  size_t *lenp, loff_t *ppos)
2334{
2335        int *valp = ctl->data;
2336        int val = *valp;
2337        loff_t pos = *ppos;
2338        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2339
2340        if (write && *valp != val) {
2341                struct net *net = ctl->extra2;
2342
2343                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2344                        if (!rtnl_trylock()) {
2345                                /* Restore the original values before restarting */
2346                                *valp = val;
2347                                *ppos = pos;
2348                                return restart_syscall();
2349                        }
2350                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2351                                inet_forward_change(net);
2352                        } else {
2353                                struct ipv4_devconf *cnf = ctl->extra1;
2354                                struct in_device *idev =
2355                                        container_of(cnf, struct in_device, cnf);
2356                                if (*valp)
2357                                        dev_disable_lro(idev->dev);
2358                                inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2359                                                            NETCONFA_FORWARDING,
2360                                                            idev->dev->ifindex,
2361                                                            cnf);
2362                        }
2363                        rtnl_unlock();
2364                        rt_cache_flush(net);
2365                } else
2366                        inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2367                                                    NETCONFA_FORWARDING,
2368                                                    NETCONFA_IFINDEX_DEFAULT,
2369                                                    net->ipv4.devconf_dflt);
2370        }
2371
2372        return ret;
2373}
2374
2375static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2376                                void __user *buffer,
2377                                size_t *lenp, loff_t *ppos)
2378{
2379        int *valp = ctl->data;
2380        int val = *valp;
2381        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382        struct net *net = ctl->extra2;
2383
2384        if (write && *valp != val)
2385                rt_cache_flush(net);
2386
2387        return ret;
2388}
2389
/*
 * Helpers building the ctl_table entries of the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY() describes setting @attr under file name @name with
 * mode @mval and handler @proc.  Its data pointer refers to the matching
 * slot of the global ipv4_devconf (slot index is IPV4_DEVCONF_<attr> - 1)
 * and extra1 to ipv4_devconf itself.
 *
 * The wrappers select mode and handler:
 *   RW       - 0644, devinet_conf_proc
 *   RO       - 0444, devinet_conf_proc
 *   COMPLEX  - 0644, caller-supplied handler
 *   FLUSHING - 0644, ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2412
2413static struct devinet_sysctl_table {
2414        struct ctl_table_header *sysctl_header;
2415        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2416} devinet_sysctl = {
2417        .devinet_vars = {
2418                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2419                                             devinet_sysctl_forward),
2420                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2421                DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2422
2423                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2424                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2425                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2426                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2427                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2428                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2429                                        "accept_source_route"),
2430                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2431                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2432                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2433                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2434                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2435                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2436                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2437                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2438                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2439                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2440                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2441                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2442                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2443                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2444                                        "force_igmp_version"),
2445                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2446                                        "igmpv2_unsolicited_report_interval"),
2447                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2448                                        "igmpv3_unsolicited_report_interval"),
2449                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2450                                        "ignore_routes_with_linkdown"),
2451                DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2452                                        "drop_gratuitous_arp"),
2453
2454                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2455                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2456                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2457                                              "promote_secondaries"),
2458                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2459                                              "route_localnet"),
2460                DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2461                                              "drop_unicast_in_l2_multicast"),
2462        },
2463};
2464
2465static int __devinet_sysctl_register(struct net *net, char *dev_name,
2466                                     int ifindex, struct ipv4_devconf *p)
2467{
2468        int i;
2469        struct devinet_sysctl_table *t;
2470        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2471
2472        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2473        if (!t)
2474                goto out;
2475
2476        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2477                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2478                t->devinet_vars[i].extra1 = p;
2479                t->devinet_vars[i].extra2 = net;
2480        }
2481
2482        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2483
2484        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2485        if (!t->sysctl_header)
2486                goto free;
2487
2488        p->sysctl = t;
2489
2490        inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2491                                    ifindex, p);
2492        return 0;
2493
2494free:
2495        kfree(t);
2496out:
2497        return -ENOBUFS;
2498}
2499
2500static void __devinet_sysctl_unregister(struct net *net,
2501                                        struct ipv4_devconf *cnf, int ifindex)
2502{
2503        struct devinet_sysctl_table *t = cnf->sysctl;
2504
2505        if (t) {
2506                cnf->sysctl = NULL;
2507                unregister_net_sysctl_table(t->sysctl_header);
2508                kfree(t);
2509        }
2510
2511        inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2512}
2513
2514static int devinet_sysctl_register(struct in_device *idev)
2515{
2516        int err;
2517
2518        if (!sysctl_dev_name_is_allowed(idev->dev->name))
2519                return -EINVAL;
2520
2521        err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2522        if (err)
2523                return err;
2524        err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2525                                        idev->dev->ifindex, &idev->cnf);
2526        if (err)
2527                neigh_sysctl_unregister(idev->arp_parms);
2528        return err;
2529}
2530
2531static void devinet_sysctl_unregister(struct in_device *idev)
2532{
2533        struct net *net = dev_net(idev->dev);
2534
2535        __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2536        neigh_sysctl_unregister(idev->arp_parms);
2537}
2538
2539static struct ctl_table ctl_forward_entry[] = {
2540        {
2541                .procname       = "ip_forward",
2542                .data           = &ipv4_devconf.data[
2543                                        IPV4_DEVCONF_FORWARDING - 1],
2544                .maxlen         = sizeof(int),
2545                .mode           = 0644,
2546                .proc_handler   = devinet_sysctl_forward,
2547                .extra1         = &ipv4_devconf,
2548                .extra2         = &init_net,
2549        },
2550        { },
2551};
2552#endif
2553
2554static __net_init int devinet_init_net(struct net *net)
2555{
2556        int err;
2557        struct ipv4_devconf *all, *dflt;
2558#ifdef CONFIG_SYSCTL
2559        struct ctl_table *tbl = ctl_forward_entry;
2560        struct ctl_table_header *forw_hdr;
2561#endif
2562
2563        err = -ENOMEM;
2564        all = &ipv4_devconf;
2565        dflt = &ipv4_devconf_dflt;
2566
2567        if (!net_eq(net, &init_net)) {
2568                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2569                if (!all)
2570                        goto err_alloc_all;
2571
2572                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2573                if (!dflt)
2574                        goto err_alloc_dflt;
2575
2576#ifdef CONFIG_SYSCTL
2577                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2578                if (!tbl)
2579                        goto err_alloc_ctl;
2580
2581                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2582                tbl[0].extra1 = all;
2583                tbl[0].extra2 = net;
2584#endif
2585        }
2586
2587#ifdef CONFIG_SYSCTL
2588        err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2589        if (err < 0)
2590                goto err_reg_all;
2591
2592        err = __devinet_sysctl_register(net, "default",
2593                                        NETCONFA_IFINDEX_DEFAULT, dflt);
2594        if (err < 0)
2595                goto err_reg_dflt;
2596
2597        err = -ENOMEM;
2598        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2599        if (!forw_hdr)
2600                goto err_reg_ctl;
2601        net->ipv4.forw_hdr = forw_hdr;
2602#endif
2603
2604        net->ipv4.devconf_all = all;
2605        net->ipv4.devconf_dflt = dflt;
2606        return 0;
2607
2608#ifdef CONFIG_SYSCTL
2609err_reg_ctl:
2610        __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2611err_reg_dflt:
2612        __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2613err_reg_all:
2614        if (tbl != ctl_forward_entry)
2615                kfree(tbl);
2616err_alloc_ctl:
2617#endif
2618        if (dflt != &ipv4_devconf_dflt)
2619                kfree(dflt);
2620err_alloc_dflt:
2621        if (all != &ipv4_devconf)
2622                kfree(all);
2623err_alloc_all:
2624        return err;
2625}
2626
2627static __net_exit void devinet_exit_net(struct net *net)
2628{
2629#ifdef CONFIG_SYSCTL
2630        struct ctl_table *tbl;
2631
2632        tbl = net->ipv4.forw_hdr->ctl_table_arg;
2633        unregister_net_sysctl_table(net->ipv4.forw_hdr);
2634        __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2635                                    NETCONFA_IFINDEX_DEFAULT);
2636        __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2637                                    NETCONFA_IFINDEX_ALL);
2638        kfree(tbl);
2639#endif
2640        kfree(net->ipv4.devconf_dflt);
2641        kfree(net->ipv4.devconf_all);
2642}
2643
2644static __net_initdata struct pernet_operations devinet_ops = {
2645        .init = devinet_init_net,
2646        .exit = devinet_exit_net,
2647};
2648
2649static struct rtnl_af_ops inet_af_ops __read_mostly = {
2650        .family           = AF_INET,
2651        .fill_link_af     = inet_fill_link_af,
2652        .get_link_af_size = inet_get_link_af_size,
2653        .validate_link_af = inet_validate_link_af,
2654        .set_link_af      = inet_set_link_af,
2655};
2656
2657void __init devinet_init(void)
2658{
2659        int i;
2660
2661        for (i = 0; i < IN4_ADDR_HSIZE; i++)
2662                INIT_HLIST_HEAD(&inet_addr_lst[i]);
2663
2664        register_pernet_subsys(&devinet_ops);
2665
2666        register_gifconf(PF_INET, inet_gifconf);
2667        register_netdevice_notifier(&ip_netdev_notifier);
2668
2669        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2670
2671        rtnl_af_register(&inet_af_ops);
2672
2673        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2674        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2675        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2676        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2677                      inet_netconf_dump_devconf, 0);
2678}
2679