linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/string.h>
  36#include <linux/mm.h>
  37#include <linux/socket.h>
  38#include <linux/sockios.h>
  39#include <linux/in.h>
  40#include <linux/errno.h>
  41#include <linux/interrupt.h>
  42#include <linux/if_addr.h>
  43#include <linux/if_ether.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/etherdevice.h>
  47#include <linux/skbuff.h>
  48#include <linux/init.h>
  49#include <linux/notifier.h>
  50#include <linux/inetdevice.h>
  51#include <linux/igmp.h>
  52#include <linux/slab.h>
  53#include <linux/hash.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58#include <linux/netconf.h>
  59
  60#include <net/arp.h>
  61#include <net/ip.h>
  62#include <net/route.h>
  63#include <net/ip_fib.h>
  64#include <net/rtnetlink.h>
  65#include <net/net_namespace.h>
  66#include <net/addrconf.h>
  67
  68#include "fib_lookup.h"
  69
  70static struct ipv4_devconf ipv4_devconf = {
  71        .data = {
  72                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  74                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  75                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  76                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  77                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  78        },
  79};
  80
  81static struct ipv4_devconf ipv4_devconf_dflt = {
  82        .data = {
  83                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  84                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  85                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  86                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  87                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  88                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
  89                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
  90        },
  91};
  92
  93#define IPV4_DEVCONF_DFLT(net, attr) \
  94        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  95
  96static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  97        [IFA_LOCAL]             = { .type = NLA_U32 },
  98        [IFA_ADDRESS]           = { .type = NLA_U32 },
  99        [IFA_BROADCAST]         = { .type = NLA_U32 },
 100        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 101        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
 102        [IFA_FLAGS]             = { .type = NLA_U32 },
 103};
 104
 105#define IN4_ADDR_HSIZE_SHIFT    8
 106#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
 107
 108static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 109
 110static u32 inet_addr_hash(const struct net *net, __be32 addr)
 111{
 112        u32 val = (__force u32) addr ^ net_hash_mix(net);
 113
 114        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 115}
 116
 117static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 118{
 119        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 120
 121        ASSERT_RTNL();
 122        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 123}
 124
 125static void inet_hash_remove(struct in_ifaddr *ifa)
 126{
 127        ASSERT_RTNL();
 128        hlist_del_init_rcu(&ifa->hash);
 129}
 130
 131/**
 132 * __ip_dev_find - find the first device with a given source address.
 133 * @net: the net namespace
 134 * @addr: the source address
 135 * @devref: if true, take a reference on the found device
 136 *
 137 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 138 */
 139struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 140{
 141        u32 hash = inet_addr_hash(net, addr);
 142        struct net_device *result = NULL;
 143        struct in_ifaddr *ifa;
 144
 145        rcu_read_lock();
 146        hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
 147                if (ifa->ifa_local == addr) {
 148                        struct net_device *dev = ifa->ifa_dev->dev;
 149
 150                        if (!net_eq(dev_net(dev), net))
 151                                continue;
 152                        result = dev;
 153                        break;
 154                }
 155        }
 156        if (!result) {
 157                struct flowi4 fl4 = { .daddr = addr };
 158                struct fib_result res = { 0 };
 159                struct fib_table *local;
 160
 161                /* Fallback to FIB local table so that communication
 162                 * over loopback subnets work.
 163                 */
 164                local = fib_get_table(net, RT_TABLE_LOCAL);
 165                if (local &&
 166                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 167                    res.type == RTN_LOCAL)
 168                        result = FIB_RES_DEV(res);
 169        }
 170        if (result && devref)
 171                dev_hold(result);
 172        rcu_read_unlock();
 173        return result;
 174}
 175EXPORT_SYMBOL(__ip_dev_find);
 176
 177static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 178
 179static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 180static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 181                         int destroy);
 182#ifdef CONFIG_SYSCTL
 183static int devinet_sysctl_register(struct in_device *idev);
 184static void devinet_sysctl_unregister(struct in_device *idev);
 185#else
 186static int devinet_sysctl_register(struct in_device *idev)
 187{
 188        return 0;
 189}
 190static void devinet_sysctl_unregister(struct in_device *idev)
 191{
 192}
 193#endif
 194
 195/* Locks all the inet devices. */
 196
 197static struct in_ifaddr *inet_alloc_ifa(void)
 198{
 199        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 200}
 201
 202static void inet_rcu_free_ifa(struct rcu_head *head)
 203{
 204        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 205        if (ifa->ifa_dev)
 206                in_dev_put(ifa->ifa_dev);
 207        kfree(ifa);
 208}
 209
 210static void inet_free_ifa(struct in_ifaddr *ifa)
 211{
 212        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 213}
 214
 215void in_dev_finish_destroy(struct in_device *idev)
 216{
 217        struct net_device *dev = idev->dev;
 218
 219        WARN_ON(idev->ifa_list);
 220        WARN_ON(idev->mc_list);
 221        kfree(rcu_dereference_protected(idev->mc_hash, 1));
 222#ifdef NET_REFCNT_DEBUG
 223        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 224#endif
 225        dev_put(dev);
 226        if (!idev->dead)
 227                pr_err("Freeing alive in_device %p\n", idev);
 228        else
 229                kfree(idev);
 230}
 231EXPORT_SYMBOL(in_dev_finish_destroy);
 232
 233static struct in_device *inetdev_init(struct net_device *dev)
 234{
 235        struct in_device *in_dev;
 236        int err = -ENOMEM;
 237
 238        ASSERT_RTNL();
 239
 240        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 241        if (!in_dev)
 242                goto out;
 243        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 244                        sizeof(in_dev->cnf));
 245        in_dev->cnf.sysctl = NULL;
 246        in_dev->dev = dev;
 247        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 248        if (!in_dev->arp_parms)
 249                goto out_kfree;
 250        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 251                dev_disable_lro(dev);
 252        /* Reference in_dev->dev */
 253        dev_hold(dev);
 254        /* Account for reference dev->ip_ptr (below) */
 255        in_dev_hold(in_dev);
 256
 257        err = devinet_sysctl_register(in_dev);
 258        if (err) {
 259                in_dev->dead = 1;
 260                in_dev_put(in_dev);
 261                in_dev = NULL;
 262                goto out;
 263        }
 264        ip_mc_init_dev(in_dev);
 265        if (dev->flags & IFF_UP)
 266                ip_mc_up(in_dev);
 267
 268        /* we can receive as soon as ip_ptr is set -- do this last */
 269        rcu_assign_pointer(dev->ip_ptr, in_dev);
 270out:
 271        return in_dev ?: ERR_PTR(err);
 272out_kfree:
 273        kfree(in_dev);
 274        in_dev = NULL;
 275        goto out;
 276}
 277
 278static void in_dev_rcu_put(struct rcu_head *head)
 279{
 280        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 281        in_dev_put(idev);
 282}
 283
 284static void inetdev_destroy(struct in_device *in_dev)
 285{
 286        struct in_ifaddr *ifa;
 287        struct net_device *dev;
 288
 289        ASSERT_RTNL();
 290
 291        dev = in_dev->dev;
 292
 293        in_dev->dead = 1;
 294
 295        ip_mc_destroy_dev(in_dev);
 296
 297        while ((ifa = in_dev->ifa_list) != NULL) {
 298                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 299                inet_free_ifa(ifa);
 300        }
 301
 302        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 303
 304        devinet_sysctl_unregister(in_dev);
 305        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 306        arp_ifdown(dev);
 307
 308        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 309}
 310
 311int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 312{
 313        rcu_read_lock();
 314        for_primary_ifa(in_dev) {
 315                if (inet_ifa_match(a, ifa)) {
 316                        if (!b || inet_ifa_match(b, ifa)) {
 317                                rcu_read_unlock();
 318                                return 1;
 319                        }
 320                }
 321        } endfor_ifa(in_dev);
 322        rcu_read_unlock();
 323        return 0;
 324}
 325
     /*
      * Remove the address *ifap from in_dev's address list (RTNL required).
      *
      * @in_dev:  device whose list is modified
      * @ifap:    link (list head or some ->ifa_next) pointing at the address
      *           to remove
      * @destroy: non-zero to also hand the removed address to inet_free_ifa()
      * @nlh, @portid: passed through to rtmsg_ifa() for every notification
      *
      * When the removed address is a primary one, its secondaries (same mask,
      * same subnet) are either deleted as well or - if
      * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set - the first of them is
      * promoted to primary and the remaining ones get their routes re-added.
      */
  326static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
  327                         int destroy, struct nlmsghdr *nlh, u32 portid)
  328{
  329        struct in_ifaddr *promote = NULL;
  330        struct in_ifaddr *ifa, *ifa1 = *ifap;
  331        struct in_ifaddr *last_prim = in_dev->ifa_list;
  332        struct in_ifaddr *prev_prom = NULL;
  333        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
  334
  335        ASSERT_RTNL();
  336
  337        /* 1. Deleting primary ifaddr forces deletion of all secondaries
  338         * unless alias promotion is set
  339         **/
  340
  341        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
  342                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
  343
  344                while ((ifa = *ifap1) != NULL) {
                     /* Track the last primary whose scope is not
                      * narrower than ifa1's; a promoted address is
                      * re-linked right after it below.
                      */
  345                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
  346                            ifa1->ifa_scope <= ifa->ifa_scope)
  347                                last_prim = ifa;
  348
                     /* Not a secondary of ifa1's subnet: step over it
                      * and remember it as the potential predecessor of
                      * the promotion candidate.
                      */
  349                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
  350                            ifa1->ifa_mask != ifa->ifa_mask ||
  351                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
  352                                ifap1 = &ifa->ifa_next;
  353                                prev_prom = ifa;
  354                                continue;
  355                        }
  356
  357                        if (!do_promote) {
                             /* No promotion: unhash, unlink, announce
                              * and free this secondary right away.
                              */
  358                                inet_hash_remove(ifa);
  359                                *ifap1 = ifa->ifa_next;
  360
  361                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
  362                                blocking_notifier_call_chain(&inetaddr_chain,
  363                                                NETDEV_DOWN, ifa);
  364                                inet_free_ifa(ifa);
  365                        } else {
                             /* First matching secondary becomes the
                              * promotion candidate; stop scanning.
                              */
  366                                promote = ifa;
  367                                break;
  368                        }
  369                }
  370        }
  371
  372        /* On promotion all secondaries from subnet are changing
  373         * the primary IP, we must remove all their routes silently
  374         * and later to add them back with new prefsrc. Do this
  375         * while all addresses are on the device list.
  376         */
  377        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
  378                if (ifa1->ifa_mask == ifa->ifa_mask &&
  379                    inet_ifa_match(ifa1->ifa_address, ifa))
  380                        fib_del_ifaddr(ifa, ifa1);
  381        }
  382
  383        /* 2. Unlink it */
  384
  385        *ifap = ifa1->ifa_next;
  386        inet_hash_remove(ifa1);
  387
  388        /* 3. Announce address deletion */
  389
  390        /* Send message first, then call notifier.
  391           At first sight, FIB update triggered by notifier
  392           will refer to already deleted ifaddr, that could confuse
  393           netlink listeners. It is not true: look, gated sees
  394           that route deleted and if it still thinks that ifaddr
  395           is valid, it will try to restore deleted routes... Grr.
  396           So that, this order is correct.
  397         */
  398        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
  399        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
  400
  401        if (promote) {
             /* Saved before promote is possibly re-linked, so the
              * loop below still walks the entries that followed it.
              */
  402                struct in_ifaddr *next_sec = promote->ifa_next;
  403
             /* Only re-link when something was skipped before the
              * candidate: detach it from after prev_prom and insert
              * it after last_prim.
              */
  404                if (prev_prom) {
  405                        prev_prom->ifa_next = promote->ifa_next;
  406                        promote->ifa_next = last_prim->ifa_next;
  407                        last_prim->ifa_next = promote;
  408                }
  409
  410                promote->ifa_flags &= ~IFA_F_SECONDARY;
  411                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
  412                blocking_notifier_call_chain(&inetaddr_chain,
  413                                NETDEV_UP, promote);
             /* Re-add routes for the remaining addresses of
              * ifa1's subnet.
              */
  414                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
  415                        if (ifa1->ifa_mask != ifa->ifa_mask ||
  416                            !inet_ifa_match(ifa1->ifa_address, ifa))
  417                                        continue;
  418                        fib_add_ifaddr(ifa);
  419                }
  420
  421        }
  422        if (destroy)
  423                inet_free_ifa(ifa1);
  424}
 425
 426static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 427                         int destroy)
 428{
 429        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 430}
 431
 432static void check_lifetime(struct work_struct *work);
 433
 434static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 435
 436static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 437                             u32 portid)
 438{
 439        struct in_device *in_dev = ifa->ifa_dev;
 440        struct in_ifaddr *ifa1, **ifap, **last_primary;
 441
 442        ASSERT_RTNL();
 443
 444        if (!ifa->ifa_local) {
 445                inet_free_ifa(ifa);
 446                return 0;
 447        }
 448
 449        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 450        last_primary = &in_dev->ifa_list;
 451
 452        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 453             ifap = &ifa1->ifa_next) {
 454                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 455                    ifa->ifa_scope <= ifa1->ifa_scope)
 456                        last_primary = &ifa1->ifa_next;
 457                if (ifa1->ifa_mask == ifa->ifa_mask &&
 458                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 459                        if (ifa1->ifa_local == ifa->ifa_local) {
 460                                inet_free_ifa(ifa);
 461                                return -EEXIST;
 462                        }
 463                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 464                                inet_free_ifa(ifa);
 465                                return -EINVAL;
 466                        }
 467                        ifa->ifa_flags |= IFA_F_SECONDARY;
 468                }
 469        }
 470
 471        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 472                prandom_seed((__force u32) ifa->ifa_local);
 473                ifap = last_primary;
 474        }
 475
 476        ifa->ifa_next = *ifap;
 477        *ifap = ifa;
 478
 479        inet_hash_insert(dev_net(in_dev->dev), ifa);
 480
 481        cancel_delayed_work(&check_lifetime_work);
 482        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 483
 484        /* Send message first, then call notifier.
 485           Notifier will trigger FIB update, so that
 486           listeners of netlink will know about new ifaddr */
 487        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 488        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 489
 490        return 0;
 491}
 492
 493static int inet_insert_ifa(struct in_ifaddr *ifa)
 494{
 495        return __inet_insert_ifa(ifa, NULL, 0);
 496}
 497
 498static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 499{
 500        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 501
 502        ASSERT_RTNL();
 503
 504        if (!in_dev) {
 505                inet_free_ifa(ifa);
 506                return -ENOBUFS;
 507        }
 508        ipv4_devconf_setall(in_dev);
 509        neigh_parms_data_state_setall(in_dev->arp_parms);
 510        if (ifa->ifa_dev != in_dev) {
 511                WARN_ON(ifa->ifa_dev);
 512                in_dev_hold(in_dev);
 513                ifa->ifa_dev = in_dev;
 514        }
 515        if (ipv4_is_loopback(ifa->ifa_local))
 516                ifa->ifa_scope = RT_SCOPE_HOST;
 517        return inet_insert_ifa(ifa);
 518}
 519
 520/* Caller must hold RCU or RTNL :
 521 * We dont take a reference on found in_device
 522 */
 523struct in_device *inetdev_by_index(struct net *net, int ifindex)
 524{
 525        struct net_device *dev;
 526        struct in_device *in_dev = NULL;
 527
 528        rcu_read_lock();
 529        dev = dev_get_by_index_rcu(net, ifindex);
 530        if (dev)
 531                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 532        rcu_read_unlock();
 533        return in_dev;
 534}
 535EXPORT_SYMBOL(inetdev_by_index);
 536
 537/* Called only from RTNL semaphored context. No locks. */
 538
 539struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 540                                    __be32 mask)
 541{
 542        ASSERT_RTNL();
 543
 544        for_primary_ifa(in_dev) {
 545                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 546                        return ifa;
 547        } endfor_ifa(in_dev);
 548        return NULL;
 549}
 550
 551static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 552{
 553        struct ip_mreqn mreq = {
 554                .imr_multiaddr.s_addr = ifa->ifa_address,
 555                .imr_ifindex = ifa->ifa_dev->dev->ifindex,
 556        };
 557        int ret;
 558
 559        ASSERT_RTNL();
 560
 561        lock_sock(sk);
 562        if (join)
 563                ret = ip_mc_join_group(sk, &mreq);
 564        else
 565                ret = ip_mc_leave_group(sk, &mreq);
 566        release_sock(sk);
 567
 568        return ret;
 569}
 570
 571static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 572{
 573        struct net *net = sock_net(skb->sk);
 574        struct nlattr *tb[IFA_MAX+1];
 575        struct in_device *in_dev;
 576        struct ifaddrmsg *ifm;
 577        struct in_ifaddr *ifa, **ifap;
 578        int err = -EINVAL;
 579
 580        ASSERT_RTNL();
 581
 582        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 583        if (err < 0)
 584                goto errout;
 585
 586        ifm = nlmsg_data(nlh);
 587        in_dev = inetdev_by_index(net, ifm->ifa_index);
 588        if (!in_dev) {
 589                err = -ENODEV;
 590                goto errout;
 591        }
 592
 593        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 594             ifap = &ifa->ifa_next) {
 595                if (tb[IFA_LOCAL] &&
 596                    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 597                        continue;
 598
 599                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 600                        continue;
 601
 602                if (tb[IFA_ADDRESS] &&
 603                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 604                    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 605                        continue;
 606
 607                if (ipv4_is_multicast(ifa->ifa_address))
 608                        ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 609                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 610                return 0;
 611        }
 612
 613        err = -EADDRNOTAVAIL;
 614errout:
 615        return err;
 616}
 617
     /* Lifetime value that check_lifetime() treats as "never expires". */
  618#define INFINITY_LIFE_TIME      0xFFFFFFFF
  619
     /*
      * Delayed-work handler that ages non-permanent addresses.
      *
      * Every hash bucket is scanned twice at most: first a read-only pass
      * under rcu_read_lock() that decides whether anything must change and
      * pulls the next wake-up time forward, then - only if needed - a second
      * pass under rtnl_lock() that deletes addresses whose valid lifetime has
      * elapsed and flags (and announces) addresses whose preferred lifetime
      * has elapsed as IFA_F_DEPRECATED.  Ages are compared in seconds
      * (jiffies difference divided by HZ).  The work item re-queues itself
      * before returning.
      */
  620static void check_lifetime(struct work_struct *work)
  621{
  622        unsigned long now, next, next_sec, next_sched;
  623        struct in_ifaddr *ifa;
  624        struct hlist_node *n;
  625        int i;
  626
  627        now = jiffies;
             /* Latest acceptable next run; shortened by the scan below. */
  628        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
  629
  630        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
  631                bool change_needed = false;
  632
                     /* Pass 1: read-only scan, no RTNL taken. */
  633                rcu_read_lock();
  634                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
  635                        unsigned long age;
  636
  637                        if (ifa->ifa_flags & IFA_F_PERMANENT)
  638                                continue;
  639
  640                        /* We try to batch several events at once. */
  641                        age = (now - ifa->ifa_tstamp +
  642                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
  643
  644                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
  645                            age >= ifa->ifa_valid_lft) {
                                     /* Valid lifetime elapsed: must be deleted. */
  646                                change_needed = true;
  647                        } else if (ifa->ifa_preferred_lft ==
  648                                   INFINITY_LIFE_TIME) {
  649                                continue;
  650                        } else if (age >= ifa->ifa_preferred_lft) {
                                     /* Preferred lifetime elapsed: wake up no
                                      * later than the end of the valid lifetime.
                                      */
  651                                if (time_before(ifa->ifa_tstamp +
  652                                                ifa->ifa_valid_lft * HZ, next))
  653                                        next = ifa->ifa_tstamp +
  654                                               ifa->ifa_valid_lft * HZ;
  655
  656                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
  657                                        change_needed = true;
  658                        } else if (time_before(ifa->ifa_tstamp +
  659                                               ifa->ifa_preferred_lft * HZ,
  660                                               next)) {
                                     /* Still preferred: wake up when that ends. */
  661                                next = ifa->ifa_tstamp +
  662                                       ifa->ifa_preferred_lft * HZ;
  663                        }
  664                }
  665                rcu_read_unlock();
  666                if (!change_needed)
  667                        continue;
                     /* Pass 2: apply the changes with RTNL held. */
  668                rtnl_lock();
  669                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
  670                        unsigned long age;
  671
  672                        if (ifa->ifa_flags & IFA_F_PERMANENT)
  673                                continue;
  674
  675                        /* We try to batch several events at once. */
  676                        age = (now - ifa->ifa_tstamp +
  677                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
  678
  679                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
  680                            age >= ifa->ifa_valid_lft) {
  681                                struct in_ifaddr **ifap;
  682
                                     /* inet_del_ifa() needs the link that points
                                      * at ifa, so find it on the device list.
                                      */
  683                                for (ifap = &ifa->ifa_dev->ifa_list;
  684                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
  685                                        if (*ifap == ifa) {
  686                                                inet_del_ifa(ifa->ifa_dev,
  687                                                             ifap, 1);
  688                                                break;
  689                                        }
  690                                }
  691                        } else if (ifa->ifa_preferred_lft !=
  692                                   INFINITY_LIFE_TIME &&
  693                                   age >= ifa->ifa_preferred_lft &&
  694                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
  695                                ifa->ifa_flags |= IFA_F_DEPRECATED;
  696                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
  697                        }
  698                }
  699                rtnl_unlock();
  700        }
  701
  702        next_sec = round_jiffies_up(next);
  703        next_sched = next;
  704
  705        /* If rounded timeout is accurate enough, accept it. */
  706        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
  707                next_sched = next_sec;
  708
  709        now = jiffies;
  710        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
  711        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
  712                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
  713
             /* Re-arm; the delay is relative to the refreshed "now". */
  714        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
  715                        next_sched - now);
  716}
 717
 718static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 719                             __u32 prefered_lft)
 720{
 721        unsigned long timeout;
 722
 723        ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 724
 725        timeout = addrconf_timeout_fixup(valid_lft, HZ);
 726        if (addrconf_finite_timeout(timeout))
 727                ifa->ifa_valid_lft = timeout;
 728        else
 729                ifa->ifa_flags |= IFA_F_PERMANENT;
 730
 731        timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 732        if (addrconf_finite_timeout(timeout)) {
 733                if (timeout == 0)
 734                        ifa->ifa_flags |= IFA_F_DEPRECATED;
 735                ifa->ifa_preferred_lft = timeout;
 736        }
 737        ifa->ifa_tstamp = jiffies;
 738        if (!ifa->ifa_cstamp)
 739                ifa->ifa_cstamp = ifa->ifa_tstamp;
 740}
 741
 742static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 743                                       __u32 *pvalid_lft, __u32 *pprefered_lft)
 744{
 745        struct nlattr *tb[IFA_MAX+1];
 746        struct in_ifaddr *ifa;
 747        struct ifaddrmsg *ifm;
 748        struct net_device *dev;
 749        struct in_device *in_dev;
 750        int err;
 751
 752        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 753        if (err < 0)
 754                goto errout;
 755
 756        ifm = nlmsg_data(nlh);
 757        err = -EINVAL;
 758        if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
 759                goto errout;
 760
 761        dev = __dev_get_by_index(net, ifm->ifa_index);
 762        err = -ENODEV;
 763        if (!dev)
 764                goto errout;
 765
 766        in_dev = __in_dev_get_rtnl(dev);
 767        err = -ENOBUFS;
 768        if (!in_dev)
 769                goto errout;
 770
 771        ifa = inet_alloc_ifa();
 772        if (!ifa)
 773                /*
 774                 * A potential indev allocation can be left alive, it stays
 775                 * assigned to its device and is destroy with it.
 776                 */
 777                goto errout;
 778
 779        ipv4_devconf_setall(in_dev);
 780        neigh_parms_data_state_setall(in_dev->arp_parms);
 781        in_dev_hold(in_dev);
 782
 783        if (!tb[IFA_ADDRESS])
 784                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 785
 786        INIT_HLIST_NODE(&ifa->hash);
 787        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 788        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 789        ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
 790                                         ifm->ifa_flags;
 791        ifa->ifa_scope = ifm->ifa_scope;
 792        ifa->ifa_dev = in_dev;
 793
 794        ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
 795        ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 796
 797        if (tb[IFA_BROADCAST])
 798                ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 799
 800        if (tb[IFA_LABEL])
 801                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 802        else
 803                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 804
 805        if (tb[IFA_CACHEINFO]) {
 806                struct ifa_cacheinfo *ci;
 807
 808                ci = nla_data(tb[IFA_CACHEINFO]);
 809                if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 810                        err = -EINVAL;
 811                        goto errout_free;
 812                }
 813                *pvalid_lft = ci->ifa_valid;
 814                *pprefered_lft = ci->ifa_prefered;
 815        }
 816
 817        return ifa;
 818
 819errout_free:
 820        inet_free_ifa(ifa);
 821errout:
 822        return ERR_PTR(err);
 823}
 824
 825static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 826{
 827        struct in_device *in_dev = ifa->ifa_dev;
 828        struct in_ifaddr *ifa1, **ifap;
 829
 830        if (!ifa->ifa_local)
 831                return NULL;
 832
 833        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 834             ifap = &ifa1->ifa_next) {
 835                if (ifa1->ifa_mask == ifa->ifa_mask &&
 836                    inet_ifa_match(ifa1->ifa_address, ifa) &&
 837                    ifa1->ifa_local == ifa->ifa_local)
 838                        return ifa1;
 839        }
 840        return NULL;
 841}
 842
 843static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 844{
 845        struct net *net = sock_net(skb->sk);
 846        struct in_ifaddr *ifa;
 847        struct in_ifaddr *ifa_existing;
 848        __u32 valid_lft = INFINITY_LIFE_TIME;
 849        __u32 prefered_lft = INFINITY_LIFE_TIME;
 850
 851        ASSERT_RTNL();
 852
 853        ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
 854        if (IS_ERR(ifa))
 855                return PTR_ERR(ifa);
 856
 857        ifa_existing = find_matching_ifa(ifa);
 858        if (!ifa_existing) {
 859                /* It would be best to check for !NLM_F_CREATE here but
 860                 * userspace already relies on not having to provide this.
 861                 */
 862                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 863                if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
 864                        int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
 865                                               true, ifa);
 866
 867                        if (ret < 0) {
 868                                inet_free_ifa(ifa);
 869                                return ret;
 870                        }
 871                }
 872                return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 873        } else {
 874                inet_free_ifa(ifa);
 875
 876                if (nlh->nlmsg_flags & NLM_F_EXCL ||
 877                    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 878                        return -EEXIST;
 879                ifa = ifa_existing;
 880                set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 881                cancel_delayed_work(&check_lifetime_work);
 882                queue_delayed_work(system_power_efficient_wq,
 883                                &check_lifetime_work, 0);
 884                rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 885        }
 886        return 0;
 887}
 888
 889/*
 890 *      Determine a default network mask, based on the IP address.
 891 */
 892
 893static int inet_abc_len(__be32 addr)
 894{
 895        int rc = -1;    /* Something else, probably a multicast. */
 896
 897        if (ipv4_is_zeronet(addr))
 898                rc = 0;
 899        else {
 900                __u32 haddr = ntohl(addr);
 901
 902                if (IN_CLASSA(haddr))
 903                        rc = 8;
 904                else if (IN_CLASSB(haddr))
 905                        rc = 16;
 906                else if (IN_CLASSC(haddr))
 907                        rc = 24;
 908        }
 909
 910        return rc;
 911}
 912
 913
 /*
  * devinet_ioctl - service the IPv4 interface-address ioctls.
  * @net: network namespace used for the device lookup and capability check
  * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
  *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
  *       SIOCSIFNETMASK; anything else yields -EINVAL
  * @arg: user pointer to a struct ifreq; read on entry and, for the "get"
  *       commands, written back with the requested address
  *
  * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
  * -EPERM (set command without CAP_NET_ADMIN in the netns' user namespace),
  * -EINVAL (unknown command, non-AF_INET address on a set command, or an
  * address/mask rejected by inet_abc_len()/bad_mask()), -ENODEV (no device
  * with that name), -EADDRNOTAVAIL (no matching address), -ENOBUFS (address
  * allocation failed), or whatever dev_change_flags()/inet_set_ifa() return.
  */
 914int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 915{
 916        struct ifreq ifr;
 917        struct sockaddr_in sin_orig;
 918        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 919        struct in_device *in_dev;
 920        struct in_ifaddr **ifap = NULL;
 921        struct in_ifaddr *ifa = NULL;
 922        struct net_device *dev;
 923        char *colon;
 924        int ret = -EFAULT;
 925        int tryaddrmatch = 0;
 926
 927        /*
 928         *      Fetch the caller's info block into kernel space
 929         */
 930
 931        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 932                goto out;
            /* Force termination: the name came from user space */
 933        ifr.ifr_name[IFNAMSIZ - 1] = 0;
 934
 935        /* save original address for comparison */
 936        memcpy(&sin_orig, sin, sizeof(*sin));
 937
            /* Temporarily cut an alias label ("name:suffix") down to the part
             * before ':' so the device load/lookup below use the device name;
             * the ':' is put back once the device has been found.
             */
 938        colon = strchr(ifr.ifr_name, ':');
 939        if (colon)
 940                *colon = 0;
 941
 942        dev_load(net, ifr.ifr_name);
 943
            /* First pass over cmd: permission and argument checks only,
             * done before the RTNL lock is taken.
             */
 944        switch (cmd) {
 945        case SIOCGIFADDR:       /* Get interface address */
 946        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 947        case SIOCGIFDSTADDR:    /* Get the destination address */
 948        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 949                /* Note that these ioctls will not sleep,
 950                   so that we do not impose a lock.
 951                   One day we will be forced to put shlock here (I mean SMP)
 952                 */
 953                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                    /* The reply is built in place; the caller's address was
                     * saved in sin_orig above.
                     */
 954                memset(sin, 0, sizeof(*sin));
 955                sin->sin_family = AF_INET;
 956                break;
 957
 958        case SIOCSIFFLAGS:
 959                ret = -EPERM;
 960                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 961                        goto out;
 962                break;
 963        case SIOCSIFADDR:       /* Set interface address (and family) */
 964        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 965        case SIOCSIFDSTADDR:    /* Set the destination address */
 966        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 967                ret = -EPERM;
 968                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 969                        goto out;
 970                ret = -EINVAL;
 971                if (sin->sin_family != AF_INET)
 972                        goto out;
 973                break;
 974        default:
 975                ret = -EINVAL;
 976                goto out;
 977        }
 978
 979        rtnl_lock();
 980
 981        ret = -ENODEV;
 982        dev = __dev_get_by_name(net, ifr.ifr_name);
 983        if (!dev)
 984                goto done;
 985
            /* Restore the full label for the ifa_label comparisons below */
 986        if (colon)
 987                *colon = ':';
 988
            /* Locate the address entry the request refers to.  ifap is left
             * pointing at the link that holds ifa, which is what
             * inet_del_ifa() is given further down.
             */
 989        in_dev = __in_dev_get_rtnl(dev);
 990        if (in_dev) {
 991                if (tryaddrmatch) {
 992                        /* Matthias Andree */
 993                        /* compare label and address (4.4BSD style) */
 994                        /* note: we only do this for a limited set of ioctls
 995                           and only if the original address family was AF_INET.
 996                           This is checked above. */
 997                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 998                             ifap = &ifa->ifa_next) {
 999                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1000                                    sin_orig.sin_addr.s_addr ==
1001                                                        ifa->ifa_local) {
1002                                        break; /* found */
1003                                }
1004                        }
1005                }
1006                /* we didn't get a match, maybe the application is
1007                   4.3BSD-style and passed in junk so we fall back to
1008                   comparing just the label */
1009                if (!ifa) {
1010                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1011                             ifap = &ifa->ifa_next)
1012                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1013                                        break;
1014                }
1015        }
1016
            /* Only SIOCSIFADDR and SIOCSIFFLAGS may continue without an
             * existing address; every case below other than those two
             * dereferences ifa unconditionally.
             */
1017        ret = -EADDRNOTAVAIL;
1018        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1019                goto done;
1020
            /* Second pass over cmd: perform the operation under RTNL */
1021        switch (cmd) {
1022        case SIOCGIFADDR:       /* Get interface address */
1023                sin->sin_addr.s_addr = ifa->ifa_local;
1024                goto rarok;
1025
1026        case SIOCGIFBRDADDR:    /* Get the broadcast address */
1027                sin->sin_addr.s_addr = ifa->ifa_broadcast;
1028                goto rarok;
1029
1030        case SIOCGIFDSTADDR:    /* Get the destination address */
1031                sin->sin_addr.s_addr = ifa->ifa_address;
1032                goto rarok;
1033
1034        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1035                sin->sin_addr.s_addr = ifa->ifa_mask;
1036                goto rarok;
1037
1038        case SIOCSIFFLAGS:
                    /* On an alias label the only effect is that clearing
                     * IFF_UP deletes that address; device flags are changed
                     * only when the name had no ':'.
                     */
1039                if (colon) {
1040                        ret = -EADDRNOTAVAIL;
1041                        if (!ifa)
1042                                break;
1043                        ret = 0;
1044                        if (!(ifr.ifr_flags & IFF_UP))
1045                                inet_del_ifa(in_dev, ifap, 1);
1046                        break;
1047                }
1048                ret = dev_change_flags(dev, ifr.ifr_flags);
1049                break;
1050
1051        case SIOCSIFADDR:       /* Set interface address (and family) */
1052                ret = -EINVAL;
1053                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1054                        break;
1055
1056                if (!ifa) {
                            /* No entry for this label yet: create one */
1057                        ret = -ENOBUFS;
1058                        ifa = inet_alloc_ifa();
1059                        if (!ifa)
1060                                break;
1061                        INIT_HLIST_NODE(&ifa->hash);
1062                        if (colon)
1063                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1064                        else
1065                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1066                } else {
                            /* Same address already set: nothing to do.
                             * Otherwise unlink the entry (third argument 0),
                             * then modify it and hand it to inet_set_ifa().
                             */
1067                        ret = 0;
1068                        if (ifa->ifa_local == sin->sin_addr.s_addr)
1069                                break;
1070                        inet_del_ifa(in_dev, ifap, 0);
1071                        ifa->ifa_broadcast = 0;
1072                        ifa->ifa_scope = 0;
1073                }
1074
1075                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1076
                    /* Classful default prefix/mask, plus a derived broadcast
                     * address on broadcast devices; point-to-point devices
                     * get a /32.
                     */
1077                if (!(dev->flags & IFF_POINTOPOINT)) {
1078                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1079                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1080                        if ((dev->flags & IFF_BROADCAST) &&
1081                            ifa->ifa_prefixlen < 31)
1082                                ifa->ifa_broadcast = ifa->ifa_address |
1083                                                     ~ifa->ifa_mask;
1084                } else {
1085                        ifa->ifa_prefixlen = 32;
1086                        ifa->ifa_mask = inet_make_mask(32);
1087                }
1088                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1089                ret = inet_set_ifa(dev, ifa);
1090                break;
1091
1092        case SIOCSIFBRDADDR:    /* Set the broadcast address */
1093                ret = 0;
1094                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1095                        inet_del_ifa(in_dev, ifap, 0);
1096                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
1097                        inet_insert_ifa(ifa);
1098                }
1099                break;
1100
1101        case SIOCSIFDSTADDR:    /* Set the destination address */
1102                ret = 0;
1103                if (ifa->ifa_address == sin->sin_addr.s_addr)
1104                        break;
1105                ret = -EINVAL;
1106                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1107                        break;
1108                ret = 0;
1109                inet_del_ifa(in_dev, ifap, 0);
1110                ifa->ifa_address = sin->sin_addr.s_addr;
1111                inet_insert_ifa(ifa);
1112                break;
1113
1114        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1115
1116                /*
1117                 *      The mask we set must be legal.
1118                 */
1119                ret = -EINVAL;
1120                if (bad_mask(sin->sin_addr.s_addr, 0))
1121                        break;
1122                ret = 0;
1123                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1124                        __be32 old_mask = ifa->ifa_mask;
1125                        inet_del_ifa(in_dev, ifap, 0);
1126                        ifa->ifa_mask = sin->sin_addr.s_addr;
1127                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1128
1129                        /* See if current broadcast address matches
1130                         * with current netmask, then recalculate
1131                         * the broadcast address. Otherwise it's a
1132                         * funny address, so don't touch it since
1133                         * the user seems to know what (s)he's doing...
1134                         */
1135                        if ((dev->flags & IFF_BROADCAST) &&
1136                            (ifa->ifa_prefixlen < 31) &&
1137                            (ifa->ifa_broadcast ==
1138                             (ifa->ifa_local|~old_mask))) {
1139                                ifa->ifa_broadcast = (ifa->ifa_local |
1140                                                      ~sin->sin_addr.s_addr);
1141                        }
1142                        inet_insert_ifa(ifa);
1143                }
1144                break;
1145        }
1146done:
1147        rtnl_unlock();
1148out:
1149        return ret;
    /* Exit for the "get" commands: drop RTNL first, then copy the reply out */
1150rarok:
1151        rtnl_unlock();
1152        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1153        goto out;
1154}
1155
1156static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1157{
1158        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1159        struct in_ifaddr *ifa;
1160        struct ifreq ifr;
1161        int done = 0;
1162
1163        if (!in_dev)
1164                goto out;
1165
1166        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1167                if (!buf) {
1168                        done += sizeof(ifr);
1169                        continue;
1170                }
1171                if (len < (int) sizeof(ifr))
1172                        break;
1173                memset(&ifr, 0, sizeof(struct ifreq));
1174                strcpy(ifr.ifr_name, ifa->ifa_label);
1175
1176                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1177                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1178                                                                ifa->ifa_local;
1179
1180                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1181                        done = -EFAULT;
1182                        break;
1183                }
1184                buf  += sizeof(struct ifreq);
1185                len  -= sizeof(struct ifreq);
1186                done += sizeof(struct ifreq);
1187        }
1188out:
1189        return done;
1190}
1191
1192__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1193{
1194        __be32 addr = 0;
1195        struct in_device *in_dev;
1196        struct net *net = dev_net(dev);
1197
1198        rcu_read_lock();
1199        in_dev = __in_dev_get_rcu(dev);
1200        if (!in_dev)
1201                goto no_in_dev;
1202
1203        for_primary_ifa(in_dev) {
1204                if (ifa->ifa_scope > scope)
1205                        continue;
1206                if (!dst || inet_ifa_match(dst, ifa)) {
1207                        addr = ifa->ifa_local;
1208                        break;
1209                }
1210                if (!addr)
1211                        addr = ifa->ifa_local;
1212        } endfor_ifa(in_dev);
1213
1214        if (addr)
1215                goto out_unlock;
1216no_in_dev:
1217
1218        /* Not loopback addresses on loopback should be preferred
1219           in this case. It is important that lo is the first interface
1220           in dev_base list.
1221         */
1222        for_each_netdev_rcu(net, dev) {
1223                in_dev = __in_dev_get_rcu(dev);
1224                if (!in_dev)
1225                        continue;
1226
1227                for_primary_ifa(in_dev) {
1228                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
1229                            ifa->ifa_scope <= scope) {
1230                                addr = ifa->ifa_local;
1231                                goto out_unlock;
1232                        }
1233                } endfor_ifa(in_dev);
1234        }
1235out_unlock:
1236        rcu_read_unlock();
1237        return addr;
1238}
1239EXPORT_SYMBOL(inet_select_addr);
1240
/*
 * Confirm a local address against the addresses of one device.
 *
 * Two things are tracked while walking the addresses of @in_dev:
 *   addr - the first address that equals @local (any address when @local
 *          is 0) and whose scope value does not exceed @scope;
 *   same - whether an address has been seen whose subnet contains @local
 *          (or @local is 0) and also contains @dst (or @dst is 0).
 *
 * Returns the selected address only if such a subnet was found, else 0.
 * NOTE(review): for_ifa()/endfor_ifa() are defined outside this excerpt;
 * presumably they iterate every address of @in_dev as "ifa" - confirm.
 */
1241static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1242                              __be32 local, int scope)
1243{
1244        int same = 0;
1245        __be32 addr = 0;
1246
1247        for_ifa(in_dev) {
                    /* Take the first acceptable candidate; if the subnet
                     * condition was already satisfied we are done.
                     */
1248                if (!addr &&
1249                    (local == ifa->ifa_local || !local) &&
1250                    ifa->ifa_scope <= scope) {
1251                        addr = ifa->ifa_local;
1252                        if (same)
1253                                break;
1254                }
1255                if (!same) {
1256                        same = (!local || inet_ifa_match(local, ifa)) &&
1257                                (!dst || inet_ifa_match(dst, ifa));
1258                        if (same && addr) {
                                    /* A caller-supplied @local, or no @dst
                                     * constraint, needs no further checks.
                                     */
1259                                if (local || !dst)
1260                                        break;
1261                                /* Is the selected addr into dst subnet? */
1262                                if (inet_ifa_match(addr, ifa))
1263                                        break;
1264                                /* No, then can we use new local src? */
1265                                if (ifa->ifa_scope <= scope) {
1266                                        addr = ifa->ifa_local;
1267                                        break;
1268                                }
1269                                /* search for large dst subnet for addr */
1270                                same = 0;
1271                        }
1272                }
1273        } endfor_ifa(in_dev);
1274
1275        return same ? addr : 0;
1276}
1277
1278/*
1279 * Confirm that local IP address exists using wildcards:
1280 * - net: netns to check, cannot be NULL
1281 * - in_dev: only on this interface, NULL=any interface
1282 * - dst: only in the same subnet as dst, 0=any dst
1283 * - local: address, 0=autoselect the local address
1284 * - scope: maximum allowed scope value for the local address
1285 */
1286__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1287                         __be32 dst, __be32 local, int scope)
1288{
1289        __be32 addr = 0;
1290        struct net_device *dev;
1291
1292        if (in_dev)
1293                return confirm_addr_indev(in_dev, dst, local, scope);
1294
1295        rcu_read_lock();
1296        for_each_netdev_rcu(net, dev) {
1297                in_dev = __in_dev_get_rcu(dev);
1298                if (in_dev) {
1299                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1300                        if (addr)
1301                                break;
1302                }
1303        }
1304        rcu_read_unlock();
1305
1306        return addr;
1307}
1308EXPORT_SYMBOL(inet_confirm_addr);
1309
1310/*
1311 *      Device notifier
1312 */
1313
1314int register_inetaddr_notifier(struct notifier_block *nb)
1315{
1316        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1317}
1318EXPORT_SYMBOL(register_inetaddr_notifier);
1319
1320int unregister_inetaddr_notifier(struct notifier_block *nb)
1321{
1322        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1323}
1324EXPORT_SYMBOL(unregister_inetaddr_notifier);
1325
1326/* Rename ifa_labels for a device name change. Make some effort to preserve
1327 * existing alias numbering and to create unique labels if possible.
1328*/
1329static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1330{
1331        struct in_ifaddr *ifa;
1332        int named = 0;
1333
1334        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1335                char old[IFNAMSIZ], *dot;
1336
1337                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1338                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1339                if (named++ == 0)
1340                        goto skip;
1341                dot = strchr(old, ':');
1342                if (!dot) {
1343                        sprintf(old, ":%d", named);
1344                        dot = old;
1345                }
1346                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1347                        strcat(ifa->ifa_label, dot);
1348                else
1349                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1350skip:
1351                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1352        }
1353}
1354
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The threshold is 68 (presumably the RFC 791 minimum datagram size every
 * link must carry - confirm).  Returns true when @mtu is at least that.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        const unsigned int min_mtu = 68;

        return !(mtu < min_mtu);
}
1359
1360static void inetdev_send_gratuitous_arp(struct net_device *dev,
1361                                        struct in_device *in_dev)
1362
1363{
1364        struct in_ifaddr *ifa;
1365
1366        for (ifa = in_dev->ifa_list; ifa;
1367             ifa = ifa->ifa_next) {
1368                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1369                         ifa->ifa_local, dev,
1370                         ifa->ifa_local, NULL,
1371                         dev->dev_addr, NULL);
1372        }
1373}
1374
/*
 * Netdevice notifier callback that keeps a device's IPv4 state (its
 * in_device) in step with device events.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on NETDEV_REGISTER,
 * in which case the error is passed back through notifier_from_errno().
 */
1375/* Called only under RTNL semaphore */
1376
1377static int inetdev_event(struct notifier_block *this, unsigned long event,
1378                         void *ptr)
1379{
1380        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1381        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1382
1383        ASSERT_RTNL();
1384
            /* No IPv4 state yet: only NETDEV_REGISTER, or NETDEV_CHANGEMTU
             * to an acceptable MTU, creates it.  Every other event is
             * ignored for such a device.
             */
1385        if (!in_dev) {
1386                if (event == NETDEV_REGISTER) {
1387                        in_dev = inetdev_init(dev);
1388                        if (IS_ERR(in_dev))
1389                                return notifier_from_errno(PTR_ERR(in_dev));
1390                        if (dev->flags & IFF_LOOPBACK) {
1391                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1392                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1393                        }
1394                } else if (event == NETDEV_CHANGEMTU) {
1395                        /* Re-enabling IP */
                            /* NOTE(review): the inetdev_init() result is not
                             * checked on this path.
                             */
1396                        if (inetdev_valid_mtu(dev->mtu))
1397                                in_dev = inetdev_init(dev);
1398                }
1399                goto out;
1400        }
1401
1402        switch (event) {
1403        case NETDEV_REGISTER:
                    /* An in_device already present at registration is
                     * unexpected; log it and clear the device's pointer.
                     */
1404                pr_debug("%s: bug\n", __func__);
1405                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1406                break;
1407        case NETDEV_UP:
1408                if (!inetdev_valid_mtu(dev->mtu))
1409                        break;
                    /* A loopback device coming up gets INADDR_LOOPBACK with
                     * an 8-bit prefix and host scope configured
                     * automatically; allocation failure is silently skipped.
                     */
1410                if (dev->flags & IFF_LOOPBACK) {
1411                        struct in_ifaddr *ifa = inet_alloc_ifa();
1412
1413                        if (ifa) {
1414                                INIT_HLIST_NODE(&ifa->hash);
1415                                ifa->ifa_local =
1416                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1417                                ifa->ifa_prefixlen = 8;
1418                                ifa->ifa_mask = inet_make_mask(8);
                                    /* the new address holds a reference on
                                     * in_dev
                                     */
1419                                in_dev_hold(in_dev);
1420                                ifa->ifa_dev = in_dev;
1421                                ifa->ifa_scope = RT_SCOPE_HOST;
1422                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1423                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1424                                                 INFINITY_LIFE_TIME);
1425                                ipv4_devconf_setall(in_dev);
1426                                neigh_parms_data_state_setall(in_dev->arp_parms);
1427                                inet_insert_ifa(ifa);
1428                        }
1429                }
1430                ip_mc_up(in_dev);
1431                /* fall through */
1432        case NETDEV_CHANGEADDR:
                    /* For UP and CHANGEADDR the gratuitous ARP is sent only
                     * when IN_DEV_ARP_NOTIFY() is true for the device.
                     */
1433                if (!IN_DEV_ARP_NOTIFY(in_dev))
1434                        break;
1435                /* fall through */
1436        case NETDEV_NOTIFY_PEERS:
1437                /* Send gratuitous ARP to notify of link change */
1438                inetdev_send_gratuitous_arp(dev, in_dev);
1439                break;
1440        case NETDEV_DOWN:
1441                ip_mc_down(in_dev);
1442                break;
1443        case NETDEV_PRE_TYPE_CHANGE:
1444                ip_mc_unmap(in_dev);
1445                break;
1446        case NETDEV_POST_TYPE_CHANGE:
1447                ip_mc_remap(in_dev);
1448                break;
1449        case NETDEV_CHANGEMTU:
1450                if (inetdev_valid_mtu(dev->mtu))
1451                        break;
1452                /* disable IP when MTU is not enough */
                    /* (deliberate fall through into NETDEV_UNREGISTER) */
1453        case NETDEV_UNREGISTER:
1454                inetdev_destroy(in_dev);
1455                break;
1456        case NETDEV_CHANGENAME:
1457                /* Do not notify about label change, this event is
1458                 * not interesting to applications using netlink.
1459                 */
                    /* NOTE(review): inetdev_changename() in this file does
                     * call rtmsg_ifa(RTM_NEWADDR, ...) for every address, so
                     * the statement above looks stale - confirm.
                     */
1460                inetdev_changename(dev, in_dev);
1461
                    /* Re-register the sysctl entries after the rename */
1462                devinet_sysctl_unregister(in_dev);
1463                devinet_sysctl_register(in_dev);
1464                break;
1465        }
1466out:
1467        return NOTIFY_DONE;
1468}
1469
/*
 * Notifier block whose callback is inetdev_event().
 * NOTE(review): its registration is outside this excerpt; presumably it is
 * hooked into the netdevice notifier chain at init time - confirm.
 */
1470static struct notifier_block ip_netdev_notifier = {
1471        .notifier_call = inetdev_event,
1472};
1473
1474static size_t inet_nlmsg_size(void)
1475{
1476        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1477               + nla_total_size(4) /* IFA_ADDRESS */
1478               + nla_total_size(4) /* IFA_LOCAL */
1479               + nla_total_size(4) /* IFA_BROADCAST */
1480               + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1481               + nla_total_size(4)  /* IFA_FLAGS */
1482               + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1483}
1484
1485static inline u32 cstamp_delta(unsigned long cstamp)
1486{
1487        return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1488}
1489
1490static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1491                         unsigned long tstamp, u32 preferred, u32 valid)
1492{
1493        struct ifa_cacheinfo ci;
1494
1495        ci.cstamp = cstamp_delta(cstamp);
1496        ci.tstamp = cstamp_delta(tstamp);
1497        ci.ifa_prefered = preferred;
1498        ci.ifa_valid = valid;
1499
1500        return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1501}
1502
1503static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1504                            u32 portid, u32 seq, int event, unsigned int flags)
1505{
1506        struct ifaddrmsg *ifm;
1507        struct nlmsghdr  *nlh;
1508        u32 preferred, valid;
1509
1510        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1511        if (!nlh)
1512                return -EMSGSIZE;
1513
1514        ifm = nlmsg_data(nlh);
1515        ifm->ifa_family = AF_INET;
1516        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1517        ifm->ifa_flags = ifa->ifa_flags;
1518        ifm->ifa_scope = ifa->ifa_scope;
1519        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1520
1521        if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1522                preferred = ifa->ifa_preferred_lft;
1523                valid = ifa->ifa_valid_lft;
1524                if (preferred != INFINITY_LIFE_TIME) {
1525                        long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1526
1527                        if (preferred > tval)
1528                                preferred -= tval;
1529                        else
1530                                preferred = 0;
1531                        if (valid != INFINITY_LIFE_TIME) {
1532                                if (valid > tval)
1533                                        valid -= tval;
1534                                else
1535                                        valid = 0;
1536                        }
1537                }
1538        } else {
1539                preferred = INFINITY_LIFE_TIME;
1540                valid = INFINITY_LIFE_TIME;
1541        }
1542        if ((ifa->ifa_address &&
1543             nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1544            (ifa->ifa_local &&
1545             nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1546            (ifa->ifa_broadcast &&
1547             nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1548            (ifa->ifa_label[0] &&
1549             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1550            nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1551            put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1552                          preferred, valid))
1553                goto nla_put_failure;
1554
1555        nlmsg_end(skb, nlh);
1556        return 0;
1557
1558nla_put_failure:
1559        nlmsg_cancel(skb, nlh);
1560        return -EMSGSIZE;
1561}
1562
/*
 * Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for the
 * IPv4 addresses of the devices in the socket's namespace.
 *
 * The position reached is kept in cb->args[] so that a later call can
 * continue where this one stopped:
 *   args[0] - index into net->dev_index_head[]
 *   args[1] - position of the device within that hash chain
 *   args[2] - position of the address within that device's list
 *
 * Returns skb->len.
 */
1563static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1564{
1565        struct net *net = sock_net(skb->sk);
1566        int h, s_h;
1567        int idx, s_idx;
1568        int ip_idx, s_ip_idx;
1569        struct net_device *dev;
1570        struct in_device *in_dev;
1571        struct in_ifaddr *ifa;
1572        struct hlist_head *head;
1573
            /* s_* are the saved start positions from the previous call */
1574        s_h = cb->args[0];
1575        s_idx = idx = cb->args[1];
1576        s_ip_idx = ip_idx = cb->args[2];
1577
            /* s_idx only applies to the first chain visited; it is reset to
             * 0 for every following one.
             */
1578        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1579                idx = 0;
1580                head = &net->dev_index_head[h];
1581                rcu_read_lock();
                    /* Value later used by nl_dump_check_consistent() */
1582                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1583                          net->dev_base_seq;
1584                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1585                        if (idx < s_idx)
1586                                goto cont;
                            /* Past the device where the previous call
                             * stopped: start from its first address.
                             */
1587                        if (h > s_h || idx > s_idx)
1588                                s_ip_idx = 0;
1589                        in_dev = __in_dev_get_rcu(dev);
1590                        if (!in_dev)
1591                                goto cont;
1592
1593                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1594                             ifa = ifa->ifa_next, ip_idx++) {
1595                                if (ip_idx < s_ip_idx)
1596                                        continue;
                                    /* Fill failed (negative return): stop
                                     * and save the current position.
                                     */
1597                                if (inet_fill_ifaddr(skb, ifa,
1598                                             NETLINK_CB(cb->skb).portid,
1599                                             cb->nlh->nlmsg_seq,
1600                                             RTM_NEWADDR, NLM_F_MULTI) < 0) {
1601                                        rcu_read_unlock();
1602                                        goto done;
1603                                }
1604                                nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1605                        }
1606cont:
1607                        idx++;
1608                }
1609                rcu_read_unlock();
1610        }
1611
1612done:
1613        cb->args[0] = h;
1614        cb->args[1] = idx;
1615        cb->args[2] = ip_idx;
1616
1617        return skb->len;
1618}
1619
1620static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1621                      u32 portid)
1622{
1623        struct sk_buff *skb;
1624        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1625        int err = -ENOBUFS;
1626        struct net *net;
1627
1628        net = dev_net(ifa->ifa_dev->dev);
1629        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1630        if (!skb)
1631                goto errout;
1632
1633        err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1634        if (err < 0) {
1635                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1636                WARN_ON(err == -EMSGSIZE);
1637                kfree_skb(skb);
1638                goto errout;
1639        }
1640        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1641        return;
1642errout:
1643        if (err < 0)
1644                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1645}
1646
1647static size_t inet_get_link_af_size(const struct net_device *dev)
1648{
1649        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1650
1651        if (!in_dev)
1652                return 0;
1653
1654        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1655}
1656
1657static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1658{
1659        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1660        struct nlattr *nla;
1661        int i;
1662
1663        if (!in_dev)
1664                return -ENODATA;
1665
1666        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1667        if (!nla)
1668                return -EMSGSIZE;
1669
1670        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1671                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1672
1673        return 0;
1674}
1675
1676static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1677        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1678};
1679
1680static int inet_validate_link_af(const struct net_device *dev,
1681                                 const struct nlattr *nla)
1682{
1683        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1684        int err, rem;
1685
1686        if (dev && !__in_dev_get_rtnl(dev))
1687                return -EAFNOSUPPORT;
1688
1689        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1690        if (err < 0)
1691                return err;
1692
1693        if (tb[IFLA_INET_CONF]) {
1694                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1695                        int cfgid = nla_type(a);
1696
1697                        if (nla_len(a) < 4)
1698                                return -EINVAL;
1699
1700                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1701                                return -EINVAL;
1702                }
1703        }
1704
1705        return 0;
1706}
1707
1708static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1709{
1710        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1711        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1712        int rem;
1713
1714        if (!in_dev)
1715                return -EAFNOSUPPORT;
1716
1717        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1718                BUG();
1719
1720        if (tb[IFLA_INET_CONF]) {
1721                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1722                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1723        }
1724
1725        return 0;
1726}
1727
1728static int inet_netconf_msgsize_devconf(int type)
1729{
1730        int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1731                   + nla_total_size(4); /* NETCONFA_IFINDEX */
1732
1733        /* type -1 is used for ALL */
1734        if (type == -1 || type == NETCONFA_FORWARDING)
1735                size += nla_total_size(4);
1736        if (type == -1 || type == NETCONFA_RP_FILTER)
1737                size += nla_total_size(4);
1738        if (type == -1 || type == NETCONFA_MC_FORWARDING)
1739                size += nla_total_size(4);
1740        if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1741                size += nla_total_size(4);
1742        if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1743                size += nla_total_size(4);
1744
1745        return size;
1746}
1747
1748static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1749                                     struct ipv4_devconf *devconf, u32 portid,
1750                                     u32 seq, int event, unsigned int flags,
1751                                     int type)
1752{
1753        struct nlmsghdr  *nlh;
1754        struct netconfmsg *ncm;
1755
1756        nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1757                        flags);
1758        if (!nlh)
1759                return -EMSGSIZE;
1760
1761        ncm = nlmsg_data(nlh);
1762        ncm->ncm_family = AF_INET;
1763
1764        if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1765                goto nla_put_failure;
1766
1767        /* type -1 is used for ALL */
1768        if ((type == -1 || type == NETCONFA_FORWARDING) &&
1769            nla_put_s32(skb, NETCONFA_FORWARDING,
1770                        IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1771                goto nla_put_failure;
1772        if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1773            nla_put_s32(skb, NETCONFA_RP_FILTER,
1774                        IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1775                goto nla_put_failure;
1776        if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1777            nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1778                        IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1779                goto nla_put_failure;
1780        if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1781            nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1782                        IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1783                goto nla_put_failure;
1784        if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1785            nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1786                        IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1787                goto nla_put_failure;
1788
1789        nlmsg_end(skb, nlh);
1790        return 0;
1791
1792nla_put_failure:
1793        nlmsg_cancel(skb, nlh);
1794        return -EMSGSIZE;
1795}
1796
1797void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1798                                 struct ipv4_devconf *devconf)
1799{
1800        struct sk_buff *skb;
1801        int err = -ENOBUFS;
1802
1803        skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1804        if (!skb)
1805                goto errout;
1806
1807        err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1808                                        RTM_NEWNETCONF, 0, type);
1809        if (err < 0) {
1810                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1811                WARN_ON(err == -EMSGSIZE);
1812                kfree_skb(skb);
1813                goto errout;
1814        }
1815        rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1816        return;
1817errout:
1818        if (err < 0)
1819                rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1820}
1821
1822static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1823        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1824        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1825        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1826        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1827        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1828};
1829
1830static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1831                                    struct nlmsghdr *nlh)
1832{
1833        struct net *net = sock_net(in_skb->sk);
1834        struct nlattr *tb[NETCONFA_MAX+1];
1835        struct netconfmsg *ncm;
1836        struct sk_buff *skb;
1837        struct ipv4_devconf *devconf;
1838        struct in_device *in_dev;
1839        struct net_device *dev;
1840        int ifindex;
1841        int err;
1842
1843        err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1844                          devconf_ipv4_policy);
1845        if (err < 0)
1846                goto errout;
1847
1848        err = EINVAL;
1849        if (!tb[NETCONFA_IFINDEX])
1850                goto errout;
1851
1852        ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1853        switch (ifindex) {
1854        case NETCONFA_IFINDEX_ALL:
1855                devconf = net->ipv4.devconf_all;
1856                break;
1857        case NETCONFA_IFINDEX_DEFAULT:
1858                devconf = net->ipv4.devconf_dflt;
1859                break;
1860        default:
1861                dev = __dev_get_by_index(net, ifindex);
1862                if (!dev)
1863                        goto errout;
1864                in_dev = __in_dev_get_rtnl(dev);
1865                if (!in_dev)
1866                        goto errout;
1867                devconf = &in_dev->cnf;
1868                break;
1869        }
1870
1871        err = -ENOBUFS;
1872        skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1873        if (!skb)
1874                goto errout;
1875
1876        err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1877                                        NETLINK_CB(in_skb).portid,
1878                                        nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1879                                        -1);
1880        if (err < 0) {
1881                /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1882                WARN_ON(err == -EMSGSIZE);
1883                kfree_skb(skb);
1884                goto errout;
1885        }
1886        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1887errout:
1888        return err;
1889}
1890
1891static int inet_netconf_dump_devconf(struct sk_buff *skb,
1892                                     struct netlink_callback *cb)
1893{
1894        struct net *net = sock_net(skb->sk);
1895        int h, s_h;
1896        int idx, s_idx;
1897        struct net_device *dev;
1898        struct in_device *in_dev;
1899        struct hlist_head *head;
1900
1901        s_h = cb->args[0];
1902        s_idx = idx = cb->args[1];
1903
1904        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1905                idx = 0;
1906                head = &net->dev_index_head[h];
1907                rcu_read_lock();
1908                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1909                          net->dev_base_seq;
1910                hlist_for_each_entry_rcu(dev, head, index_hlist) {
1911                        if (idx < s_idx)
1912                                goto cont;
1913                        in_dev = __in_dev_get_rcu(dev);
1914                        if (!in_dev)
1915                                goto cont;
1916
1917                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
1918                                                      &in_dev->cnf,
1919                                                      NETLINK_CB(cb->skb).portid,
1920                                                      cb->nlh->nlmsg_seq,
1921                                                      RTM_NEWNETCONF,
1922                                                      NLM_F_MULTI,
1923                                                      -1) < 0) {
1924                                rcu_read_unlock();
1925                                goto done;
1926                        }
1927                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1928cont:
1929                        idx++;
1930                }
1931                rcu_read_unlock();
1932        }
1933        if (h == NETDEV_HASHENTRIES) {
1934                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1935                                              net->ipv4.devconf_all,
1936                                              NETLINK_CB(cb->skb).portid,
1937                                              cb->nlh->nlmsg_seq,
1938                                              RTM_NEWNETCONF, NLM_F_MULTI,
1939                                              -1) < 0)
1940                        goto done;
1941                else
1942                        h++;
1943        }
1944        if (h == NETDEV_HASHENTRIES + 1) {
1945                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1946                                              net->ipv4.devconf_dflt,
1947                                              NETLINK_CB(cb->skb).portid,
1948                                              cb->nlh->nlmsg_seq,
1949                                              RTM_NEWNETCONF, NLM_F_MULTI,
1950                                              -1) < 0)
1951                        goto done;
1952                else
1953                        h++;
1954        }
1955done:
1956        cb->args[0] = h;
1957        cb->args[1] = idx;
1958
1959        return skb->len;
1960}
1961
1962#ifdef CONFIG_SYSCTL
1963
1964static void devinet_copy_dflt_conf(struct net *net, int i)
1965{
1966        struct net_device *dev;
1967
1968        rcu_read_lock();
1969        for_each_netdev_rcu(net, dev) {
1970                struct in_device *in_dev;
1971
1972                in_dev = __in_dev_get_rcu(dev);
1973                if (in_dev && !test_bit(i, in_dev->cnf.state))
1974                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1975        }
1976        rcu_read_unlock();
1977}
1978
1979/* called with RTNL locked */
1980static void inet_forward_change(struct net *net)
1981{
1982        struct net_device *dev;
1983        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1984
1985        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1986        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1987        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1988                                    NETCONFA_IFINDEX_ALL,
1989                                    net->ipv4.devconf_all);
1990        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1991                                    NETCONFA_IFINDEX_DEFAULT,
1992                                    net->ipv4.devconf_dflt);
1993
1994        for_each_netdev(net, dev) {
1995                struct in_device *in_dev;
1996                if (on)
1997                        dev_disable_lro(dev);
1998                rcu_read_lock();
1999                in_dev = __in_dev_get_rcu(dev);
2000                if (in_dev) {
2001                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2002                        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2003                                                    dev->ifindex, &in_dev->cnf);
2004                }
2005                rcu_read_unlock();
2006        }
2007}
2008
2009static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2010{
2011        if (cnf == net->ipv4.devconf_dflt)
2012                return NETCONFA_IFINDEX_DEFAULT;
2013        else if (cnf == net->ipv4.devconf_all)
2014                return NETCONFA_IFINDEX_ALL;
2015        else {
2016                struct in_device *idev
2017                        = container_of(cnf, struct in_device, cnf);
2018                return idev->dev->ifindex;
2019        }
2020}
2021
2022static int devinet_conf_proc(struct ctl_table *ctl, int write,
2023                             void __user *buffer,
2024                             size_t *lenp, loff_t *ppos)
2025{
2026        int old_value = *(int *)ctl->data;
2027        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2028        int new_value = *(int *)ctl->data;
2029
2030        if (write) {
2031                struct ipv4_devconf *cnf = ctl->extra1;
2032                struct net *net = ctl->extra2;
2033                int i = (int *)ctl->data - cnf->data;
2034                int ifindex;
2035
2036                set_bit(i, cnf->state);
2037
2038                if (cnf == net->ipv4.devconf_dflt)
2039                        devinet_copy_dflt_conf(net, i);
2040                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2041                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2042                        if ((new_value == 0) && (old_value != 0))
2043                                rt_cache_flush(net);
2044
2045                if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2046                    new_value != old_value) {
2047                        ifindex = devinet_conf_ifindex(net, cnf);
2048                        inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2049                                                    ifindex, cnf);
2050                }
2051                if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2052                    new_value != old_value) {
2053                        ifindex = devinet_conf_ifindex(net, cnf);
2054                        inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2055                                                    ifindex, cnf);
2056                }
2057                if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2058                    new_value != old_value) {
2059                        ifindex = devinet_conf_ifindex(net, cnf);
2060                        inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2061                                                    ifindex, cnf);
2062                }
2063        }
2064
2065        return ret;
2066}
2067
2068static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2069                                  void __user *buffer,
2070                                  size_t *lenp, loff_t *ppos)
2071{
2072        int *valp = ctl->data;
2073        int val = *valp;
2074        loff_t pos = *ppos;
2075        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2076
2077        if (write && *valp != val) {
2078                struct net *net = ctl->extra2;
2079
2080                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2081                        if (!rtnl_trylock()) {
2082                                /* Restore the original values before restarting */
2083                                *valp = val;
2084                                *ppos = pos;
2085                                return restart_syscall();
2086                        }
2087                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2088                                inet_forward_change(net);
2089                        } else {
2090                                struct ipv4_devconf *cnf = ctl->extra1;
2091                                struct in_device *idev =
2092                                        container_of(cnf, struct in_device, cnf);
2093                                if (*valp)
2094                                        dev_disable_lro(idev->dev);
2095                                inet_netconf_notify_devconf(net,
2096                                                            NETCONFA_FORWARDING,
2097                                                            idev->dev->ifindex,
2098                                                            cnf);
2099                        }
2100                        rtnl_unlock();
2101                        rt_cache_flush(net);
2102                } else
2103                        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2104                                                    NETCONFA_IFINDEX_DEFAULT,
2105                                                    net->ipv4.devconf_dflt);
2106        }
2107
2108        return ret;
2109}
2110
2111static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2112                                void __user *buffer,
2113                                size_t *lenp, loff_t *ppos)
2114{
2115        int *valp = ctl->data;
2116        int val = *valp;
2117        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2118        struct net *net = ctl->extra2;
2119
2120        if (write && *valp != val)
2121                rt_cache_flush(net);
2122
2123        return ret;
2124}
2125
/*
 * Initialisers for the entries of devinet_sysctl.devinet_vars.
 *
 * DEVINET_SYSCTL_ENTRY() is the generic form: the entry is called @name,
 * has mode @mval and handler @proc, and points at the IPV4_DEVCONF_<attr>
 * slot of the global ipv4_devconf (devconf ids are 1-based, data[] is
 * 0-based).  __devinet_sysctl_register() rebases .data and .extra1 onto
 * the real target when it copies the table.
 *
 * The other macros fix the mode and/or the handler:
 *   RW        0644, devinet_conf_proc
 *   RO        0444, devinet_conf_proc
 *   COMPLEX   0644, caller-supplied handler
 *   FLUSHING  0644, ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2148
2149static struct devinet_sysctl_table {
2150        struct ctl_table_header *sysctl_header;
2151        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2152} devinet_sysctl = {
2153        .devinet_vars = {
2154                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2155                                             devinet_sysctl_forward),
2156                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2157
2158                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2159                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2160                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2161                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2162                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2163                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2164                                        "accept_source_route"),
2165                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2166                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2167                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2168                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2169                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2170                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2171                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2172                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2173                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2174                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2175                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2176                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2177                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2178                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2179                                        "force_igmp_version"),
2180                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2181                                        "igmpv2_unsolicited_report_interval"),
2182                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2183                                        "igmpv3_unsolicited_report_interval"),
2184                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2185                                        "ignore_routes_with_linkdown"),
2186
2187                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2188                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2189                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2190                                              "promote_secondaries"),
2191                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2192                                              "route_localnet"),
2193        },
2194};
2195
2196static int __devinet_sysctl_register(struct net *net, char *dev_name,
2197                                        struct ipv4_devconf *p)
2198{
2199        int i;
2200        struct devinet_sysctl_table *t;
2201        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2202
2203        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2204        if (!t)
2205                goto out;
2206
2207        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2208                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2209                t->devinet_vars[i].extra1 = p;
2210                t->devinet_vars[i].extra2 = net;
2211        }
2212
2213        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2214
2215        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2216        if (!t->sysctl_header)
2217                goto free;
2218
2219        p->sysctl = t;
2220        return 0;
2221
2222free:
2223        kfree(t);
2224out:
2225        return -ENOBUFS;
2226}
2227
2228static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2229{
2230        struct devinet_sysctl_table *t = cnf->sysctl;
2231
2232        if (!t)
2233                return;
2234
2235        cnf->sysctl = NULL;
2236        unregister_net_sysctl_table(t->sysctl_header);
2237        kfree(t);
2238}
2239
2240static int devinet_sysctl_register(struct in_device *idev)
2241{
2242        int err;
2243
2244        if (!sysctl_dev_name_is_allowed(idev->dev->name))
2245                return -EINVAL;
2246
2247        err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2248        if (err)
2249                return err;
2250        err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2251                                        &idev->cnf);
2252        if (err)
2253                neigh_sysctl_unregister(idev->arp_parms);
2254        return err;
2255}
2256
2257static void devinet_sysctl_unregister(struct in_device *idev)
2258{
2259        __devinet_sysctl_unregister(&idev->cnf);
2260        neigh_sysctl_unregister(idev->arp_parms);
2261}
2262
2263static struct ctl_table ctl_forward_entry[] = {
2264        {
2265                .procname       = "ip_forward",
2266                .data           = &ipv4_devconf.data[
2267                                        IPV4_DEVCONF_FORWARDING - 1],
2268                .maxlen         = sizeof(int),
2269                .mode           = 0644,
2270                .proc_handler   = devinet_sysctl_forward,
2271                .extra1         = &ipv4_devconf,
2272                .extra2         = &init_net,
2273        },
2274        { },
2275};
2276#endif
2277
2278static __net_init int devinet_init_net(struct net *net)
2279{
2280        int err;
2281        struct ipv4_devconf *all, *dflt;
2282#ifdef CONFIG_SYSCTL
2283        struct ctl_table *tbl = ctl_forward_entry;
2284        struct ctl_table_header *forw_hdr;
2285#endif
2286
2287        err = -ENOMEM;
2288        all = &ipv4_devconf;
2289        dflt = &ipv4_devconf_dflt;
2290
2291        if (!net_eq(net, &init_net)) {
2292                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2293                if (!all)
2294                        goto err_alloc_all;
2295
2296                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2297                if (!dflt)
2298                        goto err_alloc_dflt;
2299
2300#ifdef CONFIG_SYSCTL
2301                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2302                if (!tbl)
2303                        goto err_alloc_ctl;
2304
2305                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2306                tbl[0].extra1 = all;
2307                tbl[0].extra2 = net;
2308#endif
2309        }
2310
2311#ifdef CONFIG_SYSCTL
2312        err = __devinet_sysctl_register(net, "all", all);
2313        if (err < 0)
2314                goto err_reg_all;
2315
2316        err = __devinet_sysctl_register(net, "default", dflt);
2317        if (err < 0)
2318                goto err_reg_dflt;
2319
2320        err = -ENOMEM;
2321        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2322        if (!forw_hdr)
2323                goto err_reg_ctl;
2324        net->ipv4.forw_hdr = forw_hdr;
2325#endif
2326
2327        net->ipv4.devconf_all = all;
2328        net->ipv4.devconf_dflt = dflt;
2329        return 0;
2330
2331#ifdef CONFIG_SYSCTL
2332err_reg_ctl:
2333        __devinet_sysctl_unregister(dflt);
2334err_reg_dflt:
2335        __devinet_sysctl_unregister(all);
2336err_reg_all:
2337        if (tbl != ctl_forward_entry)
2338                kfree(tbl);
2339err_alloc_ctl:
2340#endif
2341        if (dflt != &ipv4_devconf_dflt)
2342                kfree(dflt);
2343err_alloc_dflt:
2344        if (all != &ipv4_devconf)
2345                kfree(all);
2346err_alloc_all:
2347        return err;
2348}
2349
2350static __net_exit void devinet_exit_net(struct net *net)
2351{
2352#ifdef CONFIG_SYSCTL
2353        struct ctl_table *tbl;
2354
2355        tbl = net->ipv4.forw_hdr->ctl_table_arg;
2356        unregister_net_sysctl_table(net->ipv4.forw_hdr);
2357        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2358        __devinet_sysctl_unregister(net->ipv4.devconf_all);
2359        kfree(tbl);
2360#endif
2361        kfree(net->ipv4.devconf_dflt);
2362        kfree(net->ipv4.devconf_all);
2363}
2364
2365static __net_initdata struct pernet_operations devinet_ops = {
2366        .init = devinet_init_net,
2367        .exit = devinet_exit_net,
2368};
2369
2370static struct rtnl_af_ops inet_af_ops __read_mostly = {
2371        .family           = AF_INET,
2372        .fill_link_af     = inet_fill_link_af,
2373        .get_link_af_size = inet_get_link_af_size,
2374        .validate_link_af = inet_validate_link_af,
2375        .set_link_af      = inet_set_link_af,
2376};
2377
2378void __init devinet_init(void)
2379{
2380        int i;
2381
2382        for (i = 0; i < IN4_ADDR_HSIZE; i++)
2383                INIT_HLIST_HEAD(&inet_addr_lst[i]);
2384
2385        register_pernet_subsys(&devinet_ops);
2386
2387        register_gifconf(PF_INET, inet_gifconf);
2388        register_netdevice_notifier(&ip_netdev_notifier);
2389
2390        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2391
2392        rtnl_af_register(&inet_af_ops);
2393
2394        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2395        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2396        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2397        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2398                      inet_netconf_dump_devconf, NULL);
2399}
2400
2401