linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <asm/system.h>
  31#include <linux/bitops.h>
  32#include <linux/capability.h>
  33#include <linux/module.h>
  34#include <linux/types.h>
  35#include <linux/kernel.h>
  36#include <linux/string.h>
  37#include <linux/mm.h>
  38#include <linux/socket.h>
  39#include <linux/sockios.h>
  40#include <linux/in.h>
  41#include <linux/errno.h>
  42#include <linux/interrupt.h>
  43#include <linux/if_addr.h>
  44#include <linux/if_ether.h>
  45#include <linux/inet.h>
  46#include <linux/netdevice.h>
  47#include <linux/etherdevice.h>
  48#include <linux/skbuff.h>
  49#include <linux/init.h>
  50#include <linux/notifier.h>
  51#include <linux/inetdevice.h>
  52#include <linux/igmp.h>
  53#include <linux/slab.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58
  59#include <net/arp.h>
  60#include <net/ip.h>
  61#include <net/route.h>
  62#include <net/ip_fib.h>
  63#include <net/rtnetlink.h>
  64#include <net/net_namespace.h>
  65
   /*
    * Statically initialised IPv4 device-configuration block.  Entries of
    * .data are indexed by IPV4_DEVCONF_<NAME> - 1; redirect acceptance,
    * redirect sending, secure redirects and shared media are set to 1 and
    * every entry not listed is zero-initialised.
    * NOTE(review): the users of this object are outside this chunk --
    * presumably it backs the "all" settings; confirm.
    */
   66static struct ipv4_devconf ipv4_devconf = {
   67        .data = {
   68                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
   69                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
   70                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
   71                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
   72        },
   73};
  74
   /*
    * Statically initialised IPv4 device-configuration defaults.  Same
    * non-zero entries as ipv4_devconf, plus ACCEPT_SOURCE_ROUTE; entries of
    * .data are indexed by IPV4_DEVCONF_<NAME> - 1 and unlisted ones are zero.
    * NOTE(review): presumably what net->ipv4.devconf_dflt points at for the
    * initial namespace -- the assignment is not visible here; confirm.
    */
   75static struct ipv4_devconf ipv4_devconf_dflt = {
   76        .data = {
   77                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
   78                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
   79                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
   80                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
   81                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
   82        },
   83};
  84
  85#define IPV4_DEVCONF_DFLT(net, attr) \
  86        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  87
   /*
    * Netlink attribute policy handed to nlmsg_parse() for IPv4 address
    * messages: IFA_LOCAL, IFA_ADDRESS and IFA_BROADCAST are 32-bit values,
    * IFA_LABEL is a string limited to IFNAMSIZ - 1 bytes.  Attribute types
    * not listed here have no policy entry.
    */
   88static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
   89        [IFA_LOCAL]             = { .type = NLA_U32 },
   90        [IFA_ADDRESS]           = { .type = NLA_U32 },
   91        [IFA_BROADCAST]         = { .type = NLA_U32 },
   92        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
   93};
  94
   /*
    * Forward declaration; the definition is outside this chunk.  Callers
    * pass RTM_NEWADDR / RTM_DELADDR as @event together with the affected
    * address, the originating netlink header (may be NULL) and a pid.
    */
   95static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
   96
   /*
    * Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
    * in_ifaddr whenever an address is inserted into or removed from a
    * device's list.
    */
   97static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
   /* Forward declaration: needed by inetdev_destroy(), defined further down. */
   98static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
   99                         int destroy);
  /*
   * Per-device sysctl hooks.  With CONFIG_SYSCTL they are only declared
   * here (the definitions are outside this chunk); without it they collapse
   * to empty inline stubs so callers need no #ifdef of their own.
   */
  100#ifdef CONFIG_SYSCTL
  101static void devinet_sysctl_register(struct in_device *idev);
  102static void devinet_sysctl_unregister(struct in_device *idev);
  103#else
  104static inline void devinet_sysctl_register(struct in_device *idev)
  105{
  106}
  107static inline void devinet_sysctl_unregister(struct in_device *idev)
  108{
  109}
  110#endif
 111
 112/* Locks all the inet devices. */
 113
 114static struct in_ifaddr *inet_alloc_ifa(void)
 115{
 116        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 117}
 118
 119static void inet_rcu_free_ifa(struct rcu_head *head)
 120{
 121        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 122        if (ifa->ifa_dev)
 123                in_dev_put(ifa->ifa_dev);
 124        kfree(ifa);
 125}
 126
 127static inline void inet_free_ifa(struct in_ifaddr *ifa)
 128{
 129        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 130}
 131
 132void in_dev_finish_destroy(struct in_device *idev)
 133{
 134        struct net_device *dev = idev->dev;
 135
 136        WARN_ON(idev->ifa_list);
 137        WARN_ON(idev->mc_list);
 138#ifdef NET_REFCNT_DEBUG
 139        printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
 140               idev, dev ? dev->name : "NIL");
 141#endif
 142        dev_put(dev);
 143        if (!idev->dead)
 144                pr_err("Freeing alive in_device %p\n", idev);
 145        else
 146                kfree(idev);
 147}
 148EXPORT_SYMBOL(in_dev_finish_destroy);
 149
 150static struct in_device *inetdev_init(struct net_device *dev)
 151{
 152        struct in_device *in_dev;
 153
 154        ASSERT_RTNL();
 155
 156        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 157        if (!in_dev)
 158                goto out;
 159        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 160                        sizeof(in_dev->cnf));
 161        in_dev->cnf.sysctl = NULL;
 162        in_dev->dev = dev;
 163        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 164        if (!in_dev->arp_parms)
 165                goto out_kfree;
 166        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 167                dev_disable_lro(dev);
 168        /* Reference in_dev->dev */
 169        dev_hold(dev);
 170        /* Account for reference dev->ip_ptr (below) */
 171        in_dev_hold(in_dev);
 172
 173        devinet_sysctl_register(in_dev);
 174        ip_mc_init_dev(in_dev);
 175        if (dev->flags & IFF_UP)
 176                ip_mc_up(in_dev);
 177
 178        /* we can receive as soon as ip_ptr is set -- do this last */
 179        rcu_assign_pointer(dev->ip_ptr, in_dev);
 180out:
 181        return in_dev;
 182out_kfree:
 183        kfree(in_dev);
 184        in_dev = NULL;
 185        goto out;
 186}
 187
 188static void in_dev_rcu_put(struct rcu_head *head)
 189{
 190        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 191        in_dev_put(idev);
 192}
 193
 194static void inetdev_destroy(struct in_device *in_dev)
 195{
 196        struct in_ifaddr *ifa;
 197        struct net_device *dev;
 198
 199        ASSERT_RTNL();
 200
 201        dev = in_dev->dev;
 202
 203        in_dev->dead = 1;
 204
 205        ip_mc_destroy_dev(in_dev);
 206
 207        while ((ifa = in_dev->ifa_list) != NULL) {
 208                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 209                inet_free_ifa(ifa);
 210        }
 211
 212        rcu_assign_pointer(dev->ip_ptr, NULL);
 213
 214        devinet_sysctl_unregister(in_dev);
 215        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 216        arp_ifdown(dev);
 217
 218        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 219}
 220
 221int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 222{
 223        rcu_read_lock();
 224        for_primary_ifa(in_dev) {
 225                if (inet_ifa_match(a, ifa)) {
 226                        if (!b || inet_ifa_match(b, ifa)) {
 227                                rcu_read_unlock();
 228                                return 1;
 229                        }
 230                }
 231        } endfor_ifa(in_dev);
 232        rcu_read_unlock();
 233        return 0;
 234}
 235
  /*
   * Remove the address *ifap from in_dev's address list.  RTNL must be held.
   *
   * @in_dev:  device whose list is modified
   * @ifap:    link that currently points at the address to remove
   * @destroy: non-zero to also release the removed address via
   *           inet_free_ifa(); zero when the caller keeps using it
   * @nlh/@pid: passed through to rtmsg_ifa() for the netlink notifications
   *
   * When the removed address is a primary, the secondaries that follow it
   * with the same mask and a matching subnet are either deleted as well
   * (promotion disabled) or the first of them is promoted to primary
   * (IN_DEV_PROMOTE_SECONDARIES set).  Every removal is announced with
   * RTM_DELADDR followed by a NETDEV_DOWN notifier call; a promotion is
   * announced with RTM_NEWADDR and NETDEV_UP.
   */
  236static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
  237                         int destroy, struct nlmsghdr *nlh, u32 pid)
  238{
  239        struct in_ifaddr *promote = NULL;
  240        struct in_ifaddr *ifa, *ifa1 = *ifap;
  241        struct in_ifaddr *last_prim = in_dev->ifa_list;
  242        struct in_ifaddr *prev_prom = NULL;
  243        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
  244
  245        ASSERT_RTNL();
  246
  247        /* 1. Deleting primary ifaddr forces deletion all secondaries
  248         * unless alias promotion is set
  249         **/
  250
  251        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
  252                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
  253
  254                while ((ifa = *ifap1) != NULL) {
                             /* Remember the last primary whose scope is not
                              * below ifa1's: a promoted address is relinked
                              * right after it further down. */
  255                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
  256                            ifa1->ifa_scope <= ifa->ifa_scope)
  257                                last_prim = ifa;
  258
                             /* Step over entries that are primary, have a
                              * different mask, or do not match ifa1's
                              * subnet; prev_prom tracks the last one skipped. */
  259                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
  260                            ifa1->ifa_mask != ifa->ifa_mask ||
  261                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
  262                                ifap1 = &ifa->ifa_next;
  263                                prev_prom = ifa;
  264                                continue;
  265                        }
  266
  267                        if (!do_promote) {
                                     /* Unlinking through ifap1 leaves ifap1
                                      * pointing at the next candidate. */
  268                                *ifap1 = ifa->ifa_next;
  269
  270                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
  271                                blocking_notifier_call_chain(&inetaddr_chain,
  272                                                NETDEV_DOWN, ifa);
  273                                inet_free_ifa(ifa);
  274                        } else {
                                     /* Only the first matching secondary is
                                      * promoted; the rest stay secondaries. */
  275                                promote = ifa;
  276                                break;
  277                        }
  278                }
  279        }
  280
  281        /* 2. Unlink it */
  282
  283        *ifap = ifa1->ifa_next;
  284
  285        /* 3. Announce address deletion */
  286
  287        /* Send message first, then call notifier.
  288           At first sight, FIB update triggered by notifier
  289           will refer to already deleted ifaddr, that could confuse
  290           netlink listeners. It is not true: look, gated sees
  291           that route deleted and if it still thinks that ifaddr
  292           is valid, it will try to restore deleted routes... Grr.
  293           So that, this order is correct.
  294         */
  295        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
  296        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
  297
  298        if (promote) {
  299
                     /* prev_prom is only set when some entry was skipped
                      * between ifa1 and the promoted address; in that case
                      * move the promoted address to just after last_prim.
                      * Otherwise it already sits where ifa1 used to be. */
  300                if (prev_prom) {
  301                        prev_prom->ifa_next = promote->ifa_next;
  302                        promote->ifa_next = last_prim->ifa_next;
  303                        last_prim->ifa_next = promote;
  304                }
  305
  306                promote->ifa_flags &= ~IFA_F_SECONDARY;
  307                rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
  308                blocking_notifier_call_chain(&inetaddr_chain,
  309                                NETDEV_UP, promote);
                     /* Call fib_add_ifaddr() for every address after the
                      * promoted one that shares ifa1's mask and subnet. */
  310                for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
  311                        if (ifa1->ifa_mask != ifa->ifa_mask ||
  312                            !inet_ifa_match(ifa1->ifa_address, ifa))
  313                                        continue;
  314                        fib_add_ifaddr(ifa);
  315                }
  316
  317        }
  318        if (destroy)
  319                inet_free_ifa(ifa1);
  320}
 321
 322static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 323                         int destroy)
 324{
 325        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 326}
 327
 328static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 329                             u32 pid)
 330{
 331        struct in_device *in_dev = ifa->ifa_dev;
 332        struct in_ifaddr *ifa1, **ifap, **last_primary;
 333
 334        ASSERT_RTNL();
 335
 336        if (!ifa->ifa_local) {
 337                inet_free_ifa(ifa);
 338                return 0;
 339        }
 340
 341        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 342        last_primary = &in_dev->ifa_list;
 343
 344        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 345             ifap = &ifa1->ifa_next) {
 346                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 347                    ifa->ifa_scope <= ifa1->ifa_scope)
 348                        last_primary = &ifa1->ifa_next;
 349                if (ifa1->ifa_mask == ifa->ifa_mask &&
 350                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 351                        if (ifa1->ifa_local == ifa->ifa_local) {
 352                                inet_free_ifa(ifa);
 353                                return -EEXIST;
 354                        }
 355                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 356                                inet_free_ifa(ifa);
 357                                return -EINVAL;
 358                        }
 359                        ifa->ifa_flags |= IFA_F_SECONDARY;
 360                }
 361        }
 362
 363        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 364                net_srandom(ifa->ifa_local);
 365                ifap = last_primary;
 366        }
 367
 368        ifa->ifa_next = *ifap;
 369        *ifap = ifa;
 370
 371        /* Send message first, then call notifier.
 372           Notifier will trigger FIB update, so that
 373           listeners of netlink will know about new ifaddr */
 374        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 375        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 376
 377        return 0;
 378}
 379
 380static int inet_insert_ifa(struct in_ifaddr *ifa)
 381{
 382        return __inet_insert_ifa(ifa, NULL, 0);
 383}
 384
 385static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 386{
 387        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 388
 389        ASSERT_RTNL();
 390
 391        if (!in_dev) {
 392                inet_free_ifa(ifa);
 393                return -ENOBUFS;
 394        }
 395        ipv4_devconf_setall(in_dev);
 396        if (ifa->ifa_dev != in_dev) {
 397                WARN_ON(ifa->ifa_dev);
 398                in_dev_hold(in_dev);
 399                ifa->ifa_dev = in_dev;
 400        }
 401        if (ipv4_is_loopback(ifa->ifa_local))
 402                ifa->ifa_scope = RT_SCOPE_HOST;
 403        return inet_insert_ifa(ifa);
 404}
 405
 406/* Caller must hold RCU or RTNL :
 407 * We dont take a reference on found in_device
 408 */
 409struct in_device *inetdev_by_index(struct net *net, int ifindex)
 410{
 411        struct net_device *dev;
 412        struct in_device *in_dev = NULL;
 413
 414        rcu_read_lock();
 415        dev = dev_get_by_index_rcu(net, ifindex);
 416        if (dev)
 417                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 418        rcu_read_unlock();
 419        return in_dev;
 420}
 421EXPORT_SYMBOL(inetdev_by_index);
 422
 423/* Called only from RTNL semaphored context. No locks. */
 424
 425struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 426                                    __be32 mask)
 427{
 428        ASSERT_RTNL();
 429
 430        for_primary_ifa(in_dev) {
 431                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 432                        return ifa;
 433        } endfor_ifa(in_dev);
 434        return NULL;
 435}
 436
 437static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 438{
 439        struct net *net = sock_net(skb->sk);
 440        struct nlattr *tb[IFA_MAX+1];
 441        struct in_device *in_dev;
 442        struct ifaddrmsg *ifm;
 443        struct in_ifaddr *ifa, **ifap;
 444        int err = -EINVAL;
 445
 446        ASSERT_RTNL();
 447
 448        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 449        if (err < 0)
 450                goto errout;
 451
 452        ifm = nlmsg_data(nlh);
 453        in_dev = inetdev_by_index(net, ifm->ifa_index);
 454        if (in_dev == NULL) {
 455                err = -ENODEV;
 456                goto errout;
 457        }
 458
 459        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 460             ifap = &ifa->ifa_next) {
 461                if (tb[IFA_LOCAL] &&
 462                    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 463                        continue;
 464
 465                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 466                        continue;
 467
 468                if (tb[IFA_ADDRESS] &&
 469                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 470                    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 471                        continue;
 472
 473                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 474                return 0;
 475        }
 476
 477        err = -EADDRNOTAVAIL;
 478errout:
 479        return err;
 480}
 481
 482static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 483{
 484        struct nlattr *tb[IFA_MAX+1];
 485        struct in_ifaddr *ifa;
 486        struct ifaddrmsg *ifm;
 487        struct net_device *dev;
 488        struct in_device *in_dev;
 489        int err;
 490
 491        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 492        if (err < 0)
 493                goto errout;
 494
 495        ifm = nlmsg_data(nlh);
 496        err = -EINVAL;
 497        if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 498                goto errout;
 499
 500        dev = __dev_get_by_index(net, ifm->ifa_index);
 501        err = -ENODEV;
 502        if (dev == NULL)
 503                goto errout;
 504
 505        in_dev = __in_dev_get_rtnl(dev);
 506        err = -ENOBUFS;
 507        if (in_dev == NULL)
 508                goto errout;
 509
 510        ifa = inet_alloc_ifa();
 511        if (ifa == NULL)
 512                /*
 513                 * A potential indev allocation can be left alive, it stays
 514                 * assigned to its device and is destroy with it.
 515                 */
 516                goto errout;
 517
 518        ipv4_devconf_setall(in_dev);
 519        in_dev_hold(in_dev);
 520
 521        if (tb[IFA_ADDRESS] == NULL)
 522                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 523
 524        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 525        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 526        ifa->ifa_flags = ifm->ifa_flags;
 527        ifa->ifa_scope = ifm->ifa_scope;
 528        ifa->ifa_dev = in_dev;
 529
 530        ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 531        ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 532
 533        if (tb[IFA_BROADCAST])
 534                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 535
 536        if (tb[IFA_LABEL])
 537                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 538        else
 539                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 540
 541        return ifa;
 542
 543errout:
 544        return ERR_PTR(err);
 545}
 546
 547static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 548{
 549        struct net *net = sock_net(skb->sk);
 550        struct in_ifaddr *ifa;
 551
 552        ASSERT_RTNL();
 553
 554        ifa = rtm_to_ifaddr(net, nlh);
 555        if (IS_ERR(ifa))
 556                return PTR_ERR(ifa);
 557
 558        return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 559}
 560
 561/*
 562 *      Determine a default network mask, based on the IP address.
 563 */
 564
 565static inline int inet_abc_len(__be32 addr)
 566{
 567        int rc = -1;    /* Something else, probably a multicast. */
 568
 569        if (ipv4_is_zeronet(addr))
 570                rc = 0;
 571        else {
 572                __u32 haddr = ntohl(addr);
 573
 574                if (IN_CLASSA(haddr))
 575                        rc = 8;
 576                else if (IN_CLASSB(haddr))
 577                        rc = 16;
 578                else if (IN_CLASSC(haddr))
 579                        rc = 24;
 580        }
 581
 582        return rc;
 583}
 584
 585
  /*
   * IPv4 interface-address ioctl handler.
   *
   * @net: network namespace in which the interface name is looked up
   * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
   *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
   *       SIOCSIFNETMASK; anything else yields -EINVAL
   * @arg: user pointer to a struct ifreq (read for every command, written
   *       back for the four "get" commands)
   *
   * The interface name may be an alias label of the form "name:suffix": the
   * device is loaded and looked up by the part before the colon, while the
   * address is selected by comparing the full label.
   *
   * Returns 0 on success, -EFAULT if the ifreq cannot be copied, -EACCES if
   * a "set" command is issued without CAP_NET_ADMIN, -EINVAL for a bad
   * command, address family, address or mask, -ENODEV if the device does
   * not exist, -EADDRNOTAVAIL if no address matches (except for SIOCSIFADDR
   * and SIOCSIFFLAGS on the plain device), -ENOBUFS if a new address cannot
   * be allocated, or the result of dev_change_flags() / inet_set_ifa().
   */
  586int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
  587{
  588        struct ifreq ifr;
  589        struct sockaddr_in sin_orig;
  590        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
  591        struct in_device *in_dev;
  592        struct in_ifaddr **ifap = NULL;
  593        struct in_ifaddr *ifa = NULL;
  594        struct net_device *dev;
  595        char *colon;
  596        int ret = -EFAULT;
  597        int tryaddrmatch = 0;
  598
  599        /*
  600         *      Fetch the caller's info block into kernel space
  601         */
  602
  603        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
  604                goto out;
             /* Force NUL termination of the user-supplied name. */
  605        ifr.ifr_name[IFNAMSIZ - 1] = 0;
  606
  607        /* save original address for comparison */
  608        memcpy(&sin_orig, sin, sizeof(*sin));
  609
             /* Cut an alias label at ':' so the device is loaded and looked
              * up by its base name; the colon is put back further down. */
  610        colon = strchr(ifr.ifr_name, ':');
  611        if (colon)
  612                *colon = 0;
  613
  614        dev_load(net, ifr.ifr_name);
  615
  616        switch (cmd) {
  617        case SIOCGIFADDR:       /* Get interface address */
  618        case SIOCGIFBRDADDR:    /* Get the broadcast address */
  619        case SIOCGIFDSTADDR:    /* Get the destination address */
  620        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
  621                /* Note that these ioctls will not sleep,
  622                   so that we do not impose a lock.
  623                   One day we will be forced to put shlock here (I mean SMP)
  624                 */
  625                tryaddrmatch = (sin_orig.sin_family == AF_INET);
  626                memset(sin, 0, sizeof(*sin));
  627                sin->sin_family = AF_INET;
  628                break;
  629
  630        case SIOCSIFFLAGS:
  631                ret = -EACCES;
  632                if (!capable(CAP_NET_ADMIN))
  633                        goto out;
  634                break;
  635        case SIOCSIFADDR:       /* Set interface address (and family) */
  636        case SIOCSIFBRDADDR:    /* Set the broadcast address */
  637        case SIOCSIFDSTADDR:    /* Set the destination address */
  638        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
  639                ret = -EACCES;
  640                if (!capable(CAP_NET_ADMIN))
  641                        goto out;
  642                ret = -EINVAL;
  643                if (sin->sin_family != AF_INET)
  644                        goto out;
  645                break;
  646        default:
  647                ret = -EINVAL;
  648                goto out;
  649        }
  650
  651        rtnl_lock();
  652
  653        ret = -ENODEV;
  654        dev = __dev_get_by_name(net, ifr.ifr_name);
  655        if (!dev)
  656                goto done;
  657
             /* Restore the full label for the comparisons below. */
  658        if (colon)
  659                *colon = ':';
  660
  661        in_dev = __in_dev_get_rtnl(dev);
  662        if (in_dev) {
  663                if (tryaddrmatch) {
  664                        /* Matthias Andree */
  665                        /* compare label and address (4.4BSD style) */
  666                        /* note: we only do this for a limited set of ioctls
  667                           and only if the original address family was AF_INET.
  668                           This is checked above. */
  669                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
  670                             ifap = &ifa->ifa_next) {
  671                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
  672                                    sin_orig.sin_addr.s_addr ==
  673                                                        ifa->ifa_local) {
  674                                        break; /* found */
  675                                }
  676                        }
  677                }
  678                /* we didn't get a match, maybe the application is
  679                   4.3BSD-style and passed in junk so we fall back to
  680                   comparing just the label */
  681                if (!ifa) {
  682                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
  683                             ifap = &ifa->ifa_next)
  684                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
  685                                        break;
  686                }
  687        }
  688
             /* Only SIOCSIFADDR (which can create the address) and
              * SIOCSIFFLAGS may proceed without a matching address. */
  689        ret = -EADDRNOTAVAIL;
  690        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
  691                goto done;
  692
  693        switch (cmd) {
  694        case SIOCGIFADDR:       /* Get interface address */
  695                sin->sin_addr.s_addr = ifa->ifa_local;
  696                goto rarok;
  697
  698        case SIOCGIFBRDADDR:    /* Get the broadcast address */
  699                sin->sin_addr.s_addr = ifa->ifa_broadcast;
  700                goto rarok;
  701
  702        case SIOCGIFDSTADDR:    /* Get the destination address */
  703                sin->sin_addr.s_addr = ifa->ifa_address;
  704                goto rarok;
  705
  706        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
  707                sin->sin_addr.s_addr = ifa->ifa_mask;
  708                goto rarok;
  709
  710        case SIOCSIFFLAGS:
                     /* On an alias label the flags act on that one address:
                      * clearing IFF_UP deletes it, nothing else is changed. */
  711                if (colon) {
  712                        ret = -EADDRNOTAVAIL;
  713                        if (!ifa)
  714                                break;
  715                        ret = 0;
  716                        if (!(ifr.ifr_flags & IFF_UP))
  717                                inet_del_ifa(in_dev, ifap, 1);
  718                        break;
  719                }
  720                ret = dev_change_flags(dev, ifr.ifr_flags);
  721                break;
  722
  723        case SIOCSIFADDR:       /* Set interface address (and family) */
  724                ret = -EINVAL;
  725                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
  726                        break;
  727
  728                if (!ifa) {
  729                        ret = -ENOBUFS;
  730                        ifa = inet_alloc_ifa();
  731                        if (!ifa)
  732                                break;
  733                        if (colon)
  734                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
  735                        else
  736                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
  737                } else {
  738                        ret = 0;
  739                        if (ifa->ifa_local == sin->sin_addr.s_addr)
  740                                break;
                             /* Unlink without freeing: the same object is
                              * updated and re-inserted below. */
  741                        inet_del_ifa(in_dev, ifap, 0);
  742                        ifa->ifa_broadcast = 0;
  743                        ifa->ifa_scope = 0;
  744                }
  745
  746                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
  747
  748                if (!(dev->flags & IFF_POINTOPOINT)) {
  749                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
  750                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
  751                        if ((dev->flags & IFF_BROADCAST) &&
  752                            ifa->ifa_prefixlen < 31)
  753                                ifa->ifa_broadcast = ifa->ifa_address |
  754                                                     ~ifa->ifa_mask;
  755                } else {
  756                        ifa->ifa_prefixlen = 32;
  757                        ifa->ifa_mask = inet_make_mask(32);
  758                }
  759                ret = inet_set_ifa(dev, ifa);
  760                break;
  761
  762        case SIOCSIFBRDADDR:    /* Set the broadcast address */
  763                ret = 0;
  764                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
  765                        inet_del_ifa(in_dev, ifap, 0);
  766                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                             /* NOTE(review): the result of inet_insert_ifa()
                              * is not checked here; ret stays 0. */
  767                        inet_insert_ifa(ifa);
  768                }
  769                break;
  770
  771        case SIOCSIFDSTADDR:    /* Set the destination address */
  772                ret = 0;
  773                if (ifa->ifa_address == sin->sin_addr.s_addr)
  774                        break;
  775                ret = -EINVAL;
  776                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
  777                        break;
  778                ret = 0;
  779                inet_del_ifa(in_dev, ifap, 0);
  780                ifa->ifa_address = sin->sin_addr.s_addr;
  781                inet_insert_ifa(ifa);
  782                break;
  783
  784        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
  785
  786                /*
  787                 *      The mask we set must be legal.
  788                 */
  789                ret = -EINVAL;
  790                if (bad_mask(sin->sin_addr.s_addr, 0))
  791                        break;
  792                ret = 0;
  793                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
  794                        __be32 old_mask = ifa->ifa_mask;
  795                        inet_del_ifa(in_dev, ifap, 0);
  796                        ifa->ifa_mask = sin->sin_addr.s_addr;
  797                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
  798
  799                        /* See if current broadcast address matches
  800                         * with current netmask, then recalculate
  801                         * the broadcast address. Otherwise it's a
  802                         * funny address, so don't touch it since
  803                         * the user seems to know what (s)he's doing...
  804                         */
  805                        if ((dev->flags & IFF_BROADCAST) &&
  806                            (ifa->ifa_prefixlen < 31) &&
  807                            (ifa->ifa_broadcast ==
  808                             (ifa->ifa_local|~old_mask))) {
  809                                ifa->ifa_broadcast = (ifa->ifa_local |
  810                                                      ~sin->sin_addr.s_addr);
  811                        }
  812                        inet_insert_ifa(ifa);
  813                }
  814                break;
  815        }
  816done:
  817        rtnl_unlock();
  818out:
  819        return ret;
     /* "Get" commands end here: drop RTNL first, then copy the filled-in
      * ifreq back to user space. */
  820rarok:
  821        rtnl_unlock();
  822        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
  823        goto out;
  824}
 825
 826static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 827{
 828        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 829        struct in_ifaddr *ifa;
 830        struct ifreq ifr;
 831        int done = 0;
 832
 833        if (!in_dev)
 834                goto out;
 835
 836        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 837                if (!buf) {
 838                        done += sizeof(ifr);
 839                        continue;
 840                }
 841                if (len < (int) sizeof(ifr))
 842                        break;
 843                memset(&ifr, 0, sizeof(struct ifreq));
 844                if (ifa->ifa_label)
 845                        strcpy(ifr.ifr_name, ifa->ifa_label);
 846                else
 847                        strcpy(ifr.ifr_name, dev->name);
 848
 849                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
 850                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
 851                                                                ifa->ifa_local;
 852
 853                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
 854                        done = -EFAULT;
 855                        break;
 856                }
 857                buf  += sizeof(struct ifreq);
 858                len  -= sizeof(struct ifreq);
 859                done += sizeof(struct ifreq);
 860        }
 861out:
 862        return done;
 863}
 864
/*
 * Pick a local IPv4 source address.
 *
 * @dev:   device whose addresses are tried first
 * @dst:   destination used for subnet matching; 0 accepts the first
 *         address that passes the scope check
 * @scope: addresses whose ifa_scope is greater than this are skipped
 *
 * Returns the chosen address, or 0 if nothing suitable was found.  The
 * whole lookup runs inside an RCU read-side critical section taken here.
 *
 * NOTE(review): for_primary_ifa()/endfor_ifa() are defined outside this
 * chunk; they declare the `ifa` cursor and presumably visit only primary
 * (non-secondary) addresses -- confirm against their definition.
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
        __be32 addr = 0;
        struct in_device *in_dev;
        struct net *net = dev_net(dev);

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                goto no_in_dev;

        /* First pass: the addresses of @dev itself. */
        for_primary_ifa(in_dev) {
                if (ifa->ifa_scope > scope)
                        continue;
                /* A subnet match (or no @dst at all) wins immediately. */
                if (!dst || inet_ifa_match(dst, ifa)) {
                        addr = ifa->ifa_local;
                        break;
                }
                /* Keep the first in-scope address as a fallback. */
                if (!addr)
                        addr = ifa->ifa_local;
        } endfor_ifa(in_dev);

        if (addr)
                goto out_unlock;
no_in_dev:

        /* Not loopback addresses on loopback should be preferred
           in this case. It is important that lo is the first interface
           in dev_base list.
         */
        for_each_netdev_rcu(net, dev) {
                in_dev = __in_dev_get_rcu(dev);
                if (!in_dev)
                        continue;

                /*
                 * Second pass: take the first address on any device of
                 * this namespace that is not link-scoped and does not
                 * exceed @scope.
                 */
                for_primary_ifa(in_dev) {
                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
                            ifa->ifa_scope <= scope) {
                                addr = ifa->ifa_local;
                                goto out_unlock;
                        }
                } endfor_ifa(in_dev);
        }
out_unlock:
        rcu_read_unlock();
        return addr;
}
EXPORT_SYMBOL(inet_select_addr);
 913
/*
 * Search the address list of one in_device for a usable local address.
 *
 * @in_dev: device whose addresses are walked
 * @dst:    if non-zero, some address on the device must share a subnet
 *          with it
 * @local:  if non-zero, the only address that may be returned; 0 lets the
 *          function pick one
 * @scope:  maximum ifa_scope allowed for the returned address
 *
 * Two things are tracked while walking the list: `addr`, the candidate
 * address to return, and `same`, which becomes true once an entry whose
 * subnet matches @local (when given) and @dst (when given) has been seen.
 * The candidate is returned only if `same` ended up true; otherwise 0.
 *
 * NOTE(review): for_ifa()/endfor_ifa() are defined outside this chunk and
 * supply the `ifa` cursor.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /*
                 * Take the first address that equals @local (or any
                 * address when @local is 0) and is within @scope.
                 */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet condition already met earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /*
                                 * With an explicit @local, or without a
                                 * @dst constraint, the candidate stands.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
 950
 951/*
 952 * Confirm that local IP address exists using wildcards:
 953 * - in_dev: only on this interface, 0=any interface
 954 * - dst: only in the same subnet as dst, 0=any dst
 955 * - local: address, 0=autoselect the local address
 956 * - scope: maximum allowed scope value for the local address
 957 */
 958__be32 inet_confirm_addr(struct in_device *in_dev,
 959                         __be32 dst, __be32 local, int scope)
 960{
 961        __be32 addr = 0;
 962        struct net_device *dev;
 963        struct net *net;
 964
 965        if (scope != RT_SCOPE_LINK)
 966                return confirm_addr_indev(in_dev, dst, local, scope);
 967
 968        net = dev_net(in_dev->dev);
 969        rcu_read_lock();
 970        for_each_netdev_rcu(net, dev) {
 971                in_dev = __in_dev_get_rcu(dev);
 972                if (in_dev) {
 973                        addr = confirm_addr_indev(in_dev, dst, local, scope);
 974                        if (addr)
 975                                break;
 976                }
 977        }
 978        rcu_read_unlock();
 979
 980        return addr;
 981}
 982
 983/*
 984 *      Device notifier
 985 */
 986
 987int register_inetaddr_notifier(struct notifier_block *nb)
 988{
 989        return blocking_notifier_chain_register(&inetaddr_chain, nb);
 990}
 991EXPORT_SYMBOL(register_inetaddr_notifier);
 992
 993int unregister_inetaddr_notifier(struct notifier_block *nb)
 994{
 995        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
 996}
 997EXPORT_SYMBOL(unregister_inetaddr_notifier);
 998
 999/* Rename ifa_labels for a device name change. Make some effort to preserve
1000 * existing alias numbering and to create unique labels if possible.
1001*/
1002static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1003{
1004        struct in_ifaddr *ifa;
1005        int named = 0;
1006
1007        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1008                char old[IFNAMSIZ], *dot;
1009
1010                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1011                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1012                if (named++ == 0)
1013                        goto skip;
1014                dot = strchr(old, ':');
1015                if (dot == NULL) {
1016                        sprintf(old, ":%d", named);
1017                        dot = old;
1018                }
1019                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1020                        strcat(ifa->ifa_label, dot);
1021                else
1022                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1023skip:
1024                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1025        }
1026}
1027
/*
 * Tell whether @mtu is large enough for IPv4 on a device: anything below
 * 68 bytes (the minimum every IPv4 link must carry, RFC 791) is rejected.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        return !(mtu < 68);
}
1032
1033static void inetdev_send_gratuitous_arp(struct net_device *dev,
1034                                        struct in_device *in_dev)
1035
1036{
1037        struct in_ifaddr *ifa = in_dev->ifa_list;
1038
1039        if (!ifa)
1040                return;
1041
1042        arp_send(ARPOP_REQUEST, ETH_P_ARP,
1043                 ifa->ifa_local, dev,
1044                 ifa->ifa_local, NULL,
1045                 dev->dev_addr, NULL);
1046}
1047
/*
 * Netdevice notifier callback: keeps the per-device IPv4 state in step
 * with device life-cycle events.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case -ENOMEM is reported through
 * notifier_from_errno().
 *
 * Called only under RTNL semaphore
 */

static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /*
         * No IPv4 state yet: only REGISTER and a CHANGEMTU to a valid MTU
         * create it; every other event is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* Loopback devices get NOXFRM and NOPOLICY set. */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* REGISTER with existing IPv4 state is unexpected. */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                rcu_assign_pointer(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback gets INADDR_LOOPBACK/8, host scope, on UP. */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce only if ARP_NOTIFY is set. */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() in this file does
                 * send RTM_NEWADDR for every address, so the sentence
                 * above looks stale.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register the sysctl entries under the new name. */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1137
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered with register_netdevice_notifier() in devinet_init().
 */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1141
1142static inline size_t inet_nlmsg_size(void)
1143{
1144        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1145               + nla_total_size(4) /* IFA_ADDRESS */
1146               + nla_total_size(4) /* IFA_LOCAL */
1147               + nla_total_size(4) /* IFA_BROADCAST */
1148               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1149}
1150
1151static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1152                            u32 pid, u32 seq, int event, unsigned int flags)
1153{
1154        struct ifaddrmsg *ifm;
1155        struct nlmsghdr  *nlh;
1156
1157        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1158        if (nlh == NULL)
1159                return -EMSGSIZE;
1160
1161        ifm = nlmsg_data(nlh);
1162        ifm->ifa_family = AF_INET;
1163        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1164        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1165        ifm->ifa_scope = ifa->ifa_scope;
1166        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1167
1168        if (ifa->ifa_address)
1169                NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1170
1171        if (ifa->ifa_local)
1172                NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1173
1174        if (ifa->ifa_broadcast)
1175                NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1176
1177        if (ifa->ifa_label[0])
1178                NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1179
1180        return nlmsg_end(skb, nlh);
1181
1182nla_put_failure:
1183        nlmsg_cancel(skb, nlh);
1184        return -EMSGSIZE;
1185}
1186
/*
 * Netlink dump callback (registered for RTM_GETADDR in devinet_init()):
 * emit one RTM_NEWADDR message per IPv4 address in the namespace.
 *
 * The position to resume from is kept in cb->args[]:
 *   args[0]  bucket index in net->dev_index_head
 *   args[1]  index of the device within that bucket
 *   args[2]  index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        /* Pick up where the previous call stopped. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* Skip devices already dumped. */
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start at address 0. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                /*
                                 * A non-positive result ends this pass;
                                 * h/idx/ip_idx are saved below so the
                                 * next call retries this address.
                                 */
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Save the resume position for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1241
1242static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1243                      u32 pid)
1244{
1245        struct sk_buff *skb;
1246        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1247        int err = -ENOBUFS;
1248        struct net *net;
1249
1250        net = dev_net(ifa->ifa_dev->dev);
1251        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1252        if (skb == NULL)
1253                goto errout;
1254
1255        err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1256        if (err < 0) {
1257                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1258                WARN_ON(err == -EMSGSIZE);
1259                kfree_skb(skb);
1260                goto errout;
1261        }
1262        rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1263        return;
1264errout:
1265        if (err < 0)
1266                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1267}
1268
1269static size_t inet_get_link_af_size(const struct net_device *dev)
1270{
1271        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1272
1273        if (!in_dev)
1274                return 0;
1275
1276        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1277}
1278
1279static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1280{
1281        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1282        struct nlattr *nla;
1283        int i;
1284
1285        if (!in_dev)
1286                return -ENODATA;
1287
1288        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1289        if (nla == NULL)
1290                return -EMSGSIZE;
1291
1292        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1293                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1294
1295        return 0;
1296}
1297
/*
 * Attribute policy for the AF_INET link attributes, used by
 * inet_validate_link_af(): IFLA_INET_CONF must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1301
1302static int inet_validate_link_af(const struct net_device *dev,
1303                                 const struct nlattr *nla)
1304{
1305        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1306        int err, rem;
1307
1308        if (dev && !__in_dev_get_rtnl(dev))
1309                return -EAFNOSUPPORT;
1310
1311        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1312        if (err < 0)
1313                return err;
1314
1315        if (tb[IFLA_INET_CONF]) {
1316                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1317                        int cfgid = nla_type(a);
1318
1319                        if (nla_len(a) < 4)
1320                                return -EINVAL;
1321
1322                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1323                                return -EINVAL;
1324                }
1325        }
1326
1327        return 0;
1328}
1329
1330static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1331{
1332        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1333        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1334        int rem;
1335
1336        if (!in_dev)
1337                return -EAFNOSUPPORT;
1338
1339        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1340                BUG();
1341
1342        if (tb[IFLA_INET_CONF]) {
1343                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1344                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1345        }
1346
1347        return 0;
1348}
1349
1350#ifdef CONFIG_SYSCTL
1351
1352static void devinet_copy_dflt_conf(struct net *net, int i)
1353{
1354        struct net_device *dev;
1355
1356        rcu_read_lock();
1357        for_each_netdev_rcu(net, dev) {
1358                struct in_device *in_dev;
1359
1360                in_dev = __in_dev_get_rcu(dev);
1361                if (in_dev && !test_bit(i, in_dev->cnf.state))
1362                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1363        }
1364        rcu_read_unlock();
1365}
1366
1367/* called with RTNL locked */
1368static void inet_forward_change(struct net *net)
1369{
1370        struct net_device *dev;
1371        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1372
1373        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1374        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1375
1376        for_each_netdev(net, dev) {
1377                struct in_device *in_dev;
1378                if (on)
1379                        dev_disable_lro(dev);
1380                rcu_read_lock();
1381                in_dev = __in_dev_get_rcu(dev);
1382                if (in_dev)
1383                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1384                rcu_read_unlock();
1385        }
1386}
1387
1388static int devinet_conf_proc(ctl_table *ctl, int write,
1389                             void __user *buffer,
1390                             size_t *lenp, loff_t *ppos)
1391{
1392        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1393
1394        if (write) {
1395                struct ipv4_devconf *cnf = ctl->extra1;
1396                struct net *net = ctl->extra2;
1397                int i = (int *)ctl->data - cnf->data;
1398
1399                set_bit(i, cnf->state);
1400
1401                if (cnf == net->ipv4.devconf_dflt)
1402                        devinet_copy_dflt_conf(net, i);
1403        }
1404
1405        return ret;
1406}
1407
1408static int devinet_sysctl_forward(ctl_table *ctl, int write,
1409                                  void __user *buffer,
1410                                  size_t *lenp, loff_t *ppos)
1411{
1412        int *valp = ctl->data;
1413        int val = *valp;
1414        loff_t pos = *ppos;
1415        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1416
1417        if (write && *valp != val) {
1418                struct net *net = ctl->extra2;
1419
1420                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1421                        if (!rtnl_trylock()) {
1422                                /* Restore the original values before restarting */
1423                                *valp = val;
1424                                *ppos = pos;
1425                                return restart_syscall();
1426                        }
1427                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1428                                inet_forward_change(net);
1429                        } else if (*valp) {
1430                                struct ipv4_devconf *cnf = ctl->extra1;
1431                                struct in_device *idev =
1432                                        container_of(cnf, struct in_device, cnf);
1433                                dev_disable_lro(idev->dev);
1434                        }
1435                        rtnl_unlock();
1436                        rt_cache_flush(net, 0);
1437                }
1438        }
1439
1440        return ret;
1441}
1442
1443static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1444                                void __user *buffer,
1445                                size_t *lenp, loff_t *ppos)
1446{
1447        int *valp = ctl->data;
1448        int val = *valp;
1449        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1450        struct net *net = ctl->extra2;
1451
1452        if (write && *valp != val)
1453                rt_cache_flush(net, 0);
1454
1455        return ret;
1456}
1457
/*
 * Build one ctl_table initializer for the devconf template table.
 *   attr  IPV4_DEVCONF_<attr> id; .data points at slot (id - 1) of
 *         ipv4_devconf.data
 *   name  procname of the entry
 *   mval  file mode
 *   proc  proc_handler
 * .extra1 is preset to the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write (0644) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only (0444) entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write (0644) entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1480
/*
 * Template for the per-configuration sysctl table.  It is never
 * registered itself: __devinet_sysctl_register() duplicates it with
 * kmemdup() and repoints .data/.extra1/.extra2 of each entry at the
 * target configuration.
 *
 *   sysctl_header  handle returned by the sysctl registration
 *   devinet_vars   one entry per devconf value; the registration loop
 *                  skips the final slot (presumably the empty terminator
 *                  -- confirm against the sysctl core)
 *   dev_name       private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* Changing any of these flushes the routing cache. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1520
1521static int __devinet_sysctl_register(struct net *net, char *dev_name,
1522                                        struct ipv4_devconf *p)
1523{
1524        int i;
1525        struct devinet_sysctl_table *t;
1526
1527#define DEVINET_CTL_PATH_DEV    3
1528
1529        struct ctl_path devinet_ctl_path[] = {
1530                { .procname = "net",  },
1531                { .procname = "ipv4", },
1532                { .procname = "conf", },
1533                { /* to be set */ },
1534                { },
1535        };
1536
1537        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1538        if (!t)
1539                goto out;
1540
1541        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1542                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1543                t->devinet_vars[i].extra1 = p;
1544                t->devinet_vars[i].extra2 = net;
1545        }
1546
1547        /*
1548         * Make a copy of dev_name, because '.procname' is regarded as const
1549         * by sysctl and we wouldn't want anyone to change it under our feet
1550         * (see SIOCSIFNAME).
1551         */
1552        t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1553        if (!t->dev_name)
1554                goto free;
1555
1556        devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1557
1558        t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1559                        t->devinet_vars);
1560        if (!t->sysctl_header)
1561                goto free_procname;
1562
1563        p->sysctl = t;
1564        return 0;
1565
1566free_procname:
1567        kfree(t->dev_name);
1568free:
1569        kfree(t);
1570out:
1571        return -ENOBUFS;
1572}
1573
1574static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1575{
1576        struct devinet_sysctl_table *t = cnf->sysctl;
1577
1578        if (t == NULL)
1579                return;
1580
1581        cnf->sysctl = NULL;
1582        unregister_sysctl_table(t->sysctl_header);
1583        kfree(t->dev_name);
1584        kfree(t);
1585}
1586
1587static void devinet_sysctl_register(struct in_device *idev)
1588{
1589        neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1590        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1591                                        &idev->cnf);
1592}
1593
1594static void devinet_sysctl_unregister(struct in_device *idev)
1595{
1596        __devinet_sysctl_unregister(&idev->cnf);
1597        neigh_sysctl_unregister(idev->arp_parms);
1598}
1599
/*
 * Table for the "ip_forward" sysctl, registered under net_ipv4_path by
 * devinet_init_net().  This static instance points at the global
 * ipv4_devconf and init_net; for any other namespace devinet_init_net()
 * duplicates it and repoints .data/.extra1/.extra2.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1613
/* sysctl path "net/ipv4" under which the ip_forward table is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { },
};
1619#endif
1620
/*
 * Per-namespace init: set up the "all" and "default" IPv4 device
 * configurations of @net and, with CONFIG_SYSCTL, their sysctl entries
 * plus net/ipv4/ip_forward.
 *
 * init_net uses the static ipv4_devconf, ipv4_devconf_dflt and
 * ctl_forward_entry objects directly; every other namespace gets
 * kmemdup()'ed copies.
 *
 * Returns 0 on success.  On failure the error code is returned and
 * whatever was allocated or registered so far is undone.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        /* err was overwritten by the successful registrations above. */
        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /*
         * Error unwinding, in reverse order of setup.  The static
         * init_net objects are never passed to kfree().
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1692
1693static __net_exit void devinet_exit_net(struct net *net)
1694{
1695#ifdef CONFIG_SYSCTL
1696        struct ctl_table *tbl;
1697
1698        tbl = net->ipv4.forw_hdr->ctl_table_arg;
1699        unregister_net_sysctl_table(net->ipv4.forw_hdr);
1700        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1701        __devinet_sysctl_unregister(net->ipv4.devconf_all);
1702        kfree(tbl);
1703#endif
1704        kfree(net->ipv4.devconf_dflt);
1705        kfree(net->ipv4.devconf_all);
1706}
1707
/*
 * Per-namespace init/exit hooks, registered with register_pernet_subsys()
 * in devinet_init().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
1712
/*
 * AF_INET callbacks for address-family specific link attributes,
 * registered with rtnl_af_register() in devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
1720
/*
 * Boot-time setup of IPv4 device support: registers the per-namespace
 * operations, the SIOCGIFCONF helper for PF_INET, the netdevice
 * notifier, the AF_INET link attribute operations and the rtnetlink
 * handlers for adding, deleting and dumping addresses.  None of the
 * registration results are checked here.
 */
void __init devinet_init(void)
{
        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        rtnl_af_register(&inet_af_ops);

        /* NEWADDR/DELADDR have doit handlers; GETADDR is dump-only. */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
}
1734
1735