linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/string.h>
  36#include <linux/mm.h>
  37#include <linux/socket.h>
  38#include <linux/sockios.h>
  39#include <linux/in.h>
  40#include <linux/errno.h>
  41#include <linux/interrupt.h>
  42#include <linux/if_addr.h>
  43#include <linux/if_ether.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/etherdevice.h>
  47#include <linux/skbuff.h>
  48#include <linux/init.h>
  49#include <linux/notifier.h>
  50#include <linux/inetdevice.h>
  51#include <linux/igmp.h>
  52#include <linux/slab.h>
  53#include <linux/hash.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58
  59#include <net/arp.h>
  60#include <net/ip.h>
  61#include <net/route.h>
  62#include <net/ip_fib.h>
  63#include <net/rtnetlink.h>
  64#include <net/net_namespace.h>
  65
  66#include "fib_lookup.h"
  67
  68static struct ipv4_devconf ipv4_devconf = {
  69        .data = {
  70                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  71                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  72                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  74        },
  75};
  76
  77static struct ipv4_devconf ipv4_devconf_dflt = {
  78        .data = {
  79                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  80                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  81                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  82                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  83                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  84        },
  85};
  86
  87#define IPV4_DEVCONF_DFLT(net, attr) \
  88        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  89
  90static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  91        [IFA_LOCAL]             = { .type = NLA_U32 },
  92        [IFA_ADDRESS]           = { .type = NLA_U32 },
  93        [IFA_BROADCAST]         = { .type = NLA_U32 },
  94        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
  95};
  96
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value.  So if you change this define, make appropriate changes to
 * inet_addr_hash as well.  inet_addr_hash also masks its result with
 * IN4_ADDR_HSIZE - 1, so the value has to stay a power of two.
 */
#define IN4_ADDR_HSIZE  256
/* Bucket heads of the local-address hash.  inet_hash_insert() picks the
 * bucket with inet_addr_hash(); __ip_dev_find() walks a bucket under
 * rcu_read_lock().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Held around the list add/del in inet_hash_insert()/inet_hash_remove(). */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
 104
 105static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
 106{
 107        u32 val = (__force u32) addr ^ hash_ptr(net, 8);
 108
 109        return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
 110                (IN4_ADDR_HSIZE - 1));
 111}
 112
 113static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 114{
 115        unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
 116
 117        spin_lock(&inet_addr_hash_lock);
 118        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 119        spin_unlock(&inet_addr_hash_lock);
 120}
 121
 122static void inet_hash_remove(struct in_ifaddr *ifa)
 123{
 124        spin_lock(&inet_addr_hash_lock);
 125        hlist_del_init_rcu(&ifa->hash);
 126        spin_unlock(&inet_addr_hash_lock);
 127}
 128
 129/**
 130 * __ip_dev_find - find the first device with a given source address.
 131 * @net: the net namespace
 132 * @addr: the source address
 133 * @devref: if true, take a reference on the found device
 134 *
 135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 136 */
 137struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 138{
 139        unsigned int hash = inet_addr_hash(net, addr);
 140        struct net_device *result = NULL;
 141        struct in_ifaddr *ifa;
 142        struct hlist_node *node;
 143
 144        rcu_read_lock();
 145        hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
 146                struct net_device *dev = ifa->ifa_dev->dev;
 147
 148                if (!net_eq(dev_net(dev), net))
 149                        continue;
 150                if (ifa->ifa_local == addr) {
 151                        result = dev;
 152                        break;
 153                }
 154        }
 155        if (!result) {
 156                struct flowi4 fl4 = { .daddr = addr };
 157                struct fib_result res = { 0 };
 158                struct fib_table *local;
 159
 160                /* Fallback to FIB local table so that communication
 161                 * over loopback subnets work.
 162                 */
 163                local = fib_get_table(net, RT_TABLE_LOCAL);
 164                if (local &&
 165                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 166                    res.type == RTN_LOCAL)
 167                        result = FIB_RES_DEV(res);
 168        }
 169        if (result && devref)
 170                dev_hold(result);
 171        rcu_read_unlock();
 172        return result;
 173}
 174EXPORT_SYMBOL(__ip_dev_find);
 175
 176static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 177
 178static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 179static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 180                         int destroy);
 181#ifdef CONFIG_SYSCTL
 182static void devinet_sysctl_register(struct in_device *idev);
 183static void devinet_sysctl_unregister(struct in_device *idev);
 184#else
 185static inline void devinet_sysctl_register(struct in_device *idev)
 186{
 187}
 188static inline void devinet_sysctl_unregister(struct in_device *idev)
 189{
 190}
 191#endif
 192
 193/* Locks all the inet devices. */
 194
 195static struct in_ifaddr *inet_alloc_ifa(void)
 196{
 197        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 198}
 199
 200static void inet_rcu_free_ifa(struct rcu_head *head)
 201{
 202        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 203        if (ifa->ifa_dev)
 204                in_dev_put(ifa->ifa_dev);
 205        kfree(ifa);
 206}
 207
 208static inline void inet_free_ifa(struct in_ifaddr *ifa)
 209{
 210        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 211}
 212
 213void in_dev_finish_destroy(struct in_device *idev)
 214{
 215        struct net_device *dev = idev->dev;
 216
 217        WARN_ON(idev->ifa_list);
 218        WARN_ON(idev->mc_list);
 219#ifdef NET_REFCNT_DEBUG
 220        printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
 221               idev, dev ? dev->name : "NIL");
 222#endif
 223        dev_put(dev);
 224        if (!idev->dead)
 225                pr_err("Freeing alive in_device %p\n", idev);
 226        else
 227                kfree(idev);
 228}
 229EXPORT_SYMBOL(in_dev_finish_destroy);
 230
 231static struct in_device *inetdev_init(struct net_device *dev)
 232{
 233        struct in_device *in_dev;
 234
 235        ASSERT_RTNL();
 236
 237        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 238        if (!in_dev)
 239                goto out;
 240        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 241                        sizeof(in_dev->cnf));
 242        in_dev->cnf.sysctl = NULL;
 243        in_dev->dev = dev;
 244        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 245        if (!in_dev->arp_parms)
 246                goto out_kfree;
 247        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 248                dev_disable_lro(dev);
 249        /* Reference in_dev->dev */
 250        dev_hold(dev);
 251        /* Account for reference dev->ip_ptr (below) */
 252        in_dev_hold(in_dev);
 253
 254        devinet_sysctl_register(in_dev);
 255        ip_mc_init_dev(in_dev);
 256        if (dev->flags & IFF_UP)
 257                ip_mc_up(in_dev);
 258
 259        /* we can receive as soon as ip_ptr is set -- do this last */
 260        rcu_assign_pointer(dev->ip_ptr, in_dev);
 261out:
 262        return in_dev;
 263out_kfree:
 264        kfree(in_dev);
 265        in_dev = NULL;
 266        goto out;
 267}
 268
 269static void in_dev_rcu_put(struct rcu_head *head)
 270{
 271        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 272        in_dev_put(idev);
 273}
 274
 275static void inetdev_destroy(struct in_device *in_dev)
 276{
 277        struct in_ifaddr *ifa;
 278        struct net_device *dev;
 279
 280        ASSERT_RTNL();
 281
 282        dev = in_dev->dev;
 283
 284        in_dev->dead = 1;
 285
 286        ip_mc_destroy_dev(in_dev);
 287
 288        while ((ifa = in_dev->ifa_list) != NULL) {
 289                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 290                inet_free_ifa(ifa);
 291        }
 292
 293        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 294
 295        devinet_sysctl_unregister(in_dev);
 296        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 297        arp_ifdown(dev);
 298
 299        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 300}
 301
 302int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 303{
 304        rcu_read_lock();
 305        for_primary_ifa(in_dev) {
 306                if (inet_ifa_match(a, ifa)) {
 307                        if (!b || inet_ifa_match(b, ifa)) {
 308                                rcu_read_unlock();
 309                                return 1;
 310                        }
 311                }
 312        } endfor_ifa(in_dev);
 313        rcu_read_unlock();
 314        return 0;
 315}
 316
 317static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 318                         int destroy, struct nlmsghdr *nlh, u32 pid)
 319{
 320        struct in_ifaddr *promote = NULL;
 321        struct in_ifaddr *ifa, *ifa1 = *ifap;
 322        struct in_ifaddr *last_prim = in_dev->ifa_list;
 323        struct in_ifaddr *prev_prom = NULL;
 324        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 325
 326        ASSERT_RTNL();
 327
 328        /* 1. Deleting primary ifaddr forces deletion all secondaries
 329         * unless alias promotion is set
 330         **/
 331
 332        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 333                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 334
 335                while ((ifa = *ifap1) != NULL) {
 336                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 337                            ifa1->ifa_scope <= ifa->ifa_scope)
 338                                last_prim = ifa;
 339
 340                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 341                            ifa1->ifa_mask != ifa->ifa_mask ||
 342                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 343                                ifap1 = &ifa->ifa_next;
 344                                prev_prom = ifa;
 345                                continue;
 346                        }
 347
 348                        if (!do_promote) {
 349                                inet_hash_remove(ifa);
 350                                *ifap1 = ifa->ifa_next;
 351
 352                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
 353                                blocking_notifier_call_chain(&inetaddr_chain,
 354                                                NETDEV_DOWN, ifa);
 355                                inet_free_ifa(ifa);
 356                        } else {
 357                                promote = ifa;
 358                                break;
 359                        }
 360                }
 361        }
 362
 363        /* On promotion all secondaries from subnet are changing
 364         * the primary IP, we must remove all their routes silently
 365         * and later to add them back with new prefsrc. Do this
 366         * while all addresses are on the device list.
 367         */
 368        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 369                if (ifa1->ifa_mask == ifa->ifa_mask &&
 370                    inet_ifa_match(ifa1->ifa_address, ifa))
 371                        fib_del_ifaddr(ifa, ifa1);
 372        }
 373
 374        /* 2. Unlink it */
 375
 376        *ifap = ifa1->ifa_next;
 377        inet_hash_remove(ifa1);
 378
 379        /* 3. Announce address deletion */
 380
 381        /* Send message first, then call notifier.
 382           At first sight, FIB update triggered by notifier
 383           will refer to already deleted ifaddr, that could confuse
 384           netlink listeners. It is not true: look, gated sees
 385           that route deleted and if it still thinks that ifaddr
 386           is valid, it will try to restore deleted routes... Grr.
 387           So that, this order is correct.
 388         */
 389        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
 390        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 391
 392        if (promote) {
 393                struct in_ifaddr *next_sec = promote->ifa_next;
 394
 395                if (prev_prom) {
 396                        prev_prom->ifa_next = promote->ifa_next;
 397                        promote->ifa_next = last_prim->ifa_next;
 398                        last_prim->ifa_next = promote;
 399                }
 400
 401                promote->ifa_flags &= ~IFA_F_SECONDARY;
 402                rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 403                blocking_notifier_call_chain(&inetaddr_chain,
 404                                NETDEV_UP, promote);
 405                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 406                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 407                            !inet_ifa_match(ifa1->ifa_address, ifa))
 408                                        continue;
 409                        fib_add_ifaddr(ifa);
 410                }
 411
 412        }
 413        if (destroy)
 414                inet_free_ifa(ifa1);
 415}
 416
 417static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 418                         int destroy)
 419{
 420        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 421}
 422
 423static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 424                             u32 pid)
 425{
 426        struct in_device *in_dev = ifa->ifa_dev;
 427        struct in_ifaddr *ifa1, **ifap, **last_primary;
 428
 429        ASSERT_RTNL();
 430
 431        if (!ifa->ifa_local) {
 432                inet_free_ifa(ifa);
 433                return 0;
 434        }
 435
 436        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 437        last_primary = &in_dev->ifa_list;
 438
 439        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 440             ifap = &ifa1->ifa_next) {
 441                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 442                    ifa->ifa_scope <= ifa1->ifa_scope)
 443                        last_primary = &ifa1->ifa_next;
 444                if (ifa1->ifa_mask == ifa->ifa_mask &&
 445                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 446                        if (ifa1->ifa_local == ifa->ifa_local) {
 447                                inet_free_ifa(ifa);
 448                                return -EEXIST;
 449                        }
 450                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 451                                inet_free_ifa(ifa);
 452                                return -EINVAL;
 453                        }
 454                        ifa->ifa_flags |= IFA_F_SECONDARY;
 455                }
 456        }
 457
 458        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 459                net_srandom(ifa->ifa_local);
 460                ifap = last_primary;
 461        }
 462
 463        ifa->ifa_next = *ifap;
 464        *ifap = ifa;
 465
 466        inet_hash_insert(dev_net(in_dev->dev), ifa);
 467
 468        /* Send message first, then call notifier.
 469           Notifier will trigger FIB update, so that
 470           listeners of netlink will know about new ifaddr */
 471        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 472        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 473
 474        return 0;
 475}
 476
 477static int inet_insert_ifa(struct in_ifaddr *ifa)
 478{
 479        return __inet_insert_ifa(ifa, NULL, 0);
 480}
 481
 482static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 483{
 484        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 485
 486        ASSERT_RTNL();
 487
 488        if (!in_dev) {
 489                inet_free_ifa(ifa);
 490                return -ENOBUFS;
 491        }
 492        ipv4_devconf_setall(in_dev);
 493        if (ifa->ifa_dev != in_dev) {
 494                WARN_ON(ifa->ifa_dev);
 495                in_dev_hold(in_dev);
 496                ifa->ifa_dev = in_dev;
 497        }
 498        if (ipv4_is_loopback(ifa->ifa_local))
 499                ifa->ifa_scope = RT_SCOPE_HOST;
 500        return inet_insert_ifa(ifa);
 501}
 502
 503/* Caller must hold RCU or RTNL :
 504 * We dont take a reference on found in_device
 505 */
 506struct in_device *inetdev_by_index(struct net *net, int ifindex)
 507{
 508        struct net_device *dev;
 509        struct in_device *in_dev = NULL;
 510
 511        rcu_read_lock();
 512        dev = dev_get_by_index_rcu(net, ifindex);
 513        if (dev)
 514                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 515        rcu_read_unlock();
 516        return in_dev;
 517}
 518EXPORT_SYMBOL(inetdev_by_index);
 519
 520/* Called only from RTNL semaphored context. No locks. */
 521
 522struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 523                                    __be32 mask)
 524{
 525        ASSERT_RTNL();
 526
 527        for_primary_ifa(in_dev) {
 528                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 529                        return ifa;
 530        } endfor_ifa(in_dev);
 531        return NULL;
 532}
 533
 534static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 535{
 536        struct net *net = sock_net(skb->sk);
 537        struct nlattr *tb[IFA_MAX+1];
 538        struct in_device *in_dev;
 539        struct ifaddrmsg *ifm;
 540        struct in_ifaddr *ifa, **ifap;
 541        int err = -EINVAL;
 542
 543        ASSERT_RTNL();
 544
 545        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 546        if (err < 0)
 547                goto errout;
 548
 549        ifm = nlmsg_data(nlh);
 550        in_dev = inetdev_by_index(net, ifm->ifa_index);
 551        if (in_dev == NULL) {
 552                err = -ENODEV;
 553                goto errout;
 554        }
 555
 556        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 557             ifap = &ifa->ifa_next) {
 558                if (tb[IFA_LOCAL] &&
 559                    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 560                        continue;
 561
 562                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 563                        continue;
 564
 565                if (tb[IFA_ADDRESS] &&
 566                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 567                    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 568                        continue;
 569
 570                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 571                return 0;
 572        }
 573
 574        err = -EADDRNOTAVAIL;
 575errout:
 576        return err;
 577}
 578
 579static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 580{
 581        struct nlattr *tb[IFA_MAX+1];
 582        struct in_ifaddr *ifa;
 583        struct ifaddrmsg *ifm;
 584        struct net_device *dev;
 585        struct in_device *in_dev;
 586        int err;
 587
 588        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 589        if (err < 0)
 590                goto errout;
 591
 592        ifm = nlmsg_data(nlh);
 593        err = -EINVAL;
 594        if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 595                goto errout;
 596
 597        dev = __dev_get_by_index(net, ifm->ifa_index);
 598        err = -ENODEV;
 599        if (dev == NULL)
 600                goto errout;
 601
 602        in_dev = __in_dev_get_rtnl(dev);
 603        err = -ENOBUFS;
 604        if (in_dev == NULL)
 605                goto errout;
 606
 607        ifa = inet_alloc_ifa();
 608        if (ifa == NULL)
 609                /*
 610                 * A potential indev allocation can be left alive, it stays
 611                 * assigned to its device and is destroy with it.
 612                 */
 613                goto errout;
 614
 615        ipv4_devconf_setall(in_dev);
 616        in_dev_hold(in_dev);
 617
 618        if (tb[IFA_ADDRESS] == NULL)
 619                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 620
 621        INIT_HLIST_NODE(&ifa->hash);
 622        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 623        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 624        ifa->ifa_flags = ifm->ifa_flags;
 625        ifa->ifa_scope = ifm->ifa_scope;
 626        ifa->ifa_dev = in_dev;
 627
 628        ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 629        ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 630
 631        if (tb[IFA_BROADCAST])
 632                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 633
 634        if (tb[IFA_LABEL])
 635                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 636        else
 637                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 638
 639        return ifa;
 640
 641errout:
 642        return ERR_PTR(err);
 643}
 644
 645static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 646{
 647        struct net *net = sock_net(skb->sk);
 648        struct in_ifaddr *ifa;
 649
 650        ASSERT_RTNL();
 651
 652        ifa = rtm_to_ifaddr(net, nlh);
 653        if (IS_ERR(ifa))
 654                return PTR_ERR(ifa);
 655
 656        return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 657}
 658
 659/*
 660 *      Determine a default network mask, based on the IP address.
 661 */
 662
 663static inline int inet_abc_len(__be32 addr)
 664{
 665        int rc = -1;    /* Something else, probably a multicast. */
 666
 667        if (ipv4_is_zeronet(addr))
 668                rc = 0;
 669        else {
 670                __u32 haddr = ntohl(addr);
 671
 672                if (IN_CLASSA(haddr))
 673                        rc = 8;
 674                else if (IN_CLASSB(haddr))
 675                        rc = 16;
 676                else if (IN_CLASSC(haddr))
 677                        rc = 24;
 678        }
 679
 680        return rc;
 681}
 682
 683
 684int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 685{
 686        struct ifreq ifr;
 687        struct sockaddr_in sin_orig;
 688        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 689        struct in_device *in_dev;
 690        struct in_ifaddr **ifap = NULL;
 691        struct in_ifaddr *ifa = NULL;
 692        struct net_device *dev;
 693        char *colon;
 694        int ret = -EFAULT;
 695        int tryaddrmatch = 0;
 696
 697        /*
 698         *      Fetch the caller's info block into kernel space
 699         */
 700
 701        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 702                goto out;
 703        ifr.ifr_name[IFNAMSIZ - 1] = 0;
 704
 705        /* save original address for comparison */
 706        memcpy(&sin_orig, sin, sizeof(*sin));
 707
 708        colon = strchr(ifr.ifr_name, ':');
 709        if (colon)
 710                *colon = 0;
 711
 712        dev_load(net, ifr.ifr_name);
 713
 714        switch (cmd) {
 715        case SIOCGIFADDR:       /* Get interface address */
 716        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 717        case SIOCGIFDSTADDR:    /* Get the destination address */
 718        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 719                /* Note that these ioctls will not sleep,
 720                   so that we do not impose a lock.
 721                   One day we will be forced to put shlock here (I mean SMP)
 722                 */
 723                tryaddrmatch = (sin_orig.sin_family == AF_INET);
 724                memset(sin, 0, sizeof(*sin));
 725                sin->sin_family = AF_INET;
 726                break;
 727
 728        case SIOCSIFFLAGS:
 729                ret = -EACCES;
 730                if (!capable(CAP_NET_ADMIN))
 731                        goto out;
 732                break;
 733        case SIOCSIFADDR:       /* Set interface address (and family) */
 734        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 735        case SIOCSIFDSTADDR:    /* Set the destination address */
 736        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 737                ret = -EACCES;
 738                if (!capable(CAP_NET_ADMIN))
 739                        goto out;
 740                ret = -EINVAL;
 741                if (sin->sin_family != AF_INET)
 742                        goto out;
 743                break;
 744        default:
 745                ret = -EINVAL;
 746                goto out;
 747        }
 748
 749        rtnl_lock();
 750
 751        ret = -ENODEV;
 752        dev = __dev_get_by_name(net, ifr.ifr_name);
 753        if (!dev)
 754                goto done;
 755
 756        if (colon)
 757                *colon = ':';
 758
 759        in_dev = __in_dev_get_rtnl(dev);
 760        if (in_dev) {
 761                if (tryaddrmatch) {
 762                        /* Matthias Andree */
 763                        /* compare label and address (4.4BSD style) */
 764                        /* note: we only do this for a limited set of ioctls
 765                           and only if the original address family was AF_INET.
 766                           This is checked above. */
 767                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 768                             ifap = &ifa->ifa_next) {
 769                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 770                                    sin_orig.sin_addr.s_addr ==
 771                                                        ifa->ifa_local) {
 772                                        break; /* found */
 773                                }
 774                        }
 775                }
 776                /* we didn't get a match, maybe the application is
 777                   4.3BSD-style and passed in junk so we fall back to
 778                   comparing just the label */
 779                if (!ifa) {
 780                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 781                             ifap = &ifa->ifa_next)
 782                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
 783                                        break;
 784                }
 785        }
 786
 787        ret = -EADDRNOTAVAIL;
 788        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 789                goto done;
 790
 791        switch (cmd) {
 792        case SIOCGIFADDR:       /* Get interface address */
 793                sin->sin_addr.s_addr = ifa->ifa_local;
 794                goto rarok;
 795
 796        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 797                sin->sin_addr.s_addr = ifa->ifa_broadcast;
 798                goto rarok;
 799
 800        case SIOCGIFDSTADDR:    /* Get the destination address */
 801                sin->sin_addr.s_addr = ifa->ifa_address;
 802                goto rarok;
 803
 804        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 805                sin->sin_addr.s_addr = ifa->ifa_mask;
 806                goto rarok;
 807
 808        case SIOCSIFFLAGS:
 809                if (colon) {
 810                        ret = -EADDRNOTAVAIL;
 811                        if (!ifa)
 812                                break;
 813                        ret = 0;
 814                        if (!(ifr.ifr_flags & IFF_UP))
 815                                inet_del_ifa(in_dev, ifap, 1);
 816                        break;
 817                }
 818                ret = dev_change_flags(dev, ifr.ifr_flags);
 819                break;
 820
 821        case SIOCSIFADDR:       /* Set interface address (and family) */
 822                ret = -EINVAL;
 823                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 824                        break;
 825
 826                if (!ifa) {
 827                        ret = -ENOBUFS;
 828                        ifa = inet_alloc_ifa();
 829                        INIT_HLIST_NODE(&ifa->hash);
 830                        if (!ifa)
 831                                break;
 832                        if (colon)
 833                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
 834                        else
 835                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 836                } else {
 837                        ret = 0;
 838                        if (ifa->ifa_local == sin->sin_addr.s_addr)
 839                                break;
 840                        inet_del_ifa(in_dev, ifap, 0);
 841                        ifa->ifa_broadcast = 0;
 842                        ifa->ifa_scope = 0;
 843                }
 844
 845                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
 846
 847                if (!(dev->flags & IFF_POINTOPOINT)) {
 848                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
 849                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
 850                        if ((dev->flags & IFF_BROADCAST) &&
 851                            ifa->ifa_prefixlen < 31)
 852                                ifa->ifa_broadcast = ifa->ifa_address |
 853                                                     ~ifa->ifa_mask;
 854                } else {
 855                        ifa->ifa_prefixlen = 32;
 856                        ifa->ifa_mask = inet_make_mask(32);
 857                }
 858                ret = inet_set_ifa(dev, ifa);
 859                break;
 860
 861        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 862                ret = 0;
 863                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
 864                        inet_del_ifa(in_dev, ifap, 0);
 865                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
 866                        inet_insert_ifa(ifa);
 867                }
 868                break;
 869
 870        case SIOCSIFDSTADDR:    /* Set the destination address */
 871                ret = 0;
 872                if (ifa->ifa_address == sin->sin_addr.s_addr)
 873                        break;
 874                ret = -EINVAL;
 875                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 876                        break;
 877                ret = 0;
 878                inet_del_ifa(in_dev, ifap, 0);
 879                ifa->ifa_address = sin->sin_addr.s_addr;
 880                inet_insert_ifa(ifa);
 881                break;
 882
 883        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 884
 885                /*
 886                 *      The mask we set must be legal.
 887                 */
 888                ret = -EINVAL;
 889                if (bad_mask(sin->sin_addr.s_addr, 0))
 890                        break;
 891                ret = 0;
 892                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
 893                        __be32 old_mask = ifa->ifa_mask;
 894                        inet_del_ifa(in_dev, ifap, 0);
 895                        ifa->ifa_mask = sin->sin_addr.s_addr;
 896                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
 897
 898                        /* See if current broadcast address matches
 899                         * with current netmask, then recalculate
 900                         * the broadcast address. Otherwise it's a
 901                         * funny address, so don't touch it since
 902                         * the user seems to know what (s)he's doing...
 903                         */
 904                        if ((dev->flags & IFF_BROADCAST) &&
 905                            (ifa->ifa_prefixlen < 31) &&
 906                            (ifa->ifa_broadcast ==
 907                             (ifa->ifa_local|~old_mask))) {
 908                                ifa->ifa_broadcast = (ifa->ifa_local |
 909                                                      ~sin->sin_addr.s_addr);
 910                        }
 911                        inet_insert_ifa(ifa);
 912                }
 913                break;
 914        }
 915done:
 916        rtnl_unlock();
 917out:
 918        return ret;
 919rarok:
 920        rtnl_unlock();
 921        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
 922        goto out;
 923}
 924
 925static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 926{
 927        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 928        struct in_ifaddr *ifa;
 929        struct ifreq ifr;
 930        int done = 0;
 931
 932        if (!in_dev)
 933                goto out;
 934
 935        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 936                if (!buf) {
 937                        done += sizeof(ifr);
 938                        continue;
 939                }
 940                if (len < (int) sizeof(ifr))
 941                        break;
 942                memset(&ifr, 0, sizeof(struct ifreq));
 943                if (ifa->ifa_label)
 944                        strcpy(ifr.ifr_name, ifa->ifa_label);
 945                else
 946                        strcpy(ifr.ifr_name, dev->name);
 947
 948                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
 949                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
 950                                                                ifa->ifa_local;
 951
 952                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
 953                        done = -EFAULT;
 954                        break;
 955                }
 956                buf  += sizeof(struct ifreq);
 957                len  -= sizeof(struct ifreq);
 958                done += sizeof(struct ifreq);
 959        }
 960out:
 961        return done;
 962}
 963
/*
 * inet_select_addr - choose a local IPv4 address
 * @dev:   device whose addresses are tried first
 * @dst:   destination address; 0 disables the subnet-match preference
 * @scope: highest ifa_scope value an acceptable address may carry
 *
 * Returns the selected address, or 0 when nothing qualifies.  The whole
 * search runs between rcu_read_lock() and rcu_read_unlock().
 */
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
{
        __be32 addr = 0;
        struct in_device *in_dev;
        struct net *net = dev_net(dev);

        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev)
                goto no_in_dev;

        /*
         * First pass, @dev only.  Addresses whose scope exceeds @scope are
         * skipped.  The first remaining address that matches @dst (or any
         * remaining address when @dst is 0) wins at once; until then the
         * first remaining address seen is kept as a fallback.
         */
        for_primary_ifa(in_dev) {
                if (ifa->ifa_scope > scope)
                        continue;
                if (!dst || inet_ifa_match(dst, ifa)) {
                        addr = ifa->ifa_local;
                        break;
                }
                if (!addr)
                        addr = ifa->ifa_local;
        } endfor_ifa(in_dev);

        if (addr)
                goto out_unlock;
no_in_dev:

        /* Not loopback addresses on loopback should be preferred
           in this case. It is important that lo is the first interface
           in dev_base list.
         */
        /*
         * Second pass, every device of the namespace (@dev is reused as the
         * loop cursor): take the first address that is within @scope and is
         * not RT_SCOPE_LINK.
         */
        for_each_netdev_rcu(net, dev) {
                in_dev = __in_dev_get_rcu(dev);
                if (!in_dev)
                        continue;

                for_primary_ifa(in_dev) {
                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
                            ifa->ifa_scope <= scope) {
                                addr = ifa->ifa_local;
                                goto out_unlock;
                        }
                } endfor_ifa(in_dev);
        }
out_unlock:
        rcu_read_unlock();
        return addr;
}
EXPORT_SYMBOL(inet_select_addr);
1012
/*
 * Search the addresses of one device for a usable local address.
 *
 * @in_dev: device state whose address list is walked
 * @dst:    if non-zero, some address entry must match it via inet_ifa_match()
 * @local:  if non-zero, the address that must be found; 0 lets any address
 *          within @scope be chosen
 * @scope:  highest ifa_scope value the returned address may carry
 *
 * Two things are tracked while walking the list: "addr", the candidate
 * address to return, and "same", whether an entry matching both @local and
 * @dst (each only when non-zero) has been seen.  The candidate is returned
 * only if "same" ended up set; otherwise the result is 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* First entry that equals @local (or any, if @local is 0)
                 * and is within @scope becomes the candidate.
                 */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* subnet condition already met earlier: done */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* Only the "autoselect local, dst given"
                                 * case needs the extra checks below.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1049
1050/*
1051 * Confirm that local IP address exists using wildcards:
1052 * - in_dev: only on this interface, 0=any interface
1053 * - dst: only in the same subnet as dst, 0=any dst
1054 * - local: address, 0=autoselect the local address
1055 * - scope: maximum allowed scope value for the local address
1056 */
1057__be32 inet_confirm_addr(struct in_device *in_dev,
1058                         __be32 dst, __be32 local, int scope)
1059{
1060        __be32 addr = 0;
1061        struct net_device *dev;
1062        struct net *net;
1063
1064        if (scope != RT_SCOPE_LINK)
1065                return confirm_addr_indev(in_dev, dst, local, scope);
1066
1067        net = dev_net(in_dev->dev);
1068        rcu_read_lock();
1069        for_each_netdev_rcu(net, dev) {
1070                in_dev = __in_dev_get_rcu(dev);
1071                if (in_dev) {
1072                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1073                        if (addr)
1074                                break;
1075                }
1076        }
1077        rcu_read_unlock();
1078
1079        return addr;
1080}
1081EXPORT_SYMBOL(inet_confirm_addr);
1082
/*
 *      Device notifier
 */

/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1092
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099/* Rename ifa_labels for a device name change. Make some effort to preserve
1100 * existing alias numbering and to create unique labels if possible.
1101*/
1102static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103{
1104        struct in_ifaddr *ifa;
1105        int named = 0;
1106
1107        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108                char old[IFNAMSIZ], *dot;
1109
1110                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112                if (named++ == 0)
1113                        goto skip;
1114                dot = strchr(old, ':');
1115                if (dot == NULL) {
1116                        sprintf(old, ":%d", named);
1117                        dot = old;
1118                }
1119                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120                        strcat(ifa->ifa_label, dot);
1121                else
1122                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123skip:
1124                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125        }
1126}
1127
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 *
 * RFC 791 requires every internet module to be able to forward a 68-octet
 * datagram without further fragmentation, so smaller MTUs are rejected.
 *
 * Returns true when @mtu is at least 68, false otherwise.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        /* smallest MTU accepted for IPv4 (RFC 791) */
        enum { INETDEV_MIN_MTU = 68 };

        return mtu >= INETDEV_MIN_MTU;
}
1132
1133static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134                                        struct in_device *in_dev)
1135
1136{
1137        struct in_ifaddr *ifa;
1138
1139        for (ifa = in_dev->ifa_list; ifa;
1140             ifa = ifa->ifa_next) {
1141                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142                         ifa->ifa_local, dev,
1143                         ifa->ifa_local, NULL,
1144                         dev->dev_addr, NULL);
1145        }
1146}
1147
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback: keeps the IPv4 per-device state in step with
 * device events.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event is about
 *
 * Returns NOTIFY_DONE, except when creating the IPv4 state on
 * NETDEV_REGISTER fails, in which case notifier_from_errno(-ENOMEM) is
 * returned.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        /*
         * No IPv4 state yet: only registration, or an MTU change to a
         * valid MTU, creates it; every other event is ignored.
         */
        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* loopback starts with NOXFRM and NOPOLICY set */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* registering a device that already has IPv4 state */
                printk(KERN_DEBUG "inetdev_event: bug\n");
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* a loopback device gets INADDR_LOOPBACK/8, host scope */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce only if ARP_NOTIFY is on */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 */
                /* NOTE(review): inetdev_changename() in this file does call
                 * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which
                 * seems at odds with the comment above -- confirm intent.
                 */
                inetdev_changename(dev, in_dev);

                /* re-register so the sysctl directory uses the new name */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1238
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1242
1243static inline size_t inet_nlmsg_size(void)
1244{
1245        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246               + nla_total_size(4) /* IFA_ADDRESS */
1247               + nla_total_size(4) /* IFA_LOCAL */
1248               + nla_total_size(4) /* IFA_BROADCAST */
1249               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250}
1251
/*
 * Append one netlink message describing @ifa to @skb.
 *
 * @skb:   message buffer being filled
 * @ifa:   address to describe
 * @pid:   passed through to nlmsg_put()
 * @seq:   passed through to nlmsg_put()
 * @event: message type passed to nlmsg_put()
 * @flags: netlink message flags passed to nlmsg_put()
 *
 * Returns the value of nlmsg_end() on success.  Returns -EMSGSIZE when the
 * header cannot be added, or via the nla_put_failure path after the partial
 * message has been cancelled.
 */
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
                            u32 pid, u32 seq, int event, unsigned int flags)
{
        struct ifaddrmsg *ifm;
        struct nlmsghdr  *nlh;

        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ifm = nlmsg_data(nlh);
        ifm->ifa_family = AF_INET;
        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
        /* IFA_F_PERMANENT is always set in the reported flags */
        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
        ifm->ifa_scope = ifa->ifa_scope;
        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;

        /*
         * Each attribute is emitted only when its value is non-zero (or,
         * for the label, non-empty).  The NLA_PUT_* macros presumably jump
         * to nla_put_failure when the attribute does not fit -- the label
         * has no explicit goto in this function.
         */
        if (ifa->ifa_address)
                NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);

        if (ifa->ifa_local)
                NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);

        if (ifa->ifa_broadcast)
                NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);

        if (ifa->ifa_label[0])
                NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1287
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) per
 * IPv4 address of every device in the socket's network namespace.
 *
 * The position reached is saved in @cb->args so a later call can resume:
 *   args[0] - bucket index in net->dev_index_head
 *   args[1] - device position within that bucket
 *   args[2] - address position within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        /* s_* hold the saved starting position from the previous call */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx applies to the first bucket only; later ones start at 0 */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* skip devices before the saved position */
                        if (idx < s_idx)
                                goto cont;
                        /* past the saved device: start at its first address */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                /* a result <= 0 ends this pass; the current
                                 * position is saved at "done"
                                 */
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1342
1343static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344                      u32 pid)
1345{
1346        struct sk_buff *skb;
1347        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348        int err = -ENOBUFS;
1349        struct net *net;
1350
1351        net = dev_net(ifa->ifa_dev->dev);
1352        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353        if (skb == NULL)
1354                goto errout;
1355
1356        err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357        if (err < 0) {
1358                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359                WARN_ON(err == -EMSGSIZE);
1360                kfree_skb(skb);
1361                goto errout;
1362        }
1363        rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364        return;
1365errout:
1366        if (err < 0)
1367                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1368}
1369
1370static size_t inet_get_link_af_size(const struct net_device *dev)
1371{
1372        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373
1374        if (!in_dev)
1375                return 0;
1376
1377        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1378}
1379
1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381{
1382        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383        struct nlattr *nla;
1384        int i;
1385
1386        if (!in_dev)
1387                return -ENODATA;
1388
1389        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390        if (nla == NULL)
1391                return -EMSGSIZE;
1392
1393        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1395
1396        return 0;
1397}
1398
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1402
1403static int inet_validate_link_af(const struct net_device *dev,
1404                                 const struct nlattr *nla)
1405{
1406        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407        int err, rem;
1408
1409        if (dev && !__in_dev_get_rtnl(dev))
1410                return -EAFNOSUPPORT;
1411
1412        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413        if (err < 0)
1414                return err;
1415
1416        if (tb[IFLA_INET_CONF]) {
1417                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418                        int cfgid = nla_type(a);
1419
1420                        if (nla_len(a) < 4)
1421                                return -EINVAL;
1422
1423                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424                                return -EINVAL;
1425                }
1426        }
1427
1428        return 0;
1429}
1430
1431static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1432{
1433        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435        int rem;
1436
1437        if (!in_dev)
1438                return -EAFNOSUPPORT;
1439
1440        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441                BUG();
1442
1443        if (tb[IFLA_INET_CONF]) {
1444                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1446        }
1447
1448        return 0;
1449}
1450
1451#ifdef CONFIG_SYSCTL
1452
1453static void devinet_copy_dflt_conf(struct net *net, int i)
1454{
1455        struct net_device *dev;
1456
1457        rcu_read_lock();
1458        for_each_netdev_rcu(net, dev) {
1459                struct in_device *in_dev;
1460
1461                in_dev = __in_dev_get_rcu(dev);
1462                if (in_dev && !test_bit(i, in_dev->cnf.state))
1463                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1464        }
1465        rcu_read_unlock();
1466}
1467
1468/* called with RTNL locked */
1469static void inet_forward_change(struct net *net)
1470{
1471        struct net_device *dev;
1472        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1473
1474        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1476
1477        for_each_netdev(net, dev) {
1478                struct in_device *in_dev;
1479                if (on)
1480                        dev_disable_lro(dev);
1481                rcu_read_lock();
1482                in_dev = __in_dev_get_rcu(dev);
1483                if (in_dev)
1484                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485                rcu_read_unlock();
1486        }
1487}
1488
1489static int devinet_conf_proc(ctl_table *ctl, int write,
1490                             void __user *buffer,
1491                             size_t *lenp, loff_t *ppos)
1492{
1493        int old_value = *(int *)ctl->data;
1494        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1495        int new_value = *(int *)ctl->data;
1496
1497        if (write) {
1498                struct ipv4_devconf *cnf = ctl->extra1;
1499                struct net *net = ctl->extra2;
1500                int i = (int *)ctl->data - cnf->data;
1501
1502                set_bit(i, cnf->state);
1503
1504                if (cnf == net->ipv4.devconf_dflt)
1505                        devinet_copy_dflt_conf(net, i);
1506                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1507                        if ((new_value == 0) && (old_value != 0))
1508                                rt_cache_flush(net, 0);
1509        }
1510
1511        return ret;
1512}
1513
1514static int devinet_sysctl_forward(ctl_table *ctl, int write,
1515                                  void __user *buffer,
1516                                  size_t *lenp, loff_t *ppos)
1517{
1518        int *valp = ctl->data;
1519        int val = *valp;
1520        loff_t pos = *ppos;
1521        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1522
1523        if (write && *valp != val) {
1524                struct net *net = ctl->extra2;
1525
1526                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1527                        if (!rtnl_trylock()) {
1528                                /* Restore the original values before restarting */
1529                                *valp = val;
1530                                *ppos = pos;
1531                                return restart_syscall();
1532                        }
1533                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1534                                inet_forward_change(net);
1535                        } else if (*valp) {
1536                                struct ipv4_devconf *cnf = ctl->extra1;
1537                                struct in_device *idev =
1538                                        container_of(cnf, struct in_device, cnf);
1539                                dev_disable_lro(idev->dev);
1540                        }
1541                        rtnl_unlock();
1542                        rt_cache_flush(net, 0);
1543                }
1544        }
1545
1546        return ret;
1547}
1548
1549static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1550                                void __user *buffer,
1551                                size_t *lenp, loff_t *ppos)
1552{
1553        int *valp = ctl->data;
1554        int val = *valp;
1555        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1556        struct net *net = ctl->extra2;
1557
1558        if (write && *valp != val)
1559                rt_cache_flush(net, 0);
1560
1561        return ret;
1562}
1563
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *   name - procname of the entry
 *   mval - file mode
 *   proc - proc_handler
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of ipv4_devconf.data and
 * .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases both
 * onto the devconf being registered.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1586
/*
 * Template for a per-devconf sysctl directory.  __devinet_sysctl_register()
 * duplicates it with kmemdup() and patches the copy, so this instance is
 * never registered itself.
 *
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - one entry per exported setting; the registration loop
 *                   patches all but the last array element (presumably the
 *                   empty terminator -- confirm against __IPV4_DEVCONF_MAX)
 *   dev_name      - private copy of the directory name
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                /* read-only */
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* plain read-write integers */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* a changed value also flushes the route cache */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1626
1627static int __devinet_sysctl_register(struct net *net, char *dev_name,
1628                                        struct ipv4_devconf *p)
1629{
1630        int i;
1631        struct devinet_sysctl_table *t;
1632
1633#define DEVINET_CTL_PATH_DEV    3
1634
1635        struct ctl_path devinet_ctl_path[] = {
1636                { .procname = "net",  },
1637                { .procname = "ipv4", },
1638                { .procname = "conf", },
1639                { /* to be set */ },
1640                { },
1641        };
1642
1643        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1644        if (!t)
1645                goto out;
1646
1647        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1648                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1649                t->devinet_vars[i].extra1 = p;
1650                t->devinet_vars[i].extra2 = net;
1651        }
1652
1653        /*
1654         * Make a copy of dev_name, because '.procname' is regarded as const
1655         * by sysctl and we wouldn't want anyone to change it under our feet
1656         * (see SIOCSIFNAME).
1657         */
1658        t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1659        if (!t->dev_name)
1660                goto free;
1661
1662        devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1663
1664        t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1665                        t->devinet_vars);
1666        if (!t->sysctl_header)
1667                goto free_procname;
1668
1669        p->sysctl = t;
1670        return 0;
1671
1672free_procname:
1673        kfree(t->dev_name);
1674free:
1675        kfree(t);
1676out:
1677        return -ENOBUFS;
1678}
1679
1680static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1681{
1682        struct devinet_sysctl_table *t = cnf->sysctl;
1683
1684        if (t == NULL)
1685                return;
1686
1687        cnf->sysctl = NULL;
1688        unregister_net_sysctl_table(t->sysctl_header);
1689        kfree(t->dev_name);
1690        kfree(t);
1691}
1692
/*
 * Register the sysctl entries of one device: first the neighbour ones for
 * idev->arp_parms under "ipv4", then the devconf table named after the
 * device.
 *
 * NOTE(review): the return value of __devinet_sysctl_register() is ignored,
 * so a failed registration goes unnoticed here.
 */
static void devinet_sysctl_register(struct in_device *idev)
{
        neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
                                        &idev->cnf);
}
1699
1700static void devinet_sysctl_unregister(struct in_device *idev)
1701{
1702        __devinet_sysctl_unregister(&idev->cnf);
1703        neigh_sysctl_unregister(idev->arp_parms);
1704}
1705
1706static struct ctl_table ctl_forward_entry[] = {
1707        {
1708                .procname       = "ip_forward",
1709                .data           = &ipv4_devconf.data[
1710                                        IPV4_DEVCONF_FORWARDING - 1],
1711                .maxlen         = sizeof(int),
1712                .mode           = 0644,
1713                .proc_handler   = devinet_sysctl_forward,
1714                .extra1         = &ipv4_devconf,
1715                .extra2         = &init_net,
1716        },
1717        { },
1718};
1719
1720static __net_initdata struct ctl_path net_ipv4_path[] = {
1721        { .procname = "net", },
1722        { .procname = "ipv4", },
1723        { },
1724};
1725#endif
1726
1727static __net_init int devinet_init_net(struct net *net)
1728{
1729        int err;
1730        struct ipv4_devconf *all, *dflt;
1731#ifdef CONFIG_SYSCTL
1732        struct ctl_table *tbl = ctl_forward_entry;
1733        struct ctl_table_header *forw_hdr;
1734#endif
1735
1736        err = -ENOMEM;
1737        all = &ipv4_devconf;
1738        dflt = &ipv4_devconf_dflt;
1739
1740        if (!net_eq(net, &init_net)) {
1741                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1742                if (all == NULL)
1743                        goto err_alloc_all;
1744
1745                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1746                if (dflt == NULL)
1747                        goto err_alloc_dflt;
1748
1749#ifdef CONFIG_SYSCTL
1750                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1751                if (tbl == NULL)
1752                        goto err_alloc_ctl;
1753
1754                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1755                tbl[0].extra1 = all;
1756                tbl[0].extra2 = net;
1757#endif
1758        }
1759
1760#ifdef CONFIG_SYSCTL
1761        err = __devinet_sysctl_register(net, "all", all);
1762        if (err < 0)
1763                goto err_reg_all;
1764
1765        err = __devinet_sysctl_register(net, "default", dflt);
1766        if (err < 0)
1767                goto err_reg_dflt;
1768
1769        err = -ENOMEM;
1770        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1771        if (forw_hdr == NULL)
1772                goto err_reg_ctl;
1773        net->ipv4.forw_hdr = forw_hdr;
1774#endif
1775
1776        net->ipv4.devconf_all = all;
1777        net->ipv4.devconf_dflt = dflt;
1778        return 0;
1779
1780#ifdef CONFIG_SYSCTL
1781err_reg_ctl:
1782        __devinet_sysctl_unregister(dflt);
1783err_reg_dflt:
1784        __devinet_sysctl_unregister(all);
1785err_reg_all:
1786        if (tbl != ctl_forward_entry)
1787                kfree(tbl);
1788err_alloc_ctl:
1789#endif
1790        if (dflt != &ipv4_devconf_dflt)
1791                kfree(dflt);
1792err_alloc_dflt:
1793        if (all != &ipv4_devconf)
1794                kfree(all);
1795err_alloc_all:
1796        return err;
1797}
1798
1799static __net_exit void devinet_exit_net(struct net *net)
1800{
1801#ifdef CONFIG_SYSCTL
1802        struct ctl_table *tbl;
1803
1804        tbl = net->ipv4.forw_hdr->ctl_table_arg;
1805        unregister_net_sysctl_table(net->ipv4.forw_hdr);
1806        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1807        __devinet_sysctl_unregister(net->ipv4.devconf_all);
1808        kfree(tbl);
1809#endif
1810        kfree(net->ipv4.devconf_dflt);
1811        kfree(net->ipv4.devconf_all);
1812}
1813
1814static __net_initdata struct pernet_operations devinet_ops = {
1815        .init = devinet_init_net,
1816        .exit = devinet_exit_net,
1817};
1818
1819static struct rtnl_af_ops inet_af_ops = {
1820        .family           = AF_INET,
1821        .fill_link_af     = inet_fill_link_af,
1822        .get_link_af_size = inet_get_link_af_size,
1823        .validate_link_af = inet_validate_link_af,
1824        .set_link_af      = inet_set_link_af,
1825};
1826
1827void __init devinet_init(void)
1828{
1829        int i;
1830
1831        for (i = 0; i < IN4_ADDR_HSIZE; i++)
1832                INIT_HLIST_HEAD(&inet_addr_lst[i]);
1833
1834        register_pernet_subsys(&devinet_ops);
1835
1836        register_gifconf(PF_INET, inet_gifconf);
1837        register_netdevice_notifier(&ip_netdev_notifier);
1838
1839        rtnl_af_register(&inet_af_ops);
1840
1841        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1842        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1843        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1844}
1845
1846