/* linux/net/core/neighbour.c */
   1/*
   2 *      Generic address resolution entity
   3 *
   4 *      Authors:
   5 *      Pedro Roque             <roque@di.fc.ul.pt>
   6 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 *      Fixes:
  14 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
  15 *      Harald Welte            Add neighbour cache statistics like rtstat
  16 */
  17
  18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19
  20#include <linux/slab.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/socket.h>
  25#include <linux/netdevice.h>
  26#include <linux/proc_fs.h>
  27#ifdef CONFIG_SYSCTL
  28#include <linux/sysctl.h>
  29#endif
  30#include <linux/times.h>
  31#include <net/net_namespace.h>
  32#include <net/neighbour.h>
  33#include <net/dst.h>
  34#include <net/sock.h>
  35#include <net/netevent.h>
  36#include <net/netlink.h>
  37#include <linux/rtnetlink.h>
  38#include <linux/random.h>
  39#include <linux/string.h>
  40#include <linux/log2.h>
  41#include <linux/inetdevice.h>
  42#include <net/addrconf.h>
  43
#define DEBUG
#define NEIGH_DEBUG 1
/* Debug print, compile-time gated on NEIGH_DEBUG verbosity. */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

/* Proxy-neighbour hash table has a fixed 16 buckets (mask 0xF). */
#define PNEIGH_HASHMASK		0xF

/* Forward declarations for functions defined later in this file. */
static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
  64
  65/*
  66   Neighbour hash table buckets are protected with rwlock tbl->lock.
  67
  68   - All the scans/updates to hash buckets MUST be made under this lock.
  69   - NOTHING clever should be made under this lock: no callbacks
  70     to protocol backends, no attempts to send something to network.
  71     It will result in deadlocks, if backend/driver wants to use neighbour
  72     cache.
  73   - If the entry requires some non-trivial actions, increase
  74     its reference count and release table lock.
  75
  76   Neighbour entries are protected:
  77   - with reference count.
  78   - with rwlock neigh->lock
  79
  80   Reference count prevents destruction.
  81
  82   neigh->lock mainly serializes ll address data and its validity state.
  83   However, the same lock is used to protect another entry fields:
  84    - timer
  85    - resolution queue
  86
  87   Again, nothing clever shall be made under neigh->lock,
  88   the most complicated procedure, which we allow is dev->hard_header.
  89   It is supposed, that dev->hard_header is simplistic and does
  90   not make callbacks to neighbour tables.
  91 */
  92
/* Dead-end output handler: drop the skb and report the network as down.
 * Installed as n->output for freshly allocated entries (neigh_alloc())
 * and for entries being torn down (neigh_flush_dev()).
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
  98
/* Final teardown of an unlinked neighbour entry: run the optional
 * per-parms cleanup hook, announce the deletion via RTM_DELNEIGH and a
 * netevent, then release one reference.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
 108
 109/*
 110 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 111 * It corresponds to default IPv6 settings and is not overridable,
 112 * because it is really reasonable choice.
 113 */
 114
 115unsigned long neigh_rand_reach_time(unsigned long base)
 116{
 117        return base ? (prandom_u32() % base) + (base >> 1) : 0;
 118}
 119EXPORT_SYMBOL(neigh_rand_reach_time);
 120
 121
/* Try to unlink @n from its hash chain position @np.  Succeeds only when
 * nobody else holds a reference and the entry matches none of the states
 * in @state nor the flags in @flags (callers pass NUD_PERMANENT /
 * NTF_EXT_LEARNED to spare such entries, or 0/0 for "no exemptions").
 *
 * Caller holds tbl->lock for writing.  Returns true when the entry was
 * unlinked, marked dead and handed to neigh_cleanup_and_release().
 */
static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
		      struct neighbour __rcu **np, struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
	    !(n->flags & flags)) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		n->dead = 1;
		retval = true;
	}
	write_unlock(&n->lock);
	/* Release outside n->lock: cleanup sends notifications etc. */
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
 143
/* Remove the specific entry @ndel from @tbl, if it is still hashed and
 * otherwise unreferenced.  Walks the bucket computed from ndel's primary
 * key and device, then delegates the unlink to neigh_del() with 0/0
 * masks (no entry is exempt).  Caller holds tbl->lock for writing.
 * Returns true when the entry was actually removed.
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, 0, 0, np, tbl);
		np = &n->next;
	}
	return false;
}
 166
/* Synchronous garbage collection, triggered from neigh_alloc() when the
 * table crosses its gc thresholds.  Scans every bucket and drops all
 * entries that are unreferenced and neither NUD_PERMANENT nor externally
 * learned (NTF_EXT_LEARNED).  Returns 1 if at least one entry was freed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
				      tbl)) {
				shrunk = 1;
				/* *np already points at the successor. */
				continue;
			}
			np = &n->next;
		}
	}

	/* Remember when we last flushed; neigh_alloc() rate-limits
	 * gc_thresh2-triggered GC to once per 5 seconds using this.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
 204
 205static void neigh_add_timer(struct neighbour *n, unsigned long when)
 206{
 207        neigh_hold(n);
 208        if (unlikely(mod_timer(&n->timer, when))) {
 209                printk("NEIGH: BUG, double timer add, state is %x\n",
 210                       n->nud_state);
 211                dump_stack();
 212        }
 213}
 214
 215static int neigh_del_timer(struct neighbour *n)
 216{
 217        if ((n->nud_state & NUD_IN_TIMER) &&
 218            del_timer(&n->timer)) {
 219                neigh_release(n);
 220                return 1;
 221        }
 222        return 0;
 223}
 224
 225static void pneigh_queue_purge(struct sk_buff_head *list)
 226{
 227        struct sk_buff *skb;
 228
 229        while ((skb = skb_dequeue(list)) != NULL) {
 230                dev_put(skb->dev);
 231                kfree_skb(skb);
 232        }
 233}
 234
/* Unlink every entry belonging to @dev (or to any device when @dev is
 * NULL), optionally sparing NUD_PERMANENT entries (@skip_perm).  Caller
 * holds tbl->lock for writing.
 *
 * Entries still referenced by others cannot be freed here: they are
 * marked dead, their timer and queued skbs are killed, and output is
 * redirected to neigh_blackhole; final destruction happens when the
 * last reference is dropped.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			/* Unlink from the chain before touching the entry. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
 289
/* Device hardware address changed: flush all of @dev's entries,
 * including permanent ones (skip_perm = false), under the table
 * write lock.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
 297
/* Common device-down path: flush @dev's neighbour entries (optionally
 * keeping NUD_PERMANENT ones), drop its proxy entries, then stop the
 * proxy timer and purge its queue.  Note that pneigh_ifdown_and_unlock()
 * releases the tbl->lock taken here.
 */
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
 309
/* Carrier loss on @dev: flush learned entries but keep permanent ones. */
int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);
 316
/* @dev is going down/away: flush everything, including permanent entries. */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
 323
/* Allocate and minimally initialize a neighbour entry for @dev.
 *
 * The entries counter is bumped up front; crossing gc_thresh2 (rate
 * limited to once per 5 s via tbl->last_flush) or gc_thresh3 triggers a
 * forced GC pass, and allocation fails (NULL) when the table stays over
 * gc_thresh3.  The new entry starts with ->dead = 1: it is not yet
 * hashed, and __neigh_create() clears the flag once it is linked in.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* GFP_ATOMIC: may be called from packet-processing context. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic increment done above. */
	atomic_dec(&tbl->entries);
	goto out;
}
 368
/* Pick a random hash seed; OR-ing in 1 makes it odd, hence never zero. */
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}
 373
 374static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 375{
 376        size_t size = (1 << shift) * sizeof(struct neighbour *);
 377        struct neigh_hash_table *ret;
 378        struct neighbour __rcu **buckets;
 379        int i;
 380
 381        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 382        if (!ret)
 383                return NULL;
 384        if (size <= PAGE_SIZE)
 385                buckets = kzalloc(size, GFP_ATOMIC);
 386        else
 387                buckets = (struct neighbour __rcu **)
 388                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 389                                           get_order(size));
 390        if (!buckets) {
 391                kfree(ret);
 392                return NULL;
 393        }
 394        ret->hash_buckets = buckets;
 395        ret->hash_shift = shift;
 396        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
 397                neigh_get_hash_rnd(&ret->hash_rnd[i]);
 398        return ret;
 399}
 400
 401static void neigh_hash_free_rcu(struct rcu_head *head)
 402{
 403        struct neigh_hash_table *nht = container_of(head,
 404                                                    struct neigh_hash_table,
 405                                                    rcu);
 406        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 407        struct neighbour __rcu **buckets = nht->hash_buckets;
 408
 409        if (size <= PAGE_SIZE)
 410                kfree(buckets);
 411        else
 412                free_pages((unsigned long)buckets, get_order(size));
 413        kfree(nht);
 414}
 415
/* Rehash @tbl into a new 2^new_shift bucket array.  Caller holds
 * tbl->lock for writing.  Each entry's ->next is published (via
 * rcu_assign_pointer) before the entry itself is linked at the new
 * bucket head, keeping chains consistent for concurrent RCU readers.
 * The old table is freed after a grace period.  On allocation failure
 * the old table is kept and returned unchanged.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the NEW table's random seeds. */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
 456
/* Look up the entry for @pkey on @dev and return it with a reference
 * held, or NULL.  Runs under rcu_read_lock_bh();
 * refcount_inc_not_zero() guards against racing with an entry whose
 * last reference is being dropped — in that case NULL is returned
 * (the "hits" statistic is still bumped).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
 476
/* Like neigh_lookup(), but matches on key and network namespace only,
 * ignoring the device (the key is hashed with a NULL dev).  Returns the
 * entry with a reference held, or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			/* Lose the refcount race -> report no entry. */
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
 507
/* Create (or find) the entry for @pkey on @dev in @tbl.  Returns the
 * new or already-existing entry — with an extra reference when
 * @want_ref is set — or an ERR_PTR() on failure.
 *
 * The entry is fully constructed (protocol, driver and device setup
 * hooks) before tbl->lock is taken; the bucket is then re-scanned under
 * the lock, so an entry inserted concurrently for the same key wins and
 * ours is released.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Back-date "confirmed" so the entry is immediately stale. */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* Our parms are being torn down; refuse to hash against them. */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check under the lock: someone may have inserted the same
	 * key while we were constructing our entry.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	/* Now hashed: clear the "not yet linked" marker from neigh_alloc(). */
	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
 595
 596static u32 pneigh_hash(const void *pkey, unsigned int key_len)
 597{
 598        u32 hash_val = *(u32 *)(pkey + key_len - 4);
 599        hash_val ^= (hash_val >> 16);
 600        hash_val ^= hash_val >> 8;
 601        hash_val ^= hash_val >> 4;
 602        hash_val &= PNEIGH_HASHMASK;
 603        return hash_val;
 604}
 605
 606static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 607                                              struct net *net,
 608                                              const void *pkey,
 609                                              unsigned int key_len,
 610                                              struct net_device *dev)
 611{
 612        while (n) {
 613                if (!memcmp(n->key, pkey, key_len) &&
 614                    net_eq(pneigh_net(n), net) &&
 615                    (n->dev == dev || !n->dev))
 616                        return n;
 617                n = n->next;
 618        }
 619        return NULL;
 620}
 621
 622struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
 623                struct net *net, const void *pkey, struct net_device *dev)
 624{
 625        unsigned int key_len = tbl->key_len;
 626        u32 hash_val = pneigh_hash(pkey, key_len);
 627
 628        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 629                                 net, pkey, key_len, dev);
 630}
 631EXPORT_SYMBOL_GPL(__pneigh_lookup);
 632
/* Find a proxy entry, optionally creating it when @creat is nonzero.
 * Lookup runs under the table read lock; creation asserts the RTNL,
 * which serializes the check-then-insert sequence against concurrent
 * creators.  Returns the entry, or NULL when not found (and !creat),
 * on allocation failure, or when the pconstructor hook rejects it.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	/* Link at the bucket head under the write lock. */
	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
 677
 678
/* Delete the proxy entry matching @pkey, @dev and @net exactly (no
 * device wildcard here, unlike lookup).  The destructor hook and
 * dev_put() run after tbl->lock has been dropped.  Returns 0 on
 * success, -ENOENT when no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
 704
/* Remove all proxy entries for @dev (or every device when @dev is NULL).
 * Called with tbl->lock held for writing and — as the name says —
 * RELEASES that lock before returning: matching entries are first
 * collected onto a private free list under the lock, then destroyed
 * (pdestructor, dev_put, kfree) outside it.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
 735
 736static void neigh_parms_destroy(struct neigh_parms *parms);
 737
/* Drop one reference on @parms, destroying them when it was the last. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
 743
 744/*
 745 *      neighbour must already be out of the table;
 746 *
 747 */
/* Final destruction after the last reference is gone.  The entry must
 * already be unlinked (->dead set); a still-live entry is reported and
 * deliberately leaked rather than freed.  Purges queued skbs, lets the
 * driver detach its per-neighbour state, drops the device and parms
 * references, and frees the entry after an RCU grace period.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
 780
 781/* Neighbour state is suspicious;
 782   disable fast path.
 783
 784   Called with write_locked neigh.
 785 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* Route traffic through the generic (slow/resolving) output path. */
	neigh->output = neigh->ops->output;
}
 792
 793/* Neighbour state is OK;
 794   enable fast path.
 795
 796   Called with write_locked neigh.
 797 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* Switch to the fast output path for validated entries. */
	neigh->output = neigh->ops->connected_output;
}
 804
/* Periodic garbage-collection work item.  Roughly every 300 seconds it
 * also re-randomizes reachable_time for every parms block.  Entries
 * that are unreferenced and either NUD_FAILED or idle longer than
 * GC_STALETIME are unlinked and released.  The table lock is dropped
 * between buckets (with cond_resched) to bound lock hold times; the
 * work re-queues itself every BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	/* Below the lower threshold: nothing worth collecting. */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Permanent, timer-armed or externally learned
			 * entries are never collected here.
			 */
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
 886
 887static __inline__ int neigh_max_probes(struct neighbour *n)
 888{
 889        struct neigh_parms *p = n->parms;
 890        return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
 891               (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
 892                NEIGH_VAR(p, MCAST_PROBES));
 893}
 894
/* Resolution of @neigh has failed: bump the res_failed statistic and
 * report every packet queued on the entry as unreachable, then purge
 * whatever remains. Called and returns with neigh->lock write-held,
 * but drops it around each error_report() call.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		/* error_report may re-enter neighbour code; drop the lock
		 * and re-check nud_state on every iteration.
		 */
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Discard anything left (state may have changed while unlocked). */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
 919
/* Emit one solicitation for @neigh and count it. Entered with
 * neigh->lock write-held; the lock is dropped before transmitting
 * (hence the __releases annotation) and NOT re-taken.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* Free our clone; the original stays on arp_queue. */
	kfree_skb(skb);
}
 933
/* Called when a timer expires for a neighbour entry. */

/* Advances the NUD state machine for @neigh: REACHABLE decays to DELAY
 * or STALE, DELAY either confirms back to REACHABLE or escalates to
 * PROBE, and INCOMPLETE/PROBE entries are re-probed until
 * neigh_max_probes() is exhausted, at which point the entry is failed
 * and its queue flushed. Runs with the reference that was taken when
 * the timer was armed; releases it on exit.
 */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The timer may race with a state change into a timer-less state;
	 * in that case there is nothing to do.
	 */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed recently enough: just re-arm. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Used recently but unconfirmed: defer a probe. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* Idle and unconfirmed: demote to STALE (no timer). */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* Delay expired without confirmation: start probing. */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted: fail the entry and flush its queue. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/2 to avoid rapid refires. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);	/* timer was idle: take its ref */
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* releases neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);	/* drop the reference held by this timer run */
}
1021
/* Slow path of neigh_event_send(): kick address resolution for @neigh
 * and decide whether @skb must wait for it.
 *
 * Returns 0 if the caller may transmit immediately, 1 if the skb was
 * queued on the entry or dropped (failed resolution / dead entry).
 * Takes and releases neigh->lock; may transmit the first probe itself.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Entry already usable or being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Begin resolution. Pre-charge the counter with the
			 * unicast budget so only mcast/app probes remain.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* Probing disabled by configuration: fail at once. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale address is usable now; verify it lazily. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue the packet, evicting oldest skbs while the
			 * per-neighbour byte budget would be exceeded.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops the lock itself; pair the remaining
	 * local_bh_enable() with the write_lock_bh() taken above.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
1098EXPORT_SYMBOL(__neigh_event_send);
1099
1100static void neigh_update_hhs(struct neighbour *neigh)
1101{
1102        struct hh_cache *hh;
1103        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1104                = NULL;
1105
1106        if (neigh->dev->header_ops)
1107                update = neigh->dev->header_ops->cache_update;
1108
1109        if (update) {
1110                hh = &neigh->hh;
1111                if (hh->hh_len) {
1112                        write_seqlock_bh(&hh->hh_lock);
1113                        update(hh, neigh->dev, neigh->ha);
1114                        write_sequnlock_bh(&hh->hh_lock);
1115                }
1116        }
1117}
1118
1119
1120
1121/* Generic update routine.
1122   -- lladdr is new lladdr or NULL, if it is not supplied.
1123   -- new    is new state.
1124   -- flags
1125        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1126                                if it is different.
1127        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1128                                lladdr instead of overriding it
1129                                if it is different.
1130        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1131
1132        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1133                                NTF_ROUTER flag.
1134        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1135                                a router.
1136
1137   Caller MUST hold reference count on the entry.
1138 */
1139
/* Apply an externally-driven state/address change to @neigh (see the
 * flag documentation in the comment block above).
 *
 * @neigh:     entry to update; caller must hold a reference.
 * @lladdr:    new link-layer address, or NULL to keep/require the cached one.
 * @new:       target NUD state.
 * @flags:     NEIGH_UPDATE_F_* policy bits.
 * @nlmsg_pid: netlink pid to report in the change notification.
 *
 * Returns 0 on success, -EPERM for disallowed non-admin updates of
 * NOARP/PERMANENT entries, -EINVAL when no address is available.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	neigh_update_ext_learned(neigh, flags, &notify);

	/* Transition to an invalid state: stop the timer, drop fast-path
	 * output, and flush the pending queue if resolution just failed.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but demote the
				 * entry to STALE so it gets re-verified.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Install the new link-layer address under ha_lock and
		 * refresh any cached hardware headers derived from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Unconfirmed new address: backdate 'confirmed' so the
		 * entry ages out quickly unless it gets confirmed.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
1310EXPORT_SYMBOL(neigh_update);
1311
1312/* Update the neigh to listen temporarily for probe responses, even if it is
1313 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1314 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	/* Re-enter INCOMPLETE with the probe budget already exhausted:
	 * no new probes are sent; we merely listen for a reply for one
	 * RETRANS_TIME before the timer fails the entry again.
	 */
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
1327EXPORT_SYMBOL(__neigh_set_probe_once);
1328
1329struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1330                                 u8 *lladdr, void *saddr,
1331                                 struct net_device *dev)
1332{
1333        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1334                                                 lladdr || !dev->addr_len);
1335        if (neigh)
1336                neigh_update(neigh, lladdr, NUD_STALE,
1337                             NEIGH_UPDATE_F_OVERRIDE, 0);
1338        return neigh;
1339}
1340EXPORT_SYMBOL(neigh_event_ns);
1341
/* One-time population of n->hh via the device's header_ops->cache().
 * NOTE(review): the previous "called with read_lock_bh(&n->lock)"
 * comment appears stale — this function takes write_lock_bh(&n->lock)
 * itself, so the caller must NOT hold n->lock; confirm against callers.
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1359
1360/* Slow and careful. */
1361
/* Output path for entries that may still need resolution: trigger or
 * await resolution via neigh_event_send(), then build the link-layer
 * header under the ha_lock seqlock (retrying if the address changes
 * mid-copy) and transmit. Returns 0 if the skb was queued for later,
 * the dev_queue_xmit() result on transmit, or -EINVAL if header
 * construction failed (skb is consumed in every case).
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Populate the hardware-header cache on first use. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1393EXPORT_SYMBOL(neigh_resolve_output);
1394
1395/* As fast as possible without hh cache */
1396
/* Output path for connected entries without resolution or hh cache:
 * build the link-layer header and transmit. Returns dev_queue_xmit()'s
 * result or -EINVAL on header failure; the skb is always consumed.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	/* Copy neigh->ha under the ha_lock seqlock, retrying so that a
	 * concurrent address change cannot leave a torn header.
	 */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1418EXPORT_SYMBOL(neigh_connected_output);
1419
/* Output path for devices that need no link-layer header: hand the skb
 * straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1424EXPORT_SYMBOL(neigh_direct_output);
1425
/* Proxy-queue timer handler: replay (via tbl->proxy_redo) every queued
 * skb whose scheduled time has arrived, dropping those whose device is
 * down, and re-arm the timer for the earliest remaining skb.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* <= 0 means this skb's scheduled time has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;	/* track earliest future skb */
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1459
/* Queue @skb for delayed proxy processing, jittered randomly by up to
 * PROXY_DELAY jiffies so proxy replies are not synchronized. The skb is
 * dropped when the queue already exceeds PROXY_QLEN. A device reference
 * is held per queued skb and released in neigh_proxy_process().
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* Never push an already-armed timer later than it currently is. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1487EXPORT_SYMBOL(pneigh_enqueue);
1488
/* Find the neigh_parms in @tbl matching (@net, @ifindex), or the
 * device-less default parms when @ifindex is 0 and @net is init_net.
 * Returns NULL if nothing matches.
 */
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}
1502
/* Clone the table's default parms for @dev, letting the driver adjust
 * them via ndo_neigh_setup(), and link them into the table's parms
 * list. Holds a reference on @dev for the lifetime of the parms.
 * Returns the new parms or NULL on allocation/setup failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl            = tbl;
		refcount_set(&p->refcnt, 1);
		/* Per-parms randomized reachable_time, as elsewhere. */
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Give the driver a chance to veto or customize. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
1535EXPORT_SYMBOL(neigh_parms_alloc);
1536
/* RCU callback for neigh_parms_release(): drop the list's reference
 * once all pre-existing readers are done with the parms.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1544
/* Unlink @parms from @tbl, mark it dead, and defer the reference drop
 * past an RCU grace period so concurrent readers of parms_list stay
 * safe. The table's built-in default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);	/* ref taken in neigh_parms_alloc() */
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
1557EXPORT_SYMBOL(neigh_parms_release);
1558
/* Final teardown once the last reference is gone. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1563
1564static struct lock_class_key neigh_table_proxy_queue_class;
1565
1566static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1567
/* One-time initialization of protocol table @tbl and registration in
 * the global neigh_tables[] array at @index. Panics on allocation
 * failure — this runs during protocol bring-up where failure is fatal.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash with shift 3, i.e. 1 << 3 buckets. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	/* Push the first reachable_time re-randomization well out. */
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
1617EXPORT_SYMBOL(neigh_table_init);
1618
/* Tear down @tbl at protocol unregistration: unregister it, stop the
 * GC worker and proxy timer, flush all entries, then free the hashes
 * and statistics. Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* Free the hash table after an RCU grace period. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
1644EXPORT_SYMBOL(neigh_table_clear);
1645
1646static struct neigh_table *neigh_find_table(int family)
1647{
1648        struct neigh_table *tbl = NULL;
1649
1650        switch (family) {
1651        case AF_INET:
1652                tbl = neigh_tables[NEIGH_ARP_TABLE];
1653                break;
1654        case AF_INET6:
1655                tbl = neigh_tables[NEIGH_ND_TABLE];
1656                break;
1657        case AF_DECnet:
1658                tbl = neigh_tables[NEIGH_DN_TABLE];
1659                break;
1660        }
1661
1662        return tbl;
1663}
1664
/* RTM_DELNEIGH handler: resolve table, device and destination address
 * from the netlink request, then either delete a proxy entry or force
 * the matching neighbour to NUD_FAILED and unlink it from the table.
 * Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	/* NOTE(review): the lookup reference is dropped before
	 * neigh_remove_one() — presumably safe under tbl->lock because
	 * the hash table's own reference keeps neigh alive; confirm.
	 */
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1726
/* Handler for RTM_NEWNEIGH: create or update a neighbour (or proxy)
 * entry on request from userspace.  Runs under the RTNL lock.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* Administrative updates start with the OVERRIDE bits set;
	 * they are cleared below unless NLM_F_REPLACE was given.
	 */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be at least as
		 * long as the device's hardware address.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	/* Proxy entries are kept in the pneigh hash; creating one (or
	 * refreshing its flags) is all that is needed here.
	 */
	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		/* No existing entry: only create when NLM_F_CREATE set */
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE an existing entry is updated
		 * but its state is not forcibly overridden.
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE: just kick resolution, no state change */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1827
/* Append an NDTA_PARMS nested attribute describing one neigh_parms
 * instance (table defaults or per-device tunables).  Returns the
 * value of nla_nest_end() on success, or a negative errno.
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* NDTPA_IFINDEX is only emitted for per-device parms */
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	/* Roll back the partially-filled nest on any put failure */
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1875
/* Emit one RTM_NEWNEIGHTBL message fully describing a neighbour
 * table: gc tunables, static configuration (NDTA_CONFIG), summed
 * per-CPU statistics (NDTA_STATS) and the default parms.  The table
 * lock is held for the whole fill so the values are consistent.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* Snapshot of the table's static configuration */
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-protected */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* Sum the per-CPU statistics into one ndt_stats blob */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* Table default parms must not be tied to a device */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1964
1965static int neightbl_fill_param_info(struct sk_buff *skb,
1966                                    struct neigh_table *tbl,
1967                                    struct neigh_parms *parms,
1968                                    u32 pid, u32 seq, int type,
1969                                    unsigned int flags)
1970{
1971        struct ndtmsg *ndtmsg;
1972        struct nlmsghdr *nlh;
1973
1974        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1975        if (nlh == NULL)
1976                return -EMSGSIZE;
1977
1978        ndtmsg = nlmsg_data(nlh);
1979
1980        read_lock_bh(&tbl->lock);
1981        ndtmsg->ndtm_family = tbl->family;
1982        ndtmsg->ndtm_pad1   = 0;
1983        ndtmsg->ndtm_pad2   = 0;
1984
1985        if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1986            neightbl_fill_parms(skb, parms) < 0)
1987                goto errout;
1988
1989        read_unlock_bh(&tbl->lock);
1990        nlmsg_end(skb, nlh);
1991        return 0;
1992errout:
1993        read_unlock_bh(&tbl->lock);
1994        nlmsg_cancel(skb, nlh);
1995        return -EMSGSIZE;
1996}
1997
/* Netlink attribute policy for top-level RTM_*NEIGHTBL attributes */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2006
/* Netlink attribute policy for attributes nested inside NDTA_PARMS */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
2023
/* Handler for RTM_SETNEIGHTBL: update a neighbour table's tunables
 * (gc thresholds/interval and per-device or default parms) from
 * userspace.  The table is selected by NDTA_NAME, optionally
 * narrowed by family.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Find the table whose id matches NDTA_NAME (and family, if given) */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every tunable attribute that was supplied */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated packet count: convert to bytes */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* let interested parties (e.g. drivers) know */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Global gc parameters may only be changed from the initial
	 * network namespace.
	 */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2184
2185static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2186                                    struct netlink_ext_ack *extack)
2187{
2188        struct ndtmsg *ndtm;
2189
2190        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2191                NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2192                return -EINVAL;
2193        }
2194
2195        ndtm = nlmsg_data(nlh);
2196        if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2197                NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2198                return -EINVAL;
2199        }
2200
2201        if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2202                NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2203                return -EINVAL;
2204        }
2205
2206        return 0;
2207}
2208
/* Dump all neighbour tables (and their per-device parms lists) as
 * RTM_NEWNEIGHTBL messages.  cb->args[0] records the next table
 * index and cb->args[1] the next parms index within that table so
 * the dump can resume across multiple callbacks.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Skip tbl->parms itself: it was emitted by
		 * neightbl_fill_info() above; only walk the
		 * device-specific parms here.
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2269
/* Emit one RTM_NEWNEIGH message describing a neighbour entry.  The
 * entry's lock is held while the volatile fields (NUD state,
 * hardware address, timestamps, refcount) are snapshotted so they
 * form a consistent view.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* Copy the hardware address under the lock */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Ages are reported relative to now, in clock_t units */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2322
2323static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2324                            u32 pid, u32 seq, int type, unsigned int flags,
2325                            struct neigh_table *tbl)
2326{
2327        struct nlmsghdr *nlh;
2328        struct ndmsg *ndm;
2329
2330        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2331        if (nlh == NULL)
2332                return -EMSGSIZE;
2333
2334        ndm = nlmsg_data(nlh);
2335        ndm->ndm_family  = tbl->family;
2336        ndm->ndm_pad1    = 0;
2337        ndm->ndm_pad2    = 0;
2338        ndm->ndm_flags   = pn->flags | NTF_PROXY;
2339        ndm->ndm_type    = RTN_UNICAST;
2340        ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2341        ndm->ndm_state   = NUD_NONE;
2342
2343        if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2344                goto nla_put_failure;
2345
2346        nlmsg_end(skb, nlh);
2347        return 0;
2348
2349nla_put_failure:
2350        nlmsg_cancel(skb, nlh);
2351        return -EMSGSIZE;
2352}
2353
/* Notify both in-kernel netevent listeners and rtnetlink userspace
 * subscribers that a neighbour entry changed.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2359
2360static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2361{
2362        struct net_device *master;
2363
2364        if (!master_idx)
2365                return false;
2366
2367        master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2368        if (!master || master->ifindex != master_idx)
2369                return true;
2370
2371        return false;
2372}
2373
2374static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2375{
2376        if (filter_idx && (!dev || dev->ifindex != filter_idx))
2377                return true;
2378
2379        return false;
2380}
2381
/* Filter criteria applied while dumping neighbour entries; a zero
 * ifindex means "no filtering" on that criterion.
 */
struct neigh_dump_filter {
	int master_idx;	/* only entries whose device has this master */
	int dev_idx;	/* only entries on this device */
};
2386
/* Dump the entries of one neighbour table.  The RCU-protected hash
 * is walked under rcu_read_lock_bh(); cb->args[1] (bucket) and
 * cb->args[2] (index within bucket) record where to resume.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace the dump was filtered by device/master */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				/* skb full: stop and resume here later */
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2433
/* Dump one table's proxy (pneigh) entries.  The phash_buckets array
 * is walked under the table's read lock; cb->args[3] (bucket) and
 * cb->args[4] (index within bucket) record where to resume.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace the dump was filtered by device/master */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH, flags, tbl) < 0) {
				/* skb full: stop and resume here later */
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2478
2479static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2480                                bool strict_check,
2481                                struct neigh_dump_filter *filter,
2482                                struct netlink_ext_ack *extack)
2483{
2484        struct nlattr *tb[NDA_MAX + 1];
2485        int err, i;
2486
2487        if (strict_check) {
2488                struct ndmsg *ndm;
2489
2490                if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2491                        NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2492                        return -EINVAL;
2493                }
2494
2495                ndm = nlmsg_data(nlh);
2496                if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2497                    ndm->ndm_state || ndm->ndm_type) {
2498                        NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2499                        return -EINVAL;
2500                }
2501
2502                if (ndm->ndm_flags & ~NTF_PROXY) {
2503                        NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2504                        return -EINVAL;
2505                }
2506
2507                err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2508                                         NULL, extack);
2509        } else {
2510                err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2511                                  NULL, extack);
2512        }
2513        if (err < 0)
2514                return err;
2515
2516        for (i = 0; i <= NDA_MAX; ++i) {
2517                if (!tb[i])
2518                        continue;
2519
2520                /* all new attributes should require strict_check */
2521                switch (i) {
2522                case NDA_IFINDEX:
2523                        if (nla_len(tb[i]) != sizeof(u32)) {
2524                                NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
2525                                return -EINVAL;
2526                        }
2527                        filter->dev_idx = nla_get_u32(tb[i]);
2528                        break;
2529                case NDA_MASTER:
2530                        if (nla_len(tb[i]) != sizeof(u32)) {
2531                                NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
2532                                return -EINVAL;
2533                        }
2534                        filter->master_idx = nla_get_u32(tb[i]);
2535                        break;
2536                default:
2537                        if (strict_check) {
2538                                NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2539                                return -EINVAL;
2540                        }
2541                }
2542        }
2543
2544        return 0;
2545}
2546
/* RTM_GETNEIGH dump handler: walk every registered neighbour table,
 * dumping either the proxy entries (ndm_flags == NTF_PROXY) or the
 * regular neighbour cache.  cb->args[0] records the table index so a
 * multi-part dump resumes at the right table; args[1..] belong to the
 * per-table dump functions and are cleared when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	/* Invalid requests are only fatal under strict checking, to
	 * stay compatible with older userspace.
	 */
	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2592
/* Invoke @cb(entry, @cookie) on every neighbour in @tbl.  Runs under
 * RCU-BH plus the table read lock, so the callback must not sleep and
 * must not add or remove entries.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2614
/* Walk every neighbour in @tbl and unlink + release each entry for
 * which @cb returns non-zero.  Removal uses RCU-safe pointer updates
 * so lockless readers traversing the chains stay safe.
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink under the entry lock; RCU readers
				 * on other CPUs may still see the entry.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2649
/* Transmit @skb to @addr on @dev for the resolution table selected by
 * @index (e.g. NEIGH_ARP_TABLE / NEIGH_ND_TABLE), creating a neighbour
 * entry on demand, or build a raw link-layer header for
 * NEIGH_LINK_TABLE.  @skb is consumed (freed) on error.  Returns the
 * output path's result, or a negative errno (-EAFNOSUPPORT for an
 * unknown index).
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		/* RCU-BH keeps the (possibly just-created) entry alive
		 * across the output call.
		 */
		rcu_read_lock_bh();
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2687
2688#ifdef CONFIG_PROC_FS
2689
/* seq_file helper: locate the first neighbour visible to this
 * iterator — the first hash entry that belongs to the current netns,
 * passes the optional per-protocol sub-iterator and, when
 * NEIGH_SEQ_SKIP_NOARP is set, is not a pure NUD_NOARP entry.
 * Runs under the rcu_read_lock_bh() taken in neigh_seq_start().
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* We are (re)starting on the main hash, not the pneigh list. */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2728
/* seq_file helper: advance from @n to the next visible neighbour,
 * continuing along the chain and into later buckets as needed.  Each
 * returned entry decrements *pos when @pos is non-NULL.  Visibility
 * rules (netns, sub-iterator, SKIP_NOARP) match neigh_get_first().
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* Let the protocol iterator advance within @n first. */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2776
2777static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2778{
2779        struct neighbour *n = neigh_get_first(seq);
2780
2781        if (n) {
2782                --(*pos);
2783                while (*pos) {
2784                        n = neigh_get_next(seq, n, pos);
2785                        if (!n)
2786                                break;
2787                }
2788        }
2789        return *pos ? NULL : n;
2790}
2791
2792static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2793{
2794        struct neigh_seq_state *state = seq->private;
2795        struct net *net = seq_file_net(seq);
2796        struct neigh_table *tbl = state->tbl;
2797        struct pneigh_entry *pn = NULL;
2798        int bucket = state->bucket;
2799
2800        state->flags |= NEIGH_SEQ_IS_PNEIGH;
2801        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2802                pn = tbl->phash_buckets[bucket];
2803                while (pn && !net_eq(pneigh_net(pn), net))
2804                        pn = pn->next;
2805                if (pn)
2806                        break;
2807        }
2808        state->bucket = bucket;
2809
2810        return pn;
2811}
2812
2813static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2814                                            struct pneigh_entry *pn,
2815                                            loff_t *pos)
2816{
2817        struct neigh_seq_state *state = seq->private;
2818        struct net *net = seq_file_net(seq);
2819        struct neigh_table *tbl = state->tbl;
2820
2821        do {
2822                pn = pn->next;
2823        } while (pn && !net_eq(pneigh_net(pn), net));
2824
2825        while (!pn) {
2826                if (++state->bucket > PNEIGH_HASHMASK)
2827                        break;
2828                pn = tbl->phash_buckets[state->bucket];
2829                while (pn && !net_eq(pneigh_net(pn), net))
2830                        pn = pn->next;
2831                if (pn)
2832                        break;
2833        }
2834
2835        if (pn && pos)
2836                --(*pos);
2837
2838        return pn;
2839}
2840
2841static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2842{
2843        struct pneigh_entry *pn = pneigh_get_first(seq);
2844
2845        if (pn) {
2846                --(*pos);
2847                while (*pos) {
2848                        pn = pneigh_get_next(seq, pn, pos);
2849                        if (!pn)
2850                                break;
2851                }
2852        }
2853        return *pos ? NULL : pn;
2854}
2855
2856static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2857{
2858        struct neigh_seq_state *state = seq->private;
2859        void *rc;
2860        loff_t idxpos = *pos;
2861
2862        rc = neigh_get_idx(seq, &idxpos);
2863        if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2864                rc = pneigh_get_idx(seq, &idxpos);
2865
2866        return rc;
2867}
2868
/* Common ->start for the neighbour /proc seq files.  Takes
 * rcu_read_lock_bh() (paired with neigh_seq_stop()) to pin the hash
 * table, then positions the iterator.  *pos == 0 yields
 * SEQ_START_TOKEN so callers can emit a header line first.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is internal bookkeeping; callers may not set it. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2884
/* Common ->next for the neighbour /proc seq files.  Iterates the
 * neighbour hash first; once it is exhausted (and the caller did not
 * request NEIGH_SEQ_NEIGH_ONLY) it transparently switches over to the
 * proxy (pneigh) list.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Main hash done: fall through to the proxy list. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2911
/* Common ->stop: drop the RCU-BH read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2918
2919/* statistics via seq_file */
2920
2921static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2922{
2923        struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2924        int cpu;
2925
2926        if (*pos == 0)
2927                return SEQ_START_TOKEN;
2928
2929        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2930                if (!cpu_possible(cpu))
2931                        continue;
2932                *pos = cpu+1;
2933                return per_cpu_ptr(tbl->stats, cpu);
2934        }
2935        return NULL;
2936}
2937
2938static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2939{
2940        struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2941        int cpu;
2942
2943        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2944                if (!cpu_possible(cpu))
2945                        continue;
2946                *pos = cpu+1;
2947                return per_cpu_ptr(tbl->stats, cpu);
2948        }
2949        return NULL;
2950}
2951
/* ->stop: nothing to release — per-cpu stats are read locklessly. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2956
/* ->show for /proc/net/stat/<table>: print the column header for
 * SEQ_START_TOKEN, otherwise one row of counters for a single CPU
 * (entries is global, the rest are per-cpu).
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2991
/* seq_file operations for /proc/net/stat/<table> (one row per CPU). */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2998#endif /* CONFIG_PROC_FS */
2999
3000static inline size_t neigh_nlmsg_size(void)
3001{
3002        return NLMSG_ALIGN(sizeof(struct ndmsg))
3003               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3004               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3005               + nla_total_size(sizeof(struct nda_cacheinfo))
3006               + nla_total_size(4); /* NDA_PROBES */
3007}
3008
/* Send a netlink notification of @type (RTM_NEWNEIGH, ...) about @n to
 * the RTNLGRP_NEIGH multicast group.  Atomic context safe.  On
 * allocation or fill failure the error is recorded on the netns so
 * listeners observe it via the socket error queue.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
3033
/* Ask userspace resolvers (app_solicit/mcast_resolicit daemons) to
 * resolve @n by multicasting an RTM_GETNEIGH request.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
3039
3040#ifdef CONFIG_SYSCTL
/* Shared min/max clamps for the sysctl handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen (in packets) so the byte conversion cannot overflow. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3044
/* Handler for the legacy "unres_qlen" knob: the stored value
 * (QUEUE_LEN_BYTES) is in bytes, but userspace reads and writes packet
 * counts, so convert via the true size of an ethernet frame both ways.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
3062
3063static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3064                                                   int family)
3065{
3066        switch (family) {
3067        case AF_INET:
3068                return __in_dev_arp_parms_get_rcu(dev);
3069        case AF_INET6:
3070                return __in6_dev_nd_parms_get_rcu(dev);
3071        }
3072        return NULL;
3073}
3074
/* Propagate a changed default parameter (@index into parms->data) to
 * every device in @net whose per-device value has not been explicitly
 * set by the administrator (data_state bit clear).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
3091
/* Bookkeeping after a neigh sysctl write: mark the value as explicitly
 * set, fire the netevent for DELAY_PROBE_TIME (consumed by switchdev/
 * offload drivers), and — for the "default" table entry (NULL dev) —
 * propagate the new value to devices still using the default.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* Recover which NEIGH_VAR this entry controls from its offset. */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
3108
3109static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3110                                           void __user *buffer,
3111                                           size_t *lenp, loff_t *ppos)
3112{
3113        struct ctl_table tmp = *ctl;
3114        int ret;
3115
3116        tmp.extra1 = &zero;
3117        tmp.extra2 = &int_max;
3118
3119        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3120        neigh_proc_update(ctl, write);
3121        return ret;
3122}
3123
3124int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3125                        void __user *buffer, size_t *lenp, loff_t *ppos)
3126{
3127        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3128
3129        neigh_proc_update(ctl, write);
3130        return ret;
3131}
3132EXPORT_SYMBOL(neigh_proc_dointvec);
3133
3134int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3135                                void __user *buffer,
3136                                size_t *lenp, loff_t *ppos)
3137{
3138        int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3139
3140        neigh_proc_update(ctl, write);
3141        return ret;
3142}
3143EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3144
3145static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3146                                              void __user *buffer,
3147                                              size_t *lenp, loff_t *ppos)
3148{
3149        int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3150
3151        neigh_proc_update(ctl, write);
3152        return ret;
3153}
3154
3155int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3156                                   void __user *buffer,
3157                                   size_t *lenp, loff_t *ppos)
3158{
3159        int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3160
3161        neigh_proc_update(ctl, write);
3162        return ret;
3163}
3164EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3165
3166static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3167                                          void __user *buffer,
3168                                          size_t *lenp, loff_t *ppos)
3169{
3170        int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3171
3172        neigh_proc_update(ctl, write);
3173        return ret;
3174}
3175
3176static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3177                                          void __user *buffer,
3178                                          size_t *lenp, loff_t *ppos)
3179{
3180        struct neigh_parms *p = ctl->extra2;
3181        int ret;
3182
3183        if (strcmp(ctl->procname, "base_reachable_time") == 0)
3184                ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3185        else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3186                ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3187        else
3188                ret = -1;
3189
3190        if (write && ret == 0) {
3191                /* update reachable_time as well, otherwise, the change will
3192                 * only be effective after the next time neigh_periodic_work
3193                 * decides to recompute it
3194                 */
3195                p->reachable_time =
3196                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3197        }
3198        return ret;
3199}
3200
/* offsetof-style trick: the template table below stores the offset of
 * data[index] within struct neigh_parms as a fake pointer; it is
 * rebased onto the real parms block in neigh_sysctl_register().
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Template initializer for one neigh_vars[] sysctl entry.  data_attr
 * may differ from attr so legacy names (e.g. "unres_qlen") can alias
 * another variable's storage with a converting handler.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Aliasing variants: present the same storage under a second name. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3230
/* Template sysctl table, kmemdup()'d per device (or per table for the
 * "default" directory) by neigh_sysctl_register().  The trailing gc_*
 * entries are table-global and are stripped for per-device copies.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		/* Legacy aliases of the entries above, in other units. */
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3285
/**
 * neigh_sysctl_register - expose a neigh_parms set via sysctl
 * @dev: device the parameters belong to, or NULL for the protocol's
 *	per-namespace "default" parameters
 * @p: neighbour parameters the sysctl entries should read/write
 * @handler: optional proc_handler that overrides the four time-valued
 *	entries (retrans_time / base_reachable_time and their _ms forms)
 *
 * Duplicates the file-scope template table, points each entry at the
 * corresponding field of @p, and registers the result under
 * net/<ipv4|ipv6>/neigh/<devname|default>.
 *
 * Returns 0 on success or -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	/* Private copy of the template so each parms set owns its table. */
	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		/* The template's .data presumably holds the field offset
		 * within struct neigh_parms (set up by the template's
		 * NEIGH_VAR entries); adding @p rebases it onto this
		 * instance's actual fields.
		 */
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early: the gc_* entries that follow
		 * are table-global, not per-device, so a device directory
		 * must not expose them.
		 */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* The "default" directory additionally exposes the gc
		 * knobs, which live in the neigh_table itself.
		 */
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users: NULLing the first
	 * procname makes the table look empty to the sysctl core, so
	 * nothing is registered for non-init user namespaces —
	 * NOTE(review): relies on register_net_sysctl's empty-table
	 * handling; confirm if that contract changes.
	 */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      /* Only INET/INET6 neigh tables exist; anything else is a bug. */
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3374
3375void neigh_sysctl_unregister(struct neigh_parms *p)
3376{
3377        if (p->sysctl_table) {
3378                struct neigh_sysctl_table *t = p->sysctl_table;
3379                p->sysctl_table = NULL;
3380                unregister_net_sysctl_table(t->sysctl_header);
3381                kfree(t);
3382        }
3383}
3384EXPORT_SYMBOL(neigh_sysctl_unregister);
3385
3386#endif  /* CONFIG_SYSCTL */
3387
3388static int __init neigh_init(void)
3389{
3390        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3391        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3392        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3393
3394        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3395                      0);
3396        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3397
3398        return 0;
3399}
3400
3401subsys_initcall(neigh_init);
3402