/* linux/net/core/neighbour.c */
   1/*
   2 *      Generic address resolution entity
   3 *
   4 *      Authors:
   5 *      Pedro Roque             <roque@di.fc.ul.pt>
   6 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 *      Fixes:
  14 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
  15 *      Harald Welte            Add neighbour cache statistics like rtstat
  16 */
  17
  18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19
  20#include <linux/slab.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/socket.h>
  25#include <linux/netdevice.h>
  26#include <linux/proc_fs.h>
  27#ifdef CONFIG_SYSCTL
  28#include <linux/sysctl.h>
  29#endif
  30#include <linux/times.h>
  31#include <net/net_namespace.h>
  32#include <net/neighbour.h>
  33#include <net/dst.h>
  34#include <net/sock.h>
  35#include <net/netevent.h>
  36#include <net/netlink.h>
  37#include <linux/rtnetlink.h>
  38#include <linux/random.h>
  39#include <linux/string.h>
  40#include <linux/log2.h>
  41
  42#define DEBUG
  43#define NEIGH_DEBUG 1
  44#define neigh_dbg(level, fmt, ...)              \
  45do {                                            \
  46        if (level <= NEIGH_DEBUG)               \
  47                pr_debug(fmt, ##__VA_ARGS__);   \
  48} while (0)
  49
  50#define PNEIGH_HASHMASK         0xF
  51
  52static void neigh_timer_handler(unsigned long arg);
  53static void __neigh_notify(struct neighbour *n, int type, int flags);
  54static void neigh_update_notify(struct neighbour *neigh);
  55static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
  56
  57static struct neigh_table *neigh_tables;
  58#ifdef CONFIG_PROC_FS
  59static const struct file_operations neigh_stat_seq_fops;
  60#endif
  61
  62/*
  63   Neighbour hash table buckets are protected with rwlock tbl->lock.
  64
  65   - All the scans/updates to hash buckets MUST be made under this lock.
  66   - NOTHING clever should be made under this lock: no callbacks
  67     to protocol backends, no attempts to send something to network.
  68     It will result in deadlocks, if backend/driver wants to use neighbour
  69     cache.
  70   - If the entry requires some non-trivial actions, increase
  71     its reference count and release table lock.
  72
  73   Neighbour entries are protected:
  74   - with reference count.
  75   - with rwlock neigh->lock
  76
  77   Reference count prevents destruction.
  78
  79   neigh->lock mainly serializes ll address data and its validity state.
  80   However, the same lock is used to protect another entry fields:
  81    - timer
  82    - resolution queue
  83
  84   Again, nothing clever shall be made under neigh->lock,
  85   the most complicated procedure, which we allow is dev->hard_header.
  86   It is supposed, that dev->hard_header is simplistic and does
  87   not make callbacks to neighbour tables.
  88
  89   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
  90   list of neighbour tables. This list is used only in process context,
  91 */
  92
  93static DEFINE_RWLOCK(neigh_tbl_lock);
  94
/* Output handler installed on dead/unresolvable entries: drop the
 * packet and report the link as unusable.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
 100
/* Run the per-parms cleanup hook (if any), send an RTM_DELNEIGH
 * netlink notification, and drop our reference on the entry.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
 109
/*
 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to default IPv6 settings and is not overridable,
 * because it is really a reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	/* base == 0 disables randomization; otherwise pick uniformly
	 * from [base/2, 3*base/2).
	 */
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
 121
 122
/* Synchronous "forced" garbage collection, triggered from neigh_alloc()
 * when the table is over its gc thresholds.  Walks every bucket under
 * the table write lock and unlinks entries that nobody else references
 * (refcnt == 1, i.e. only the table holds them) and that are not
 * NUD_PERMANENT.  Returns 1 if at least one entry was reclaimed, 0
 * otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink; freed via neigh_cleanup_and_release
				 * when the table's reference is dropped.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Timestamp used by neigh_alloc() to rate-limit gc_thresh2-
	 * triggered runs (5 * HZ between flushes).
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
 168
 169static void neigh_add_timer(struct neighbour *n, unsigned long when)
 170{
 171        neigh_hold(n);
 172        if (unlikely(mod_timer(&n->timer, when))) {
 173                printk("NEIGH: BUG, double timer add, state is %x\n",
 174                       n->nud_state);
 175                dump_stack();
 176        }
 177}
 178
 179static int neigh_del_timer(struct neighbour *n)
 180{
 181        if ((n->nud_state & NUD_IN_TIMER) &&
 182            del_timer(&n->timer)) {
 183                neigh_release(n);
 184                return 1;
 185        }
 186        return 0;
 187}
 188
 189static void pneigh_queue_purge(struct sk_buff_head *list)
 190{
 191        struct sk_buff *skb;
 192
 193        while ((skb = skb_dequeue(list)) != NULL) {
 194                dev_put(skb->dev);
 195                kfree_skb(skb);
 196        }
 197}
 198
/* Unlink from the table every entry attached to @dev (every entry when
 * @dev is NULL).  Caller holds tbl->lock for writing.  Entries still
 * referenced elsewhere are marked dead, given a blackhole output
 * handler and a purged queue; they are freed only when the last
 * reference goes away.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Skip entries on other devices when filtering. */
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
 248
/* Flush every cached entry for @dev (e.g. after an address change).
 * Takes the table lock itself; safe to call from process context.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
 256
/* Device is going down: flush its neighbour and proxy entries under
 * the table write lock, then stop the proxy timer and drop any queued
 * proxy packets.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
 269
/* Allocate and minimally initialize a new neighbour entry for @tbl.
 * May trigger forced GC when the table is over gc_thresh2/gc_thresh3.
 * Returns NULL when GC could not make room or allocation failed.
 * The entry comes back with refcnt 1 and dead = 1: it is not hashed
 * yet (__neigh_create() clears ->dead once it is inserted).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Account the prospective entry up front; backed out on failure. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	/* GC if over thresh3, or over thresh2 with >5s since last flush. */
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* Entry plus the device's private area (dev->neigh_priv_len). */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
 310
/* Fill *x with random bits for hash keying; OR-ing in bit 0 guarantees
 * the value is nonzero (and odd).
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
 316
/* Allocate a bucket table with (1 << shift) chain heads plus fresh
 * hash randoms.  Buckets come from kzalloc() when they fit in a page,
 * otherwise straight from the page allocator (mirrored in
 * neigh_hash_free_rcu()).  Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
 343
/* RCU callback counterpart of neigh_hash_alloc(): release the bucket
 * array with whichever allocator produced it, then the table struct.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
 358
/* Replace tbl->nht with a table of (1 << new_shift) buckets, rehashing
 * every entry.  Caller holds tbl->lock for writing.  On allocation
 * failure the old table is returned unchanged.  The old table is freed
 * via call_rcu() because lockless readers may still be traversing it.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* Top hash_shift bits select the new bucket. */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Link at the head of the new chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
 399
/* Look up the entry keyed by @pkey on @dev.  Lockless: runs under
 * rcu_read_lock_bh().  On a hit the entry's refcount is taken; if the
 * refcount has already dropped to zero (entry being destroyed), NULL
 * is returned even though the hit statistic is still counted.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
 429
/* Like neigh_lookup() but device-independent: matches on key and
 * network namespace only (NULL device is fed to tbl->hash).  Same
 * refcount semantics as neigh_lookup().
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
 460
/* Create a new entry for @pkey on @dev and insert it into the hash
 * table, unless a concurrent inserter beat us - then the existing
 * entry is returned instead.  With @want_ref the result carries an
 * extra reference for the caller.  Failures return ERR_PTR():
 * -ENOBUFS when allocation/GC fails, a constructor/setup error code,
 * or -EINVAL when the parms block is already dead.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate 'confirmed' by two base_reachable_time periods so the
	 * fresh entry does not start out looking recently confirmed.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Grow the hash table once entries outnumber buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-scan the chain under the lock in case another creator of
	 * the same entry raced us before we took tbl->lock.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the head of the chain for lockless readers. */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
 548
 549static u32 pneigh_hash(const void *pkey, int key_len)
 550{
 551        u32 hash_val = *(u32 *)(pkey + key_len - 4);
 552        hash_val ^= (hash_val >> 16);
 553        hash_val ^= hash_val >> 8;
 554        hash_val ^= hash_val >> 4;
 555        hash_val &= PNEIGH_HASHMASK;
 556        return hash_val;
 557}
 558
 559static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 560                                              struct net *net,
 561                                              const void *pkey,
 562                                              int key_len,
 563                                              struct net_device *dev)
 564{
 565        while (n) {
 566                if (!memcmp(n->key, pkey, key_len) &&
 567                    net_eq(pneigh_net(n), net) &&
 568                    (n->dev == dev || !n->dev))
 569                        return n;
 570                n = n->next;
 571        }
 572        return NULL;
 573}
 574
/* Hash @pkey and scan the matching proxy chain.
 * NOTE(review): takes no lock itself; callers appear responsible for
 * holding tbl->lock (cf. pneigh_lookup() below) - confirm.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
 585
/* Look up a proxy-neighbour entry for @pkey (optionally bound to @dev).
 * When not found and @creat is nonzero, allocate and insert a new one;
 * creation requires the RTNL (ASSERT_RTNL below), which is why
 * GFP_KERNEL is safe here.  Returns NULL when absent and !creat, or on
 * allocation/constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	/* Key bytes live in the flexible tail of the entry. */
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the refs taken above. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
 631
 632
/* Remove and free the proxy entry matching @pkey/@dev/@net exactly.
 * The destructor and reference drops run after the table lock is
 * released.  Returns 0 on success, -ENOENT when nothing matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
 659
/* Drop every proxy entry bound to @dev (all entries when @dev is NULL).
 * Called from neigh_ifdown() with tbl->lock write-held.  Always
 * returns -ENOENT; the visible caller ignores the return value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
 683
 684static void neigh_parms_destroy(struct neigh_parms *parms);
 685
/* Drop a reference on a parms block; destroy it on the last put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
 691
/*
 *	neighbour must already be out of the table (->dead set);
 *	kills any pending timer, purges queued packets, releases the
 *	device and parms references, then frees the entry after an RCU
 *	grace period (kfree_rcu).
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* Destroying a live (still-hashed) entry is a caller bug. */
	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A dead entry should never still have a pending timer. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
 726
/* Neighbour state is suspicious;
   disable fast path by routing output through the generic
   (resolving) ops->output handler.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
 738
/* Neighbour state is OK;
   enable fast path by switching output to the connected
   (pre-resolved) ops->connected_output handler.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
 750
/* Periodic (workqueue) garbage collector: prunes unreferenced entries
 * that are NUD_FAILED or idle past gc_staletime, and refreshes each
 * parms' reachable_time from the random distribution every 300s.
 * Reschedules itself every base_reachable_time/2 ticks.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Nothing to prune while comfortably under gc_thresh1. */
	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Leave permanent entries and entries with a
			 * pending timer alone.
			 */
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				/* NOTE(review): this unlink is a plain
				 * store, unlike the rcu_assign_pointer()
				 * used at the other unlink sites in this
				 * file - confirm this is intentional for
				 * the __rcu-annotated chain pointers.
				 */
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
 831
 832static __inline__ int neigh_max_probes(struct neighbour *n)
 833{
 834        struct neigh_parms *p = n->parms;
 835        return (n->nud_state & NUD_PROBE) ?
 836                p->ucast_probes :
 837                p->ucast_probes + p->app_probes + p->mcast_probes;
 838}
 839
/* Resolution failed: report every queued packet as unreachable and
 * purge the queue.  Called with neigh->lock write-held; the lock is
 * dropped around each error_report() call (hence the __releases/
 * __acquires annotations).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
 864
/* Send one solicitation for @neigh and bump its probe counter.
 * Called with neigh->lock write-held; the lock is released before
 * calling ->solicit() and is NOT re-taken (callers must not touch
 * locked state afterwards).
 */
static void neigh_probe(struct neighbour *neigh)
        __releases(neigh->lock)
{
        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
        /* keep skb alive even if arp_queue overflows */
        if (skb)
                skb = skb_copy(skb, GFP_ATOMIC);
        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb);
        atomic_inc(&neigh->probes);
        /* Free our private copy (or NULL, which is a no-op). */
        kfree_skb(skb);
}
 877
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: REACHABLE -> DELAY/STALE,
 * DELAY -> REACHABLE/PROBE, and INCOMPLETE/PROBE -> FAILED once
 * neigh_max_probes() solicitations have gone unanswered.  The timer
 * owns a reference on @neigh which is dropped on exit; re-arming via
 * mod_timer() takes an extra hold only when the timer was inactive.
 */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        /* Entry left a timed state before we ran; nothing to do. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Still confirmed recently enough: stay REACHABLE. */
                        neigh_dbg(2, "neigh %p is still alive\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Recently used but not confirmed: give passive
                         * confirmation a chance before probing.
                         */
                        neigh_dbg(2, "neigh %p is delayed\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        /* Idle and unconfirmed: demote to STALE. */
                        neigh_dbg(2, "neigh %p is suspected\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* Confirmation arrived while delaying: promote. */
                        neigh_dbg(2, "neigh %p is now reachable\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start active unicast probing. */
                        neigh_dbg(2, "neigh %p is probed\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Out of probes in a soliciting state: declare failure. */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Clamp re-fire to at least HZ/2 in the future. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                /* mod_timer() returning 0 means the timer was inactive,
                 * so the re-armed timer needs its own reference.
                 */
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() drops neigh->lock for us. */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        /* Drop the reference held by the expired timer. */
        neigh_release(neigh);
}
 961
/* Slow path of neigh_event_send(): kick off (or continue) resolution
 * for @neigh, queueing @skb on arp_queue while the address is unknown.
 *
 * Returns 0 if the caller may transmit immediately, 1 if the skb was
 * queued (or dropped on failure; a dropped skb is freed here).
 * Note the asymmetric unlock at the end: we take write_lock_bh() but
 * release the lock and BH separately, because neigh_probe() consumes
 * only the lock.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        /* Usable or already being verified: nothing to do. */
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        unsigned long next, now = jiffies;

                        /* Start resolution: pre-charge the unicast probe
                         * budget so only mcast/app probes remain.
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(neigh->parms->retrans_time, HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No way to probe at all: fail immediately. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                /* STALE entry is being used: verify reachability soon. */
                neigh_dbg(2, "neigh %p is delayed\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Enforce the byte budget on arp_queue by evicting
                         * oldest entries first.
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               neigh->parms->queue_len_bytes) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        if (immediate_probe)
                neigh_probe(neigh);     /* releases neigh->lock */
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1027
1028static void neigh_update_hhs(struct neighbour *neigh)
1029{
1030        struct hh_cache *hh;
1031        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1032                = NULL;
1033
1034        if (neigh->dev->header_ops)
1035                update = neigh->dev->header_ops->cache_update;
1036
1037        if (update) {
1038                hh = &neigh->hh;
1039                if (hh->hh_len) {
1040                        write_seqlock_bh(&hh->hh_lock);
1041                        update(hh, neigh->dev, neigh->ha);
1042                        write_sequnlock_bh(&hh->hh_lock);
1043                }
1044        }
1045}
1046
1047
1048
1049/* Generic update routine.
1050   -- lladdr is new lladdr or NULL, if it is not supplied.
1051   -- new    is new state.
1052   -- flags
1053        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1054                                if it is different.
1055        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1056                                lladdr instead of overriding it
1057                                if it is different.
1058                                It also allows to retain current state
1059                                if lladdr is unchanged.
1060        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1061
1062        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1063                                NTF_ROUTER flag.
1064        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1065                                a router.
1066
1067   Caller MUST hold reference count on the entry.
1068 */
1069
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may modify NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        if (!(new & NUD_VALID)) {
                /* Moving to an invalid state: stop the timer, demote a
                 * connected entry, and flush the queue on FAILED.
                 */
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        /* Address differs but we may not override it.
                         * WEAK_OVERRIDE on a connected entry keeps the
                         * old address but marks the entry suspect.
                         */
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                /* Install the new hardware address under the seqlock and
                 * propagate it into any cached headers.
                 */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /* Backdate confirmation so an unconnected entry with a
                 * fresh address still gets verified promptly.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();

                        /* Why not just use 'neigh' as-is?  The problem is that
                         * things such as shaper, eql, and sch_teql can end up
                         * using alternative, different, neigh objects to output
                         * the packet in the output path.  So what we need to do
                         * here is re-lookup the top-level neigh in the path so
                         * we can reinject the packet there.
                         */
                        n2 = NULL;
                        if (dst) {
                                n2 = dst_neigh_lookup_skb(dst, skb);
                                if (n2)
                                        n1 = n2;
                        }
                        n1->output(n1, skb);
                        if (n2)
                                neigh_release(n2);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                /* Whatever remains could not be sent; discard it. */
                skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1230
1231struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1232                                 u8 *lladdr, void *saddr,
1233                                 struct net_device *dev)
1234{
1235        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1236                                                 lladdr || !dev->addr_len);
1237        if (neigh)
1238                neigh_update(neigh, lladdr, NUD_STALE,
1239                             NEIGH_UPDATE_F_OVERRIDE);
1240        return neigh;
1241}
1242EXPORT_SYMBOL(neigh_event_ns);
1243
1244/* called with read_lock_bh(&n->lock); */
1245static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1246{
1247        struct net_device *dev = dst->dev;
1248        __be16 prot = dst->ops->protocol;
1249        struct hh_cache *hh = &n->hh;
1250
1251        write_lock_bh(&n->lock);
1252
1253        /* Only one thread can come in here and initialize the
1254         * hh_cache entry.
1255         */
1256        if (!hh->hh_len)
1257                dev->header_ops->cache(n, hh, prot);
1258
1259        write_unlock_bh(&n->lock);
1260}
1261
1262/* This function can be used in contexts, where only old dev_queue_xmit
1263 * worked, f.e. if you want to override normal output path (eql, shaper),
1264 * but resolution is not made yet.
1265 */
1266
/* Legacy output path: build the link-layer header directly and hand
 * the skb to dev_queue_xmit(), bypassing neighbour resolution.
 * Returns 0 when header construction fails, otherwise the
 * dev_queue_xmit() result.
 */
int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;

        /* Point skb->data at the network header before prepending. */
        __skb_pull(skb, skb_network_offset(skb));

        /* NOTE(review): ->rebuild is called unconditionally when
         * dev_hard_header() fails — assumes every header_ops used on
         * this path provides it; verify against the drivers involved.
         */
        if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
                            skb->len) < 0 &&
            dev->header_ops->rebuild(skb))
                return 0;

        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);
1281
1282/* Slow and careful. */
1283
/* Slow and careful output path: trigger/continue resolution via
 * neigh_event_send() and transmit only once the entry is usable.
 * Returns the dev_queue_xmit() result, 0 if the skb was queued for
 * later, or -EINVAL on error (skb freed).
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        int rc = 0;

        if (!dst)
                goto discard;

        /* Zero return: entry is usable now; non-zero: skb was queued
         * (or dropped) by __neigh_event_send and rc stays 0.
         */
        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                if (dev->header_ops->cache && !neigh->hh.hh_len)
                        neigh_hh_init(neigh, dst);

                /* seqlock retry loop: rebuild the header if neigh->ha
                 * changed while we were copying it in.
                 */
                do {
                        __skb_pull(skb, skb_network_offset(skb));
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = dev_queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        /* Falls through into out_kfree_skb below. */
        neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1322
1323/* As fast as possible without hh cache */
1324
1325int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1326{
1327        struct net_device *dev = neigh->dev;
1328        unsigned int seq;
1329        int err;
1330
1331        do {
1332                __skb_pull(skb, skb_network_offset(skb));
1333                seq = read_seqbegin(&neigh->ha_lock);
1334                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335                                      neigh->ha, NULL, skb->len);
1336        } while (read_seqretry(&neigh->ha_lock, seq));
1337
1338        if (err >= 0)
1339                err = dev_queue_xmit(skb);
1340        else {
1341                err = -EINVAL;
1342                kfree_skb(skb);
1343        }
1344        return err;
1345}
1346EXPORT_SYMBOL(neigh_connected_output);
1347
/* Trivial output path: the device needs no link-layer header, so the
 * neighbour entry is ignored and the skb is transmitted as-is.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1353
/* Timer callback for the proxy queue: replay (or drop) every delayed
 * proxy request whose scheduled time has arrived, then re-arm the
 * timer for the earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;    /* 0 = no pending entries remain */
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                /* Signed delta: <= 0 means this entry is due. */
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        /* Drop the reference taken in pneigh_enqueue(). */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1387
/* Queue a proxy request for deferred processing, spreading replies
 * over a random delay within p->proxy_delay.  Takes a device
 * reference (dropped in neigh_proxy_process) and consumes @skb.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
                    struct sk_buff *skb)
{
        unsigned long now = jiffies;
        /* NOTE(review): modulo by p->proxy_delay assumes it is non-zero
         * here — presumably callers only enqueue when proxy_delay > 0;
         * verify against the ARP/NDISC call sites.
         */
        unsigned long sched_next = now + (net_random() % p->proxy_delay);

        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
                kfree_skb(skb);
                return;
        }

        NEIGH_CB(skb)->sched_next = sched_next;
        NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

        spin_lock(&tbl->proxy_queue.lock);
        /* Keep the earlier of the currently-armed expiry and ours. */
        if (del_timer(&tbl->proxy_timer)) {
                if (time_before(tbl->proxy_timer.expires, sched_next))
                        sched_next = tbl->proxy_timer.expires;
        }
        skb_dst_drop(skb);
        dev_hold(skb->dev);
        __skb_queue_tail(&tbl->proxy_queue, skb);
        mod_timer(&tbl->proxy_timer, sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1414
1415static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416                                                      struct net *net, int ifindex)
1417{
1418        struct neigh_parms *p;
1419
1420        for (p = &tbl->parms; p; p = p->next) {
1421                if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422                    (!p->dev && !ifindex))
1423                        return p;
1424        }
1425
1426        return NULL;
1427}
1428
/* Clone the table's default neigh_parms for @dev and link the copy
 * into tbl->parms list.  Takes references on @dev and its netns;
 * returns the new parms or NULL on failure (including the driver's
 * ndo_neigh_setup rejecting it).
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p, *ref;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        /* Start from the dev-less default entry of this table. */
        ref = lookup_neigh_parms(tbl, net, 0);
        if (!ref)
                return NULL;

        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);

                /* Let the driver veto or adjust the new parms. */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }

                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, hold_net(net));
                p->sysctl_table = NULL;
                /* Publish on the table's list under the table lock. */
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1464
1465static void neigh_rcu_free_parms(struct rcu_head *head)
1466{
1467        struct neigh_parms *parms =
1468                container_of(head, struct neigh_parms, rcu_head);
1469
1470        neigh_parms_put(parms);
1471}
1472
/* Unlink @parms from @tbl's list and schedule its reference drop via
 * RCU.  The table's built-in default (&tbl->parms) is never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        /* Unlink and mark dead while holding the lock,
                         * then defer the final put past a grace period.
                         */
                        *p = parms->next;
                        parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
1495
/* Final destructor, reached when the parms refcount drops to zero:
 * release the netns reference taken at allocation, then free.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1501
1502static struct lock_class_key neigh_table_proxy_queue_class;
1503
/* One-time initialization of a neigh_table: default parms, per-cpu
 * stats, proc entry, hash tables, GC work and proxy timer.  Boot-time
 * only — allocation failures panic.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Initial neighbour hash: 2^3 = 8 buckets, grown on demand. */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        /* Callers may pre-set entry_size to reserve private space; if
         * not, size entries to hold the key right after the struct.
         */
        if (!tbl->entry_size)
                tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
                                        tbl->key_len, NEIGH_PRIV_ALIGN);
        else
                WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

        rwlock_init(&tbl->lock);
        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1548
/* Initialize @tbl and register it on the global neigh_tables list.
 * A duplicate registration for the same address family is still
 * linked in, but reported loudly with a stack dump.
 */
void neigh_table_init(struct neigh_table *tbl)
{
        struct neigh_table *tmp;

        neigh_table_init_no_netlink(tbl);
        write_lock(&neigh_tbl_lock);
        /* Scan for an existing table of the same family (diagnostic
         * only; the check result is acted on after unlocking).
         */
        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
                if (tmp->family == tbl->family)
                        break;
        }
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);

        if (unlikely(tmp)) {
                pr_err("Registering multiple tables for family %d\n",
                       tbl->family);
                dump_stack();
        }
}
EXPORT_SYMBOL(neigh_table_init);
1570
/* Tear down @tbl: stop GC work and the proxy timer, flush all entries
 * and queues, unlink from the global list, and free hashes, proc
 * entry and stats.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        /* Any remaining entries mean someone still holds references. */
        if (atomic_read(&tbl->entries))
                pr_crit("neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* Free the hash table after an RCU grace period; lockless
         * readers may still be traversing it.
         */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1606
/* RTM_DELNEIGH handler: delete a (proxy) neighbour entry described by
 * the netlink message.  Runs under RTNL.  A plain entry is "deleted"
 * by forcing it to NUD_FAILED via an administrative neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching table found: drop the list lock before the
                 * potentially-sleeping work below.  Every exit from
                 * here on goes through 'out' without re-taking it.
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1670
/* RTM_NEWNEIGH handler: create or update a neighbour entry, or a proxy
 * (NTF_PROXY) entry, as described by the netlink request.  Runs under
 * RTNL (ASSERT_RTNL below).  Returns 0 on success or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* A destination address is mandatory. */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* Reject link-layer addresses shorter than the device's. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found: drop the list lock now.  Every
		 * path below leaves via "goto out", not by continuing
		 * the loop.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			/* Proxy entries live in a separate table; dev may
			 * legitimately be NULL here (device-agnostic entry).
			 */
			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* A real neighbour entry always needs a device. */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE an existing entry may be
			 * refreshed but not overridden.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: only poke the entry via
			 * neigh_event_send(); no state change here.
			 */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1768
1769static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1770{
1771        struct nlattr *nest;
1772
1773        nest = nla_nest_start(skb, NDTA_PARMS);
1774        if (nest == NULL)
1775                return -ENOBUFS;
1776
1777        if ((parms->dev &&
1778             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1779            nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1780            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1781            /* approximative value for deprecated QUEUE_LEN (in packets) */
1782            nla_put_u32(skb, NDTPA_QUEUE_LEN,
1783                        parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1784            nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1785            nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1786            nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1787            nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1788            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1789            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1790                          parms->base_reachable_time) ||
1791            nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1792            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1793                          parms->delay_probe_time) ||
1794            nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1795            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1796            nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1797            nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1798                goto nla_put_failure;
1799        return nla_nest_end(skb, nest);
1800
1801nla_put_failure:
1802        nla_nest_cancel(skb, nest);
1803        return -EMSGSIZE;
1804}
1805
/* Fill an RTM_NEWNEIGHTBL message with the full state of @tbl:
 * thresholds, hash config, per-CPU statistics and the table's default
 * parms.  Returns the message length on success or -EMSGSIZE.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* tbl->lock keeps the values below consistent while we copy them. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-protected, not covered by
		 * tbl->lock, hence the separate RCU-BH section.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-CPU counters into a single ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* A table's default parms are never bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1891
1892static int neightbl_fill_param_info(struct sk_buff *skb,
1893                                    struct neigh_table *tbl,
1894                                    struct neigh_parms *parms,
1895                                    u32 pid, u32 seq, int type,
1896                                    unsigned int flags)
1897{
1898        struct ndtmsg *ndtmsg;
1899        struct nlmsghdr *nlh;
1900
1901        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902        if (nlh == NULL)
1903                return -EMSGSIZE;
1904
1905        ndtmsg = nlmsg_data(nlh);
1906
1907        read_lock_bh(&tbl->lock);
1908        ndtmsg->ndtm_family = tbl->family;
1909        ndtmsg->ndtm_pad1   = 0;
1910        ndtmsg->ndtm_pad2   = 0;
1911
1912        if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913            neightbl_fill_parms(skb, parms) < 0)
1914                goto errout;
1915
1916        read_unlock_bh(&tbl->lock);
1917        return nlmsg_end(skb, nlh);
1918errout:
1919        read_unlock_bh(&tbl->lock);
1920        nlmsg_cancel(skb, nlh);
1921        return -EMSGSIZE;
1922}
1923
/* Validation policy for table-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1932
/* Validation policy for the NDTPA_* attributes nested in NDTA_PARMS.
 * Time values travel as 64-bit milliseconds (see nla_get_msecs users).
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1948
/* RTM_SETNEIGHTBL handler: update a table's GC thresholds/interval and,
 * when NDTA_PARMS is present, one neigh_parms set selected by
 * NDTPA_IFINDEX (0 = the table default).  Returns 0 or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	/* The target table is selected by name; NDTA_NAME is mandatory. */
	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	/* Find the table matching both family (if given) and name. */
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied parameter attribute in turn. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count; stored in bytes. */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	/* Table-wide GC knobs. */
	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2077
/* RTM_GETNEIGHTBL dump handler: for every matching table emit the table
 * info followed by each per-device parms set of this netns.
 * cb->args[0]/cb->args[1] carry the table/parms resume cursor across
 * partial dumps.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* Walk per-device parms; tbl->parms itself was covered by
		 * neightbl_fill_info() above, so start at .next.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Moving to a new table: stop skipping parms entries. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2126
/* Fill an RTM_NEWNEIGH message describing one neighbour entry.
 * Returns the message length on success or -EMSGSIZE if @skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* neigh->lock guards nud_state, the hw address snapshot and the
	 * timestamps read below.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* Must drop the lock before cancelling the message. */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference the caller itself holds. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2178
2179static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2180                            u32 pid, u32 seq, int type, unsigned int flags,
2181                            struct neigh_table *tbl)
2182{
2183        struct nlmsghdr *nlh;
2184        struct ndmsg *ndm;
2185
2186        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2187        if (nlh == NULL)
2188                return -EMSGSIZE;
2189
2190        ndm = nlmsg_data(nlh);
2191        ndm->ndm_family  = tbl->family;
2192        ndm->ndm_pad1    = 0;
2193        ndm->ndm_pad2    = 0;
2194        ndm->ndm_flags   = pn->flags | NTF_PROXY;
2195        ndm->ndm_type    = NDA_DST;
2196        ndm->ndm_ifindex = pn->dev->ifindex;
2197        ndm->ndm_state   = NUD_NONE;
2198
2199        if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2200                goto nla_put_failure;
2201
2202        return nlmsg_end(skb, nlh);
2203
2204nla_put_failure:
2205        nlmsg_cancel(skb, nlh);
2206        return -EMSGSIZE;
2207}
2208
/* Broadcast a neighbour change: first to the in-kernel netevent chain,
 * then to rtnetlink listeners as RTM_NEWNEIGH.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2214
/* Dump all neighbour entries of @tbl belonging to the requesting netns
 * into @skb.  cb->args[1]/[2] hold the bucket/index resume cursor for
 * multi-part dumps.  Returns skb->len, or -1 when the buffer filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Only skip entries within the bucket we stopped in. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Save the cursor for the next invocation. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2255
2256static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2257                             struct netlink_callback *cb)
2258{
2259        struct pneigh_entry *n;
2260        struct net *net = sock_net(skb->sk);
2261        int rc, h, s_h = cb->args[3];
2262        int idx, s_idx = idx = cb->args[4];
2263
2264        read_lock_bh(&tbl->lock);
2265
2266        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2267                if (h > s_h)
2268                        s_idx = 0;
2269                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2270                        if (dev_net(n->dev) != net)
2271                                continue;
2272                        if (idx < s_idx)
2273                                goto next;
2274                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2275                                            cb->nlh->nlmsg_seq,
2276                                            RTM_NEWNEIGH,
2277                                            NLM_F_MULTI, tbl) <= 0) {
2278                                read_unlock_bh(&tbl->lock);
2279                                rc = -1;
2280                                goto out;
2281                        }
2282                next:
2283                        idx++;
2284                }
2285        }
2286
2287        read_unlock_bh(&tbl->lock);
2288        rc = skb->len;
2289out:
2290        cb->args[3] = h;
2291        cb->args[4] = idx;
2292        return rc;
2293
2294}
2295
/* Top-level RTM_GETNEIGH dump handler: walk all tables of the requested
 * family and dump either real neighbours or, when the request carries
 * NTF_PROXY, proxy entries.  cb->args[0] is the table resume cursor;
 * the per-table cursors live in args[1..].
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: reset the per-table cursors. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2334
2335void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2336{
2337        int chain;
2338        struct neigh_hash_table *nht;
2339
2340        rcu_read_lock_bh();
2341        nht = rcu_dereference_bh(tbl->nht);
2342
2343        read_lock(&tbl->lock); /* avoid resizes */
2344        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2345                struct neighbour *n;
2346
2347                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2348                     n != NULL;
2349                     n = rcu_dereference_bh(n->next))
2350                        cb(n, cookie);
2351        }
2352        read_unlock(&tbl->lock);
2353        rcu_read_unlock_bh();
2354}
2355EXPORT_SYMBOL(neigh_for_each);
2356
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain and unlink each neighbour for which @cb returns
 * nonzero; unlinked entries are marked dead and then released via
 * neigh_cleanup_and_release() after their own lock is dropped.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink n; np stays put so the entry that
				 * took its place is examined next.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2391
2392#ifdef CONFIG_PROC_FS
2393
/* Return the first neighbour visible to this /proc iterator, scanning
 * buckets from 0 and skipping entries from other namespaces and
 * (optionally) NOARP-only entries.  The bucket reached is stored back
 * into the iterator state.  The caller must hold the RCU-BH read side
 * (entries are accessed with rcu_dereference_bh()).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;	/* NB: the loop restarts at 0 anyway */

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* The sub-iterator decides whether this
				 * entry yields anything; skip it if not.
				 */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2432
/* Advance the /proc iterator past @n to the next eligible neighbour,
 * moving on through later buckets when a chain ends.  *pos is
 * decremented for every entry successfully reached.  The caller must
 * hold the RCU-BH read side (entries are chased with
 * rcu_dereference_bh()).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* The sub-iterator, if set, may have more items for the current
	 * entry before we step to the next neighbour.
	 */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			/* Skip entries whose state is purely NUD_NOARP. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2480
2481static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2482{
2483        struct neighbour *n = neigh_get_first(seq);
2484
2485        if (n) {
2486                --(*pos);
2487                while (*pos) {
2488                        n = neigh_get_next(seq, n, pos);
2489                        if (!n)
2490                                break;
2491                }
2492        }
2493        return *pos ? NULL : n;
2494}
2495
2496static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2497{
2498        struct neigh_seq_state *state = seq->private;
2499        struct net *net = seq_file_net(seq);
2500        struct neigh_table *tbl = state->tbl;
2501        struct pneigh_entry *pn = NULL;
2502        int bucket = state->bucket;
2503
2504        state->flags |= NEIGH_SEQ_IS_PNEIGH;
2505        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2506                pn = tbl->phash_buckets[bucket];
2507                while (pn && !net_eq(pneigh_net(pn), net))
2508                        pn = pn->next;
2509                if (pn)
2510                        break;
2511        }
2512        state->bucket = bucket;
2513
2514        return pn;
2515}
2516
2517static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2518                                            struct pneigh_entry *pn,
2519                                            loff_t *pos)
2520{
2521        struct neigh_seq_state *state = seq->private;
2522        struct net *net = seq_file_net(seq);
2523        struct neigh_table *tbl = state->tbl;
2524
2525        do {
2526                pn = pn->next;
2527        } while (pn && !net_eq(pneigh_net(pn), net));
2528
2529        while (!pn) {
2530                if (++state->bucket > PNEIGH_HASHMASK)
2531                        break;
2532                pn = tbl->phash_buckets[state->bucket];
2533                while (pn && !net_eq(pneigh_net(pn), net))
2534                        pn = pn->next;
2535                if (pn)
2536                        break;
2537        }
2538
2539        if (pn && pos)
2540                --(*pos);
2541
2542        return pn;
2543}
2544
2545static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2546{
2547        struct pneigh_entry *pn = pneigh_get_first(seq);
2548
2549        if (pn) {
2550                --(*pos);
2551                while (*pos) {
2552                        pn = pneigh_get_next(seq, pn, pos);
2553                        if (!pn)
2554                                break;
2555                }
2556        }
2557        return *pos ? NULL : pn;
2558}
2559
2560static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2561{
2562        struct neigh_seq_state *state = seq->private;
2563        void *rc;
2564        loff_t idxpos = *pos;
2565
2566        rc = neigh_get_idx(seq, &idxpos);
2567        if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2568                rc = pneigh_get_idx(seq, &idxpos);
2569
2570        return rc;
2571}
2572
/**
 * neigh_seq_start - begin a seq_file walk over a neighbour table
 * @seq: seq_file whose ->private is a struct neigh_seq_state
 * @pos: requested start offset into the combined neigh + pneigh walk
 * @tbl: neighbour table to iterate
 * @neigh_seq_flags: caller flags (e.g. NEIGH_SEQ_SKIP_NOARP)
 *
 * Takes the RCU-bh read lock, which is held until neigh_seq_stop().
 * Returns SEQ_START_TOKEN for offset zero (header line), the entry at
 * *pos otherwise, or NULL when *pos is past the end.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is internal walk state; never accept it from callers. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2588
/* Advance the combined walk by one entry.  The walk has two phases:
 * regular neighbours first, then proxy (pneigh) entries once those are
 * exhausted — unless the caller asked for NEIGH_SEQ_NEIGH_ONLY.  The
 * phase is tracked via NEIGH_SEQ_IS_PNEIGH, set by pneigh_get_first().
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		/* Still in the neighbour phase; try the next entry. */
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Neighbours exhausted: switch to the proxy entries. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2615
/* Release the RCU-bh read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2622
2623/* statistics via seq_file */
2624
2625static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2626{
2627        struct neigh_table *tbl = seq->private;
2628        int cpu;
2629
2630        if (*pos == 0)
2631                return SEQ_START_TOKEN;
2632
2633        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2634                if (!cpu_possible(cpu))
2635                        continue;
2636                *pos = cpu+1;
2637                return per_cpu_ptr(tbl->stats, cpu);
2638        }
2639        return NULL;
2640}
2641
2642static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2643{
2644        struct neigh_table *tbl = seq->private;
2645        int cpu;
2646
2647        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2648                if (!cpu_possible(cpu))
2649                        continue;
2650                *pos = cpu+1;
2651                return per_cpu_ptr(tbl->stats, cpu);
2652        }
2653        return NULL;
2654}
2655
/* Nothing to release: the walk takes no locks and the per-CPU stats
 * referenced by ->start/->next are statically allocated per table.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2660
2661static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2662{
2663        struct neigh_table *tbl = seq->private;
2664        struct neigh_statistics *st = v;
2665
2666        if (v == SEQ_START_TOKEN) {
2667                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2668                return 0;
2669        }
2670
2671        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2672                        "%08lx %08lx  %08lx %08lx %08lx\n",
2673                   atomic_read(&tbl->entries),
2674
2675                   st->allocs,
2676                   st->destroys,
2677                   st->hash_grows,
2678
2679                   st->lookups,
2680                   st->hits,
2681
2682                   st->res_failed,
2683
2684                   st->rcv_probes_mcast,
2685                   st->rcv_probes_ucast,
2686
2687                   st->periodic_gc_runs,
2688                   st->forced_gc_runs,
2689                   st->unres_discards
2690                   );
2691
2692        return 0;
2693}
2694
/* seq_file iterator over the per-CPU neighbour statistics. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2701
2702static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2703{
2704        int ret = seq_open(file, &neigh_stat_seq_ops);
2705
2706        if (!ret) {
2707                struct seq_file *sf = file->private_data;
2708                sf->private = PDE_DATA(inode);
2709        }
2710        return ret;
2711};
2712
/* File operations backing the per-table statistics proc entry. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2720
2721#endif /* CONFIG_PROC_FS */
2722
2723static inline size_t neigh_nlmsg_size(void)
2724{
2725        return NLMSG_ALIGN(sizeof(struct ndmsg))
2726               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2727               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2728               + nla_total_size(sizeof(struct nda_cacheinfo))
2729               + nla_total_size(4); /* NDA_PROBES */
2730}
2731
2732static void __neigh_notify(struct neighbour *n, int type, int flags)
2733{
2734        struct net *net = dev_net(n->dev);
2735        struct sk_buff *skb;
2736        int err = -ENOBUFS;
2737
2738        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2739        if (skb == NULL)
2740                goto errout;
2741
2742        err = neigh_fill_info(skb, n, 0, 0, type, flags);
2743        if (err < 0) {
2744                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2745                WARN_ON(err == -EMSGSIZE);
2746                kfree_skb(skb);
2747                goto errout;
2748        }
2749        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2750        return;
2751errout:
2752        if (err < 0)
2753                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2754}
2755
#ifdef CONFIG_ARPD
/* Send an RTM_GETNEIGH request notification for @n — presumably so a
 * user-space resolver (arpd) can take over; confirm against callers.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2763
2764#ifdef CONFIG_SYSCTL
2765static int zero;
2766static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2767
2768static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2769                           size_t *lenp, loff_t *ppos)
2770{
2771        int size, ret;
2772        ctl_table tmp = *ctl;
2773
2774        tmp.extra1 = &zero;
2775        tmp.extra2 = &unres_qlen_max;
2776        tmp.data = &size;
2777
2778        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2779        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2780
2781        if (write && !ret)
2782                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2783        return ret;
2784}
2785
/* Indices into neigh_sysctl_template.neigh_vars[].  The order must match
 * the designated initializers in the template below; entries from
 * NEIGH_VAR_GC_INTERVAL onward are table-global and only exposed for the
 * "default" (dev == NULL) instance.
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2808
/* Template sysctl table, kmemdup'd per registration by
 * neigh_sysctl_register(), which then fills in the .data pointers
 * (and may truncate or override entries).  .data is deliberately left
 * NULL here; unres_qlen shares its backing store with unres_qlen_bytes
 * via proc_unres_qlen().
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
		{},
	},
};
2932
/**
 * neigh_sysctl_register - expose neigh_parms under net/<proto>/neigh/<dev>
 * @dev: device the parameters belong to, or NULL for the "default" set
 * @p: neighbour parameters whose fields back the sysctl entries
 * @p_name: protocol component of the sysctl path (e.g. "ipv4")
 * @handler: optional proc handler overriding the four time-valued knobs
 *
 * Duplicates the template table, points each entry at the corresponding
 * field of @p and registers it.  Returns 0 on success, -ENOBUFS on
 * allocation or registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Wire each per-parms entry to its backing field.  Note that
	 * unres_qlen and unres_qlen_bytes share queue_len_bytes, and the
	 * *_ms variants alias the jiffies-valued fields.
	 */
	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		dev_name_source = "default";
		/* Table-global knobs point at the four ints that follow
		 * the parms struct in memory — assumes the caller laid
		 * them out there (see the table allocation site).
		 */
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3009
3010void neigh_sysctl_unregister(struct neigh_parms *p)
3011{
3012        if (p->sysctl_table) {
3013                struct neigh_sysctl_table *t = p->sysctl_table;
3014                p->sysctl_table = NULL;
3015                unregister_net_sysctl_table(t->sysctl_header);
3016                kfree(t);
3017        }
3018}
3019EXPORT_SYMBOL(neigh_sysctl_unregister);
3020
3021#endif  /* CONFIG_SYSCTL */
3022
/* Register the PF_UNSPEC rtnetlink handlers for neighbour entries
 * (new/del/dump) and neighbour tables (dump/set).
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3037
3038