linux/net/core/neighbour.c
<<
>>
Prefs
   1/*
   2 *      Generic address resolution entity
   3 *
   4 *      Authors:
   5 *      Pedro Roque             <roque@di.fc.ul.pt>
   6 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 *      Fixes:
  14 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
  15 *      Harald Welte            Add neighbour cache statistics like rtstat
  16 */
  17
  18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19
  20#include <linux/slab.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/socket.h>
  25#include <linux/netdevice.h>
  26#include <linux/proc_fs.h>
  27#ifdef CONFIG_SYSCTL
  28#include <linux/sysctl.h>
  29#endif
  30#include <linux/times.h>
  31#include <net/net_namespace.h>
  32#include <net/neighbour.h>
  33#include <net/dst.h>
  34#include <net/sock.h>
  35#include <net/netevent.h>
  36#include <net/netlink.h>
  37#include <linux/rtnetlink.h>
  38#include <linux/random.h>
  39#include <linux/string.h>
  40#include <linux/log2.h>
  41
  42#define NEIGH_DEBUG 1
  43
  44#define NEIGH_PRINTK(x...) printk(x)
  45#define NEIGH_NOPRINTK(x...) do { ; } while(0)
  46#define NEIGH_PRINTK1 NEIGH_NOPRINTK
  47#define NEIGH_PRINTK2 NEIGH_NOPRINTK
  48
  49#if NEIGH_DEBUG >= 1
  50#undef NEIGH_PRINTK1
  51#define NEIGH_PRINTK1 NEIGH_PRINTK
  52#endif
  53#if NEIGH_DEBUG >= 2
  54#undef NEIGH_PRINTK2
  55#define NEIGH_PRINTK2 NEIGH_PRINTK
  56#endif
  57
  58#define PNEIGH_HASHMASK         0xF
  59
  60static void neigh_timer_handler(unsigned long arg);
  61static void __neigh_notify(struct neighbour *n, int type, int flags);
  62static void neigh_update_notify(struct neighbour *neigh);
  63static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
  64
  65static struct neigh_table *neigh_tables;
  66#ifdef CONFIG_PROC_FS
  67static const struct file_operations neigh_stat_seq_fops;
  68#endif
  69
  70/*
  71   Neighbour hash table buckets are protected with rwlock tbl->lock.
  72
  73   - All the scans/updates to hash buckets MUST be made under this lock.
  74   - NOTHING clever should be made under this lock: no callbacks
  75     to protocol backends, no attempts to send something to network.
  76     It will result in deadlocks, if backend/driver wants to use neighbour
  77     cache.
  78   - If the entry requires some non-trivial actions, increase
  79     its reference count and release table lock.
  80
  81   Neighbour entries are protected:
  82   - with reference count.
  83   - with rwlock neigh->lock
  84
  85   Reference count prevents destruction.
  86
  87   neigh->lock mainly serializes ll address data and its validity state.
  88   However, the same lock is used to protect another entry fields:
  89    - timer
  90    - resolution queue
  91
  92   Again, nothing clever shall be made under neigh->lock,
  93   the most complicated procedure, which we allow is dev->hard_header.
  94   It is supposed, that dev->hard_header is simplistic and does
  95   not make callbacks to neighbour tables.
  96
   The last lock is neigh_tbl_lock. It is a pure SMP lock protecting
   the list of neighbour tables. This list is used only in process context.
 */
 100
 101static DEFINE_RWLOCK(neigh_tbl_lock);
 102
 103static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
 104{
 105        kfree_skb(skb);
 106        return -ENETDOWN;
 107}
 108
 109static void neigh_cleanup_and_release(struct neighbour *neigh)
 110{
 111        if (neigh->parms->neigh_cleanup)
 112                neigh->parms->neigh_cleanup(neigh);
 113
 114        __neigh_notify(neigh, RTM_DELNEIGH, 0);
 115        neigh_release(neigh);
 116}
 117
 118/*
 119 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 120 * It corresponds to default IPv6 settings and is not overridable,
 121 * because it is really reasonable choice.
 122 */
 123
 124unsigned long neigh_rand_reach_time(unsigned long base)
 125{
 126        return base ? (net_random() % base) + (base >> 1) : 0;
 127}
 128EXPORT_SYMBOL(neigh_rand_reach_time);
 129
 130
 131static int neigh_forced_gc(struct neigh_table *tbl)
 132{
 133        int shrunk = 0;
 134        int i;
 135        struct neigh_hash_table *nht;
 136
 137        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 138
 139        write_lock_bh(&tbl->lock);
 140        nht = rcu_dereference_protected(tbl->nht,
 141                                        lockdep_is_held(&tbl->lock));
 142        for (i = 0; i < (1 << nht->hash_shift); i++) {
 143                struct neighbour *n;
 144                struct neighbour __rcu **np;
 145
 146                np = &nht->hash_buckets[i];
 147                while ((n = rcu_dereference_protected(*np,
 148                                        lockdep_is_held(&tbl->lock))) != NULL) {
 149                        /* Neighbour record may be discarded if:
 150                         * - nobody refers to it.
 151                         * - it is not permanent
 152                         */
 153                        write_lock(&n->lock);
 154                        if (atomic_read(&n->refcnt) == 1 &&
 155                            !(n->nud_state & NUD_PERMANENT)) {
 156                                rcu_assign_pointer(*np,
 157                                        rcu_dereference_protected(n->next,
 158                                                  lockdep_is_held(&tbl->lock)));
 159                                n->dead = 1;
 160                                shrunk  = 1;
 161                                write_unlock(&n->lock);
 162                                neigh_cleanup_and_release(n);
 163                                continue;
 164                        }
 165                        write_unlock(&n->lock);
 166                        np = &n->next;
 167                }
 168        }
 169
 170        tbl->last_flush = jiffies;
 171
 172        write_unlock_bh(&tbl->lock);
 173
 174        return shrunk;
 175}
 176
 177static void neigh_add_timer(struct neighbour *n, unsigned long when)
 178{
 179        neigh_hold(n);
 180        if (unlikely(mod_timer(&n->timer, when))) {
 181                printk("NEIGH: BUG, double timer add, state is %x\n",
 182                       n->nud_state);
 183                dump_stack();
 184        }
 185}
 186
 187static int neigh_del_timer(struct neighbour *n)
 188{
 189        if ((n->nud_state & NUD_IN_TIMER) &&
 190            del_timer(&n->timer)) {
 191                neigh_release(n);
 192                return 1;
 193        }
 194        return 0;
 195}
 196
 197static void pneigh_queue_purge(struct sk_buff_head *list)
 198{
 199        struct sk_buff *skb;
 200
 201        while ((skb = skb_dequeue(list)) != NULL) {
 202                dev_put(skb->dev);
 203                kfree_skb(skb);
 204        }
 205}
 206
 207static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 208{
 209        int i;
 210        struct neigh_hash_table *nht;
 211
 212        nht = rcu_dereference_protected(tbl->nht,
 213                                        lockdep_is_held(&tbl->lock));
 214
 215        for (i = 0; i < (1 << nht->hash_shift); i++) {
 216                struct neighbour *n;
 217                struct neighbour __rcu **np = &nht->hash_buckets[i];
 218
 219                while ((n = rcu_dereference_protected(*np,
 220                                        lockdep_is_held(&tbl->lock))) != NULL) {
 221                        if (dev && n->dev != dev) {
 222                                np = &n->next;
 223                                continue;
 224                        }
 225                        rcu_assign_pointer(*np,
 226                                   rcu_dereference_protected(n->next,
 227                                                lockdep_is_held(&tbl->lock)));
 228                        write_lock(&n->lock);
 229                        neigh_del_timer(n);
 230                        n->dead = 1;
 231
 232                        if (atomic_read(&n->refcnt) != 1) {
 233                                /* The most unpleasant situation.
 234                                   We must destroy neighbour entry,
 235                                   but someone still uses it.
 236
 237                                   The destroy will be delayed until
 238                                   the last user releases us, but
 239                                   we must kill timers etc. and move
 240                                   it to safe state.
 241                                 */
 242                                skb_queue_purge(&n->arp_queue);
 243                                n->arp_queue_len_bytes = 0;
 244                                n->output = neigh_blackhole;
 245                                if (n->nud_state & NUD_VALID)
 246                                        n->nud_state = NUD_NOARP;
 247                                else
 248                                        n->nud_state = NUD_NONE;
 249                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
 250                        }
 251                        write_unlock(&n->lock);
 252                        neigh_cleanup_and_release(n);
 253                }
 254        }
 255}
 256
 257void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 258{
 259        write_lock_bh(&tbl->lock);
 260        neigh_flush_dev(tbl, dev);
 261        write_unlock_bh(&tbl->lock);
 262}
 263EXPORT_SYMBOL(neigh_changeaddr);
 264
 265int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 266{
 267        write_lock_bh(&tbl->lock);
 268        neigh_flush_dev(tbl, dev);
 269        pneigh_ifdown(tbl, dev);
 270        write_unlock_bh(&tbl->lock);
 271
 272        del_timer_sync(&tbl->proxy_timer);
 273        pneigh_queue_purge(&tbl->proxy_queue);
 274        return 0;
 275}
 276EXPORT_SYMBOL(neigh_ifdown);
 277
 278static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
 279{
 280        struct neighbour *n = NULL;
 281        unsigned long now = jiffies;
 282        int entries;
 283
 284        entries = atomic_inc_return(&tbl->entries) - 1;
 285        if (entries >= tbl->gc_thresh3 ||
 286            (entries >= tbl->gc_thresh2 &&
 287             time_after(now, tbl->last_flush + 5 * HZ))) {
 288                if (!neigh_forced_gc(tbl) &&
 289                    entries >= tbl->gc_thresh3)
 290                        goto out_entries;
 291        }
 292
 293        if (tbl->entry_size)
 294                n = kzalloc(tbl->entry_size, GFP_ATOMIC);
 295        else {
 296                int sz = sizeof(*n) + tbl->key_len;
 297
 298                sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
 299                sz += dev->neigh_priv_len;
 300                n = kzalloc(sz, GFP_ATOMIC);
 301        }
 302        if (!n)
 303                goto out_entries;
 304
 305        skb_queue_head_init(&n->arp_queue);
 306        rwlock_init(&n->lock);
 307        seqlock_init(&n->ha_lock);
 308        n->updated        = n->used = now;
 309        n->nud_state      = NUD_NONE;
 310        n->output         = neigh_blackhole;
 311        seqlock_init(&n->hh.hh_lock);
 312        n->parms          = neigh_parms_clone(&tbl->parms);
 313        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
 314
 315        NEIGH_CACHE_STAT_INC(tbl, allocs);
 316        n->tbl            = tbl;
 317        atomic_set(&n->refcnt, 1);
 318        n->dead           = 1;
 319out:
 320        return n;
 321
 322out_entries:
 323        atomic_dec(&tbl->entries);
 324        goto out;
 325}
 326
 327static void neigh_get_hash_rnd(u32 *x)
 328{
 329        get_random_bytes(x, sizeof(*x));
 330        *x |= 1;
 331}
 332
 333static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 334{
 335        size_t size = (1 << shift) * sizeof(struct neighbour *);
 336        struct neigh_hash_table *ret;
 337        struct neighbour __rcu **buckets;
 338        int i;
 339
 340        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 341        if (!ret)
 342                return NULL;
 343        if (size <= PAGE_SIZE)
 344                buckets = kzalloc(size, GFP_ATOMIC);
 345        else
 346                buckets = (struct neighbour __rcu **)
 347                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 348                                           get_order(size));
 349        if (!buckets) {
 350                kfree(ret);
 351                return NULL;
 352        }
 353        ret->hash_buckets = buckets;
 354        ret->hash_shift = shift;
 355        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
 356                neigh_get_hash_rnd(&ret->hash_rnd[i]);
 357        return ret;
 358}
 359
 360static void neigh_hash_free_rcu(struct rcu_head *head)
 361{
 362        struct neigh_hash_table *nht = container_of(head,
 363                                                    struct neigh_hash_table,
 364                                                    rcu);
 365        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 366        struct neighbour __rcu **buckets = nht->hash_buckets;
 367
 368        if (size <= PAGE_SIZE)
 369                kfree(buckets);
 370        else
 371                free_pages((unsigned long)buckets, get_order(size));
 372        kfree(nht);
 373}
 374
 375static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 376                                                unsigned long new_shift)
 377{
 378        unsigned int i, hash;
 379        struct neigh_hash_table *new_nht, *old_nht;
 380
 381        NEIGH_CACHE_STAT_INC(tbl, hash_grows);
 382
 383        old_nht = rcu_dereference_protected(tbl->nht,
 384                                            lockdep_is_held(&tbl->lock));
 385        new_nht = neigh_hash_alloc(new_shift);
 386        if (!new_nht)
 387                return old_nht;
 388
 389        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
 390                struct neighbour *n, *next;
 391
 392                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
 393                                                   lockdep_is_held(&tbl->lock));
 394                     n != NULL;
 395                     n = next) {
 396                        hash = tbl->hash(n->primary_key, n->dev,
 397                                         new_nht->hash_rnd);
 398
 399                        hash >>= (32 - new_nht->hash_shift);
 400                        next = rcu_dereference_protected(n->next,
 401                                                lockdep_is_held(&tbl->lock));
 402
 403                        rcu_assign_pointer(n->next,
 404                                           rcu_dereference_protected(
 405                                                new_nht->hash_buckets[hash],
 406                                                lockdep_is_held(&tbl->lock)));
 407                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
 408                }
 409        }
 410
 411        rcu_assign_pointer(tbl->nht, new_nht);
 412        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
 413        return new_nht;
 414}
 415
 416struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 417                               struct net_device *dev)
 418{
 419        struct neighbour *n;
 420        int key_len = tbl->key_len;
 421        u32 hash_val;
 422        struct neigh_hash_table *nht;
 423
 424        NEIGH_CACHE_STAT_INC(tbl, lookups);
 425
 426        rcu_read_lock_bh();
 427        nht = rcu_dereference_bh(tbl->nht);
 428        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 429
 430        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 431             n != NULL;
 432             n = rcu_dereference_bh(n->next)) {
 433                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
 434                        if (!atomic_inc_not_zero(&n->refcnt))
 435                                n = NULL;
 436                        NEIGH_CACHE_STAT_INC(tbl, hits);
 437                        break;
 438                }
 439        }
 440
 441        rcu_read_unlock_bh();
 442        return n;
 443}
 444EXPORT_SYMBOL(neigh_lookup);
 445
 446struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 447                                     const void *pkey)
 448{
 449        struct neighbour *n;
 450        int key_len = tbl->key_len;
 451        u32 hash_val;
 452        struct neigh_hash_table *nht;
 453
 454        NEIGH_CACHE_STAT_INC(tbl, lookups);
 455
 456        rcu_read_lock_bh();
 457        nht = rcu_dereference_bh(tbl->nht);
 458        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
 459
 460        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 461             n != NULL;
 462             n = rcu_dereference_bh(n->next)) {
 463                if (!memcmp(n->primary_key, pkey, key_len) &&
 464                    net_eq(dev_net(n->dev), net)) {
 465                        if (!atomic_inc_not_zero(&n->refcnt))
 466                                n = NULL;
 467                        NEIGH_CACHE_STAT_INC(tbl, hits);
 468                        break;
 469                }
 470        }
 471
 472        rcu_read_unlock_bh();
 473        return n;
 474}
 475EXPORT_SYMBOL(neigh_lookup_nodev);
 476
 477struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 478                                 struct net_device *dev, bool want_ref)
 479{
 480        u32 hash_val;
 481        int key_len = tbl->key_len;
 482        int error;
 483        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 484        struct neigh_hash_table *nht;
 485
 486        if (!n) {
 487                rc = ERR_PTR(-ENOBUFS);
 488                goto out;
 489        }
 490
 491        memcpy(n->primary_key, pkey, key_len);
 492        n->dev = dev;
 493        dev_hold(dev);
 494
 495        /* Protocol specific setup. */
 496        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
 497                rc = ERR_PTR(error);
 498                goto out_neigh_release;
 499        }
 500
 501        if (dev->netdev_ops->ndo_neigh_construct) {
 502                error = dev->netdev_ops->ndo_neigh_construct(n);
 503                if (error < 0) {
 504                        rc = ERR_PTR(error);
 505                        goto out_neigh_release;
 506                }
 507        }
 508
 509        /* Device specific setup. */
 510        if (n->parms->neigh_setup &&
 511            (error = n->parms->neigh_setup(n)) < 0) {
 512                rc = ERR_PTR(error);
 513                goto out_neigh_release;
 514        }
 515
 516        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 517
 518        write_lock_bh(&tbl->lock);
 519        nht = rcu_dereference_protected(tbl->nht,
 520                                        lockdep_is_held(&tbl->lock));
 521
 522        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
 523                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 524
 525        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 526
 527        if (n->parms->dead) {
 528                rc = ERR_PTR(-EINVAL);
 529                goto out_tbl_unlock;
 530        }
 531
 532        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
 533                                            lockdep_is_held(&tbl->lock));
 534             n1 != NULL;
 535             n1 = rcu_dereference_protected(n1->next,
 536                        lockdep_is_held(&tbl->lock))) {
 537                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
 538                        if (want_ref)
 539                                neigh_hold(n1);
 540                        rc = n1;
 541                        goto out_tbl_unlock;
 542                }
 543        }
 544
 545        n->dead = 0;
 546        if (want_ref)
 547                neigh_hold(n);
 548        rcu_assign_pointer(n->next,
 549                           rcu_dereference_protected(nht->hash_buckets[hash_val],
 550                                                     lockdep_is_held(&tbl->lock)));
 551        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
 552        write_unlock_bh(&tbl->lock);
 553        NEIGH_PRINTK2("neigh %p is created.\n", n);
 554        rc = n;
 555out:
 556        return rc;
 557out_tbl_unlock:
 558        write_unlock_bh(&tbl->lock);
 559out_neigh_release:
 560        neigh_release(n);
 561        goto out;
 562}
 563EXPORT_SYMBOL(__neigh_create);
 564
 565static u32 pneigh_hash(const void *pkey, int key_len)
 566{
 567        u32 hash_val = *(u32 *)(pkey + key_len - 4);
 568        hash_val ^= (hash_val >> 16);
 569        hash_val ^= hash_val >> 8;
 570        hash_val ^= hash_val >> 4;
 571        hash_val &= PNEIGH_HASHMASK;
 572        return hash_val;
 573}
 574
 575static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 576                                              struct net *net,
 577                                              const void *pkey,
 578                                              int key_len,
 579                                              struct net_device *dev)
 580{
 581        while (n) {
 582                if (!memcmp(n->key, pkey, key_len) &&
 583                    net_eq(pneigh_net(n), net) &&
 584                    (n->dev == dev || !n->dev))
 585                        return n;
 586                n = n->next;
 587        }
 588        return NULL;
 589}
 590
 591struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
 592                struct net *net, const void *pkey, struct net_device *dev)
 593{
 594        int key_len = tbl->key_len;
 595        u32 hash_val = pneigh_hash(pkey, key_len);
 596
 597        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 598                                 net, pkey, key_len, dev);
 599}
 600EXPORT_SYMBOL_GPL(__pneigh_lookup);
 601
 602struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 603                                    struct net *net, const void *pkey,
 604                                    struct net_device *dev, int creat)
 605{
 606        struct pneigh_entry *n;
 607        int key_len = tbl->key_len;
 608        u32 hash_val = pneigh_hash(pkey, key_len);
 609
 610        read_lock_bh(&tbl->lock);
 611        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 612                              net, pkey, key_len, dev);
 613        read_unlock_bh(&tbl->lock);
 614
 615        if (n || !creat)
 616                goto out;
 617
 618        ASSERT_RTNL();
 619
 620        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
 621        if (!n)
 622                goto out;
 623
 624        write_pnet(&n->net, hold_net(net));
 625        memcpy(n->key, pkey, key_len);
 626        n->dev = dev;
 627        if (dev)
 628                dev_hold(dev);
 629
 630        if (tbl->pconstructor && tbl->pconstructor(n)) {
 631                if (dev)
 632                        dev_put(dev);
 633                release_net(net);
 634                kfree(n);
 635                n = NULL;
 636                goto out;
 637        }
 638
 639        write_lock_bh(&tbl->lock);
 640        n->next = tbl->phash_buckets[hash_val];
 641        tbl->phash_buckets[hash_val] = n;
 642        write_unlock_bh(&tbl->lock);
 643out:
 644        return n;
 645}
 646EXPORT_SYMBOL(pneigh_lookup);
 647
 648
 649int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 650                  struct net_device *dev)
 651{
 652        struct pneigh_entry *n, **np;
 653        int key_len = tbl->key_len;
 654        u32 hash_val = pneigh_hash(pkey, key_len);
 655
 656        write_lock_bh(&tbl->lock);
 657        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
 658             np = &n->next) {
 659                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
 660                    net_eq(pneigh_net(n), net)) {
 661                        *np = n->next;
 662                        write_unlock_bh(&tbl->lock);
 663                        if (tbl->pdestructor)
 664                                tbl->pdestructor(n);
 665                        if (n->dev)
 666                                dev_put(n->dev);
 667                        release_net(pneigh_net(n));
 668                        kfree(n);
 669                        return 0;
 670                }
 671        }
 672        write_unlock_bh(&tbl->lock);
 673        return -ENOENT;
 674}
 675
 676static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 677{
 678        struct pneigh_entry *n, **np;
 679        u32 h;
 680
 681        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
 682                np = &tbl->phash_buckets[h];
 683                while ((n = *np) != NULL) {
 684                        if (!dev || n->dev == dev) {
 685                                *np = n->next;
 686                                if (tbl->pdestructor)
 687                                        tbl->pdestructor(n);
 688                                if (n->dev)
 689                                        dev_put(n->dev);
 690                                release_net(pneigh_net(n));
 691                                kfree(n);
 692                                continue;
 693                        }
 694                        np = &n->next;
 695                }
 696        }
 697        return -ENOENT;
 698}
 699
 700static void neigh_parms_destroy(struct neigh_parms *parms);
 701
 702static inline void neigh_parms_put(struct neigh_parms *parms)
 703{
 704        if (atomic_dec_and_test(&parms->refcnt))
 705                neigh_parms_destroy(parms);
 706}
 707
 708/*
 709 *      neighbour must already be out of the table;
 710 *
 711 */
 712void neigh_destroy(struct neighbour *neigh)
 713{
 714        struct net_device *dev = neigh->dev;
 715
 716        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 717
 718        if (!neigh->dead) {
 719                pr_warn("Destroying alive neighbour %p\n", neigh);
 720                dump_stack();
 721                return;
 722        }
 723
 724        if (neigh_del_timer(neigh))
 725                pr_warn("Impossible event\n");
 726
 727        skb_queue_purge(&neigh->arp_queue);
 728        neigh->arp_queue_len_bytes = 0;
 729
 730        if (dev->netdev_ops->ndo_neigh_destroy)
 731                dev->netdev_ops->ndo_neigh_destroy(neigh);
 732
 733        dev_put(dev);
 734        neigh_parms_put(neigh->parms);
 735
 736        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
 737
 738        atomic_dec(&neigh->tbl->entries);
 739        kfree_rcu(neigh, rcu);
 740}
 741EXPORT_SYMBOL(neigh_destroy);
 742
 743/* Neighbour state is suspicious;
 744   disable fast path.
 745
 746   Called with write_locked neigh.
 747 */
 748static void neigh_suspect(struct neighbour *neigh)
 749{
 750        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 751
 752        neigh->output = neigh->ops->output;
 753}
 754
 755/* Neighbour state is OK;
 756   enable fast path.
 757
 758   Called with write_locked neigh.
 759 */
 760static void neigh_connect(struct neighbour *neigh)
 761{
 762        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
 763
 764        neigh->output = neigh->ops->connected_output;
 765}
 766
 767static void neigh_periodic_work(struct work_struct *work)
 768{
 769        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
 770        struct neighbour *n;
 771        struct neighbour __rcu **np;
 772        unsigned int i;
 773        struct neigh_hash_table *nht;
 774
 775        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 776
 777        write_lock_bh(&tbl->lock);
 778        nht = rcu_dereference_protected(tbl->nht,
 779                                        lockdep_is_held(&tbl->lock));
 780
 781        /*
 782         *      periodically recompute ReachableTime from random function
 783         */
 784
 785        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
 786                struct neigh_parms *p;
 787                tbl->last_rand = jiffies;
 788                for (p = &tbl->parms; p; p = p->next)
 789                        p->reachable_time =
 790                                neigh_rand_reach_time(p->base_reachable_time);
 791        }
 792
 793        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
 794                np = &nht->hash_buckets[i];
 795
 796                while ((n = rcu_dereference_protected(*np,
 797                                lockdep_is_held(&tbl->lock))) != NULL) {
 798                        unsigned int state;
 799
 800                        write_lock(&n->lock);
 801
 802                        state = n->nud_state;
 803                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
 804                                write_unlock(&n->lock);
 805                                goto next_elt;
 806                        }
 807
 808                        if (time_before(n->used, n->confirmed))
 809                                n->used = n->confirmed;
 810
 811                        if (atomic_read(&n->refcnt) == 1 &&
 812                            (state == NUD_FAILED ||
 813                             time_after(jiffies, n->used + n->parms->gc_staletime))) {
 814                                *np = n->next;
 815                                n->dead = 1;
 816                                write_unlock(&n->lock);
 817                                neigh_cleanup_and_release(n);
 818                                continue;
 819                        }
 820                        write_unlock(&n->lock);
 821
 822next_elt:
 823                        np = &n->next;
 824                }
 825                /*
 826                 * It's fine to release lock here, even if hash table
 827                 * grows while we are preempted.
 828                 */
 829                write_unlock_bh(&tbl->lock);
 830                cond_resched();
 831                write_lock_bh(&tbl->lock);
 832                nht = rcu_dereference_protected(tbl->nht,
 833                                                lockdep_is_held(&tbl->lock));
 834        }
 835        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
 836         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
 837         * base_reachable_time.
 838         */
 839        schedule_delayed_work(&tbl->gc_work,
 840                              tbl->parms.base_reachable_time >> 1);
 841        write_unlock_bh(&tbl->lock);
 842}
 843
 844static __inline__ int neigh_max_probes(struct neighbour *n)
 845{
 846        struct neigh_parms *p = n->parms;
 847        return (n->nud_state & NUD_PROBE) ?
 848                p->ucast_probes :
 849                p->ucast_probes + p->app_probes + p->mcast_probes;
 850}
 851
 852static void neigh_invalidate(struct neighbour *neigh)
 853        __releases(neigh->lock)
 854        __acquires(neigh->lock)
 855{
 856        struct sk_buff *skb;
 857
 858        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
 859        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
 860        neigh->updated = jiffies;
 861
 862        /* It is very thin place. report_unreachable is very complicated
 863           routine. Particularly, it can hit the same neighbour entry!
 864
 865           So that, we try to be accurate and avoid dead loop. --ANK
 866         */
 867        while (neigh->nud_state == NUD_FAILED &&
 868               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
 869                write_unlock(&neigh->lock);
 870                neigh->ops->error_report(neigh, skb);
 871                write_lock(&neigh->lock);
 872        }
 873        skb_queue_purge(&neigh->arp_queue);
 874        neigh->arp_queue_len_bytes = 0;
 875}
 876
 877static void neigh_probe(struct neighbour *neigh)
 878        __releases(neigh->lock)
 879{
 880        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
 881        /* keep skb alive even if arp_queue overflows */
 882        if (skb)
 883                skb = skb_copy(skb, GFP_ATOMIC);
 884        write_unlock(&neigh->lock);
 885        neigh->ops->solicit(neigh, skb);
 886        atomic_inc(&neigh->probes);
 887        kfree_skb(skb);
 888}
 889
 890/* Called when a timer expires for a neighbour entry.
     *
     * @arg is the struct neighbour pointer cast to unsigned long.
     *
     * Under neigh->lock (write) the handler advances the NUD state machine
     * (REACHABLE -> DELAY/STALE, DELAY -> REACHABLE/PROBE, and
     * INCOMPLETE/PROBE -> FAILED once neigh_max_probes() is reached), re-arms
     * the timer while the state is still in NUD_IN_TIMER, and sends a probe
     * for INCOMPLETE/PROBE entries. Notification happens after the lock has
     * been dropped. The final neigh_release() drops one reference;
     * presumably the one taken when the timer was armed (see the
     * neigh_hold() next to mod_timer() below).
     */
 891
 892static void neigh_timer_handler(unsigned long arg)
 893{
 894        unsigned long now, next;
 895        struct neighbour *neigh = (struct neighbour *)arg;
 896        unsigned int state;
 897        int notify = 0;
 898
 899        write_lock(&neigh->lock);
 900
 901        state = neigh->nud_state;
 902        now = jiffies;
 903        next = now + HZ;
 904
            /* State left the timer-driven set since the timer was armed:
             * nothing to do except unlock and drop the reference.
             */
 905        if (!(state & NUD_IN_TIMER))
 906                goto out;
 907
 908        if (state & NUD_REACHABLE) {
 909                if (time_before_eq(now,
 910                                   neigh->confirmed + neigh->parms->reachable_time)) {
                            /* Confirmed recently enough: just wait until the
                             * confirmation itself expires.
                             */
 911                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
 912                        next = neigh->confirmed + neigh->parms->reachable_time;
 913                } else if (time_before_eq(now,
 914                                          neigh->used + neigh->parms->delay_probe_time)) {
                            /* Not confirmed, but used within delay_probe_time. */
 915                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
 916                        neigh->nud_state = NUD_DELAY;
 917                        neigh->updated = jiffies;
 918                        neigh_suspect(neigh);
 919                        next = now + neigh->parms->delay_probe_time;
 920                } else {
                            /* Neither confirmed nor recently used. */
 921                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 922                        neigh->nud_state = NUD_STALE;
 923                        neigh->updated = jiffies;
 924                        neigh_suspect(neigh);
 925                        notify = 1;
 926                }
 927        } else if (state & NUD_DELAY) {
 928                if (time_before_eq(now,
 929                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                            /* A confirmation arrived during the delay period. */
 930                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
 931                        neigh->nud_state = NUD_REACHABLE;
 932                        neigh->updated = jiffies;
 933                        neigh_connect(neigh);
 934                        notify = 1;
 935                        next = neigh->confirmed + neigh->parms->reachable_time;
 936                } else {
                            /* No confirmation: start probing with a fresh
                             * probe counter.
                             */
 937                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
 938                        neigh->nud_state = NUD_PROBE;
 939                        neigh->updated = jiffies;
 940                        atomic_set(&neigh->probes, 0);
 941                        next = now + neigh->parms->retrans_time;
 942                }
 943        } else {
 944                /* NUD_PROBE|NUD_INCOMPLETE */
 945                next = now + neigh->parms->retrans_time;
 946        }
 947
            /* Probe budget exhausted: fail the entry. neigh_invalidate()
             * may drop and retake neigh->lock, so nud_state is re-read from
             * the entry (not from 'state') in the checks below.
             */
 948        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
 949            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
 950                neigh->nud_state = NUD_FAILED;
 951                notify = 1;
 952                neigh_invalidate(neigh);
 953        }
 954
 955        if (neigh->nud_state & NUD_IN_TIMER) {
                    /* Never re-arm closer than HZ/2 from now. */
 956                if (time_before(next, jiffies + HZ/2))
 957                        next = jiffies + HZ/2;
                    /* mod_timer() returning 0 means the timer was not
                     * pending, so a new reference is taken for it.
                     */
 958                if (!mod_timer(&neigh->timer, next))
 959                        neigh_hold(neigh);
 960        }
 961        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                    /* neigh_probe() releases neigh->lock itself. */
 962                neigh_probe(neigh);
 963        } else {
 964out:
 965                write_unlock(&neigh->lock);
 966        }
 967
 968        if (notify)
 969                neigh_update_notify(neigh);
 970
 971        neigh_release(neigh);
 972}
 973
    /* Slow path taken when a packet wants to use @neigh.
     *
     * Returns 0 when the caller may go on and transmit @skb itself (entry is
     * CONNECTED/DELAY/PROBE, or was STALE and has just been moved to DELAY).
     * Returns 1 when @skb has been consumed here: either queued on
     * neigh->arp_queue while the entry is NUD_INCOMPLETE, or freed because
     * resolution cannot even be started (entry set to NUD_FAILED).
     * @skb may be NULL, in which case nothing is queued.
     *
     * Takes neigh->lock with write_lock_bh(); every exit path releases the
     * lock and re-enables bottom halves.
     */
 974int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 975{
 976        int rc;
 977        bool immediate_probe = false;
 978
 979        write_lock_bh(&neigh->lock);
 980
 981        rc = 0;
 982        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
 983                goto out_unlock_bh;
 984
 985        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                    /* Resolution has not been started yet. */
 986                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
 987                        unsigned long next, now = jiffies;
 988
                            /* The counter starts at ucast_probes, so with the
                             * limit computed by neigh_max_probes() only
                             * app_probes + mcast_probes attempts remain.
                             */
 989                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
 990                        neigh->nud_state     = NUD_INCOMPLETE;
 991                        neigh->updated = now;
 992                        next = now + max(neigh->parms->retrans_time, HZ/2);
 993                        neigh_add_timer(neigh, next);
 994                        immediate_probe = true;
 995                } else {
                            /* No multicast/application probes configured:
                             * fail at once and drop the packet.
                             */
 996                        neigh->nud_state = NUD_FAILED;
 997                        neigh->updated = jiffies;
 998                        write_unlock_bh(&neigh->lock);
 999
1000                        kfree_skb(skb);
1001                        return 1;
1002                }
1003        } else if (neigh->nud_state & NUD_STALE) {
1004                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1005                neigh->nud_state = NUD_DELAY;
1006                neigh->updated = jiffies;
1007                neigh_add_timer(neigh,
1008                                jiffies + neigh->parms->delay_probe_time);
1009        }
1010
1011        if (neigh->nud_state == NUD_INCOMPLETE) {
1012                if (skb) {
                            /* Make room: discard packets from the head of the
                             * queue until the new one fits within
                             * queue_len_bytes (or the queue is empty).
                             */
1013                        while (neigh->arp_queue_len_bytes + skb->truesize >
1014                               neigh->parms->queue_len_bytes) {
1015                                struct sk_buff *buff;
1016
1017                                buff = __skb_dequeue(&neigh->arp_queue);
1018                                if (!buff)
1019                                        break;
1020                                neigh->arp_queue_len_bytes -= buff->truesize;
1021                                kfree_skb(buff);
1022                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1023                        }
1024                        skb_dst_force(skb);
1025                        __skb_queue_tail(&neigh->arp_queue, skb);
1026                        neigh->arp_queue_len_bytes += skb->truesize;
1027                }
1028                rc = 1;
1029        }
1030out_unlock_bh:
            /* neigh_probe() drops neigh->lock itself; either way the lock is
             * released with a plain unlock and bottom halves are re-enabled
             * separately, which together undo the write_lock_bh() above.
             */
1031        if (immediate_probe)
1032                neigh_probe(neigh);
1033        else
1034                write_unlock(&neigh->lock);
1035        local_bh_enable();
1036        return rc;
1037}
1038EXPORT_SYMBOL(__neigh_event_send);
1039
1040static void neigh_update_hhs(struct neighbour *neigh)
1041{
1042        struct hh_cache *hh;
1043        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1044                = NULL;
1045
1046        if (neigh->dev->header_ops)
1047                update = neigh->dev->header_ops->cache_update;
1048
1049        if (update) {
1050                hh = &neigh->hh;
1051                if (hh->hh_len) {
1052                        write_seqlock_bh(&hh->hh_lock);
1053                        update(hh, neigh->dev, neigh->ha);
1054                        write_sequnlock_bh(&hh->hh_lock);
1055                }
1056        }
1057}
1058
1059
1060
1061/* Generic update routine.
1062   -- lladdr is new lladdr or NULL, if it is not supplied.
1063   -- new    is new state.
1064   -- flags
1065        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1066                                if it is different.
1067        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1068                                lladdr instead of overriding it
1069                                if it is different.
1070                                It also allows to retain current state
1071                                if lladdr is unchanged.
1072        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1073
1074        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1075                                NTF_ROUTER flag.
1076        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1077                                a router.
1078
1079   Caller MUST hold reference count on the entry.

       Return value:
        0        on success; also when a differing lladdr is ignored
                 because no override flag permits replacing it.
        -EPERM   non-administrative update of a NUD_NOARP or
                 NUD_PERMANENT entry.
        -EINVAL  new state is valid, the device uses link-layer
                 addresses, no lladdr was supplied and the entry has
                 no valid address of its own.

       Locking: takes neigh->lock with write_lock_bh(). The lock is
       dropped temporarily while queued packets are handed to the
       output method, and neigh_update_notify() runs after the final
       unlock.
1080 */
1081
1082int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1083                 u32 flags)
1084{
1085        u8 old;
1086        int err;
1087        int notify = 0;
1088        struct net_device *dev;
1089        int update_isrouter = 0;
1090
1091        write_lock_bh(&neigh->lock);
1092
1093        dev    = neigh->dev;
1094        old    = neigh->nud_state;
1095        err    = -EPERM;
1096
1097        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1098            (old & (NUD_NOARP | NUD_PERMANENT)))
1099                goto out;
1100
            /* Moving to a non-valid state: the address is not touched, only
             * the timer, the state and (for INCOMPLETE/PROBE -> FAILED) the
             * pending queue.
             */
1101        if (!(new & NUD_VALID)) {
1102                neigh_del_timer(neigh);
1103                if (old & NUD_CONNECTED)
1104                        neigh_suspect(neigh);
1105                neigh->nud_state = new;
1106                err = 0;
1107                notify = old & NUD_VALID;
1108                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1109                    (new & NUD_FAILED)) {
1110                        neigh_invalidate(neigh);
1111                        notify = 1;
1112                }
1113                goto out;
1114        }
1115
            /* From here on, "lladdr == neigh->ha" (pointer equality) is used
             * as the marker for "address unchanged".
             */
1116        /* Compare new lladdr with cached one */
1117        if (!dev->addr_len) {
1118                /* First case: device needs no address. */
1119                lladdr = neigh->ha;
1120        } else if (lladdr) {
1121                /* The second case: if something is already cached
1122                   and a new address is proposed:
1123                   - compare new & old
1124                   - if they are different, check override flag
1125                 */
1126                if ((old & NUD_VALID) &&
1127                    !memcmp(lladdr, neigh->ha, dev->addr_len))
1128                        lladdr = neigh->ha;
1129        } else {
1130                /* No address is supplied; if we know something,
1131                   use it, otherwise discard the request.
1132                 */
1133                err = -EINVAL;
1134                if (!(old & NUD_VALID))
1135                        goto out;
1136                lladdr = neigh->ha;
1137        }
1138
1139        if (new & NUD_CONNECTED)
1140                neigh->confirmed = jiffies;
1141        neigh->updated = jiffies;
1142
1143        /* If entry was valid and address is not changed,
1144           do not change entry state, if new one is STALE.
1145         */
1146        err = 0;
1147        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1148        if (old & NUD_VALID) {
1149                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                            /* Address differs but may not be overridden:
                             * the router flag is left alone, and the entry is
                             * either demoted to STALE with its old address
                             * (weak override of a connected entry) or left
                             * untouched.
                             */
1150                        update_isrouter = 0;
1151                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1152                            (old & NUD_CONNECTED)) {
1153                                lladdr = neigh->ha;
1154                                new = NUD_STALE;
1155                        } else
1156                                goto out;
1157                } else {
1158                        if (lladdr == neigh->ha && new == NUD_STALE &&
1159                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1160                             (old & NUD_CONNECTED))
1161                            )
1162                                new = old;
1163                }
1164        }
1165
1166        if (new != old) {
1167                neigh_del_timer(neigh);
                    /* Timer-driven states get a timer: REACHABLE expires
                     * after reachable_time, the others are armed for "now".
                     */
1168                if (new & NUD_IN_TIMER)
1169                        neigh_add_timer(neigh, (jiffies +
1170                                                ((new & NUD_REACHABLE) ?
1171                                                 neigh->parms->reachable_time :
1172                                                 0)));
1173                neigh->nud_state = new;
1174        }
1175
1176        if (lladdr != neigh->ha) {
                    /* Address really changed: copy it under ha_lock and
                     * refresh the cached hardware header.
                     */
1177                write_seqlock(&neigh->ha_lock);
1178                memcpy(&neigh->ha, lladdr, dev->addr_len);
1179                write_sequnlock(&neigh->ha_lock);
1180                neigh_update_hhs(neigh);
                    /* Backdate 'confirmed' by twice base_reachable_time when
                     * the new state is not a connected one.
                     */
1181                if (!(new & NUD_CONNECTED))
1182                        neigh->confirmed = jiffies -
1183                                      (neigh->parms->base_reachable_time << 1);
1184                notify = 1;
1185        }
1186        if (new == old)
1187                goto out;
1188        if (new & NUD_CONNECTED)
1189                neigh_connect(neigh);
1190        else
1191                neigh_suspect(neigh);
1192        if (!(old & NUD_VALID)) {
1193                struct sk_buff *skb;
1194
1195                /* Again: avoid dead loop if something went wrong */
1196
                    /* The entry just became valid: flush the packets that
                     * were waiting for resolution. neigh->lock is dropped
                     * around each transmit, so the state is re-checked on
                     * every iteration.
                     */
1197                while (neigh->nud_state & NUD_VALID &&
1198                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1199                        struct dst_entry *dst = skb_dst(skb);
1200                        struct neighbour *n2, *n1 = neigh;
1201                        write_unlock_bh(&neigh->lock);
1202
1203                        rcu_read_lock();
1204
1205                        /* Why not just use 'neigh' as-is?  The problem is that
1206                         * things such as shaper, eql, and sch_teql can end up
1207                         * using alternative, different, neigh objects to output
1208                         * the packet in the output path.  So what we need to do
1209                         * here is re-lookup the top-level neigh in the path so
1210                         * we can reinject the packet there.
1211                         */
1212                        n2 = NULL;
1213                        if (dst) {
1214                                n2 = dst_neigh_lookup_skb(dst, skb);
1215                                if (n2)
1216                                        n1 = n2;
1217                        }
1218                        n1->output(n1, skb);
1219                        if (n2)
1220                                neigh_release(n2);
1221                        rcu_read_unlock();
1222
1223                        write_lock_bh(&neigh->lock);
1224                }
1225                skb_queue_purge(&neigh->arp_queue);
1226                neigh->arp_queue_len_bytes = 0;
1227        }
1228out:
            /* update_isrouter is only non-zero if the flag was requested and
             * the update was not rejected above.
             */
1229        if (update_isrouter) {
1230                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1231                        (neigh->flags | NTF_ROUTER) :
1232                        (neigh->flags & ~NTF_ROUTER);
1233        }
1234        write_unlock_bh(&neigh->lock);
1235
1236        if (notify)
1237                neigh_update_notify(neigh);
1238
1239        return err;
1240}
1241EXPORT_SYMBOL(neigh_update);
1242
1243struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1244                                 u8 *lladdr, void *saddr,
1245                                 struct net_device *dev)
1246{
1247        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1248                                                 lladdr || !dev->addr_len);
1249        if (neigh)
1250                neigh_update(neigh, lladdr, NUD_STALE,
1251                             NEIGH_UPDATE_F_OVERRIDE);
1252        return neigh;
1253}
1254EXPORT_SYMBOL(neigh_event_ns);
1255
1256/* called with read_lock_bh(&n->lock); */
1257static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1258{
1259        struct net_device *dev = dst->dev;
1260        __be16 prot = dst->ops->protocol;
1261        struct hh_cache *hh = &n->hh;
1262
1263        write_lock_bh(&n->lock);
1264
1265        /* Only one thread can come in here and initialize the
1266         * hh_cache entry.
1267         */
1268        if (!hh->hh_len)
1269                dev->header_ops->cache(n, hh, prot);
1270
1271        write_unlock_bh(&n->lock);
1272}
1273
1274/* This function can be used in contexts, where only old dev_queue_xmit
1275 * worked, f.e. if you want to override normal output path (eql, shaper),
1276 * but resolution is not made yet.
1277 */
1278
1279int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1280{
1281        struct net_device *dev = skb->dev;
1282
1283        __skb_pull(skb, skb_network_offset(skb));
1284
1285        if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1286                            skb->len) < 0 &&
1287            dev->header_ops->rebuild(skb))
1288                return 0;
1289
1290        return dev_queue_xmit(skb);
1291}
1292EXPORT_SYMBOL(neigh_compat_output);
1293
1294/* Slow and careful. */
1295
1296int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1297{
1298        struct dst_entry *dst = skb_dst(skb);
1299        int rc = 0;
1300
1301        if (!dst)
1302                goto discard;
1303
1304        if (!neigh_event_send(neigh, skb)) {
1305                int err;
1306                struct net_device *dev = neigh->dev;
1307                unsigned int seq;
1308
1309                if (dev->header_ops->cache && !neigh->hh.hh_len)
1310                        neigh_hh_init(neigh, dst);
1311
1312                do {
1313                        __skb_pull(skb, skb_network_offset(skb));
1314                        seq = read_seqbegin(&neigh->ha_lock);
1315                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1316                                              neigh->ha, NULL, skb->len);
1317                } while (read_seqretry(&neigh->ha_lock, seq));
1318
1319                if (err >= 0)
1320                        rc = dev_queue_xmit(skb);
1321                else
1322                        goto out_kfree_skb;
1323        }
1324out:
1325        return rc;
1326discard:
1327        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1328                      dst, neigh);
1329out_kfree_skb:
1330        rc = -EINVAL;
1331        kfree_skb(skb);
1332        goto out;
1333}
1334EXPORT_SYMBOL(neigh_resolve_output);
1335
1336/* As fast as possible without hh cache */
1337
1338int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1339{
1340        struct net_device *dev = neigh->dev;
1341        unsigned int seq;
1342        int err;
1343
1344        do {
1345                __skb_pull(skb, skb_network_offset(skb));
1346                seq = read_seqbegin(&neigh->ha_lock);
1347                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1348                                      neigh->ha, NULL, skb->len);
1349        } while (read_seqretry(&neigh->ha_lock, seq));
1350
1351        if (err >= 0)
1352                err = dev_queue_xmit(skb);
1353        else {
1354                err = -EINVAL;
1355                kfree_skb(skb);
1356        }
1357        return err;
1358}
1359EXPORT_SYMBOL(neigh_connected_output);
1360
/* Output method that passes @skb straight to dev_queue_xmit(); @neigh is
 * not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
1365EXPORT_SYMBOL(neigh_direct_output);
1366
1367static void neigh_proxy_process(unsigned long arg)
1368{
1369        struct neigh_table *tbl = (struct neigh_table *)arg;
1370        long sched_next = 0;
1371        unsigned long now = jiffies;
1372        struct sk_buff *skb, *n;
1373
1374        spin_lock(&tbl->proxy_queue.lock);
1375
1376        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1377                long tdif = NEIGH_CB(skb)->sched_next - now;
1378
1379                if (tdif <= 0) {
1380                        struct net_device *dev = skb->dev;
1381
1382                        __skb_unlink(skb, &tbl->proxy_queue);
1383                        if (tbl->proxy_redo && netif_running(dev)) {
1384                                rcu_read_lock();
1385                                tbl->proxy_redo(skb);
1386                                rcu_read_unlock();
1387                        } else {
1388                                kfree_skb(skb);
1389                        }
1390
1391                        dev_put(dev);
1392                } else if (!sched_next || tdif < sched_next)
1393                        sched_next = tdif;
1394        }
1395        del_timer(&tbl->proxy_timer);
1396        if (sched_next)
1397                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1398        spin_unlock(&tbl->proxy_queue.lock);
1399}
1400
1401void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1402                    struct sk_buff *skb)
1403{
1404        unsigned long now = jiffies;
1405        unsigned long sched_next = now + (net_random() % p->proxy_delay);
1406
1407        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1408                kfree_skb(skb);
1409                return;
1410        }
1411
1412        NEIGH_CB(skb)->sched_next = sched_next;
1413        NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1414
1415        spin_lock(&tbl->proxy_queue.lock);
1416        if (del_timer(&tbl->proxy_timer)) {
1417                if (time_before(tbl->proxy_timer.expires, sched_next))
1418                        sched_next = tbl->proxy_timer.expires;
1419        }
1420        skb_dst_drop(skb);
1421        dev_hold(skb->dev);
1422        __skb_queue_tail(&tbl->proxy_queue, skb);
1423        mod_timer(&tbl->proxy_timer, sched_next);
1424        spin_unlock(&tbl->proxy_queue.lock);
1425}
1426EXPORT_SYMBOL(pneigh_enqueue);
1427
1428static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1429                                                      struct net *net, int ifindex)
1430{
1431        struct neigh_parms *p;
1432
1433        for (p = &tbl->parms; p; p = p->next) {
1434                if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1435                    (!p->dev && !ifindex))
1436                        return p;
1437        }
1438
1439        return NULL;
1440}
1441
1442struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1443                                      struct neigh_table *tbl)
1444{
1445        struct neigh_parms *p, *ref;
1446        struct net *net = dev_net(dev);
1447        const struct net_device_ops *ops = dev->netdev_ops;
1448
1449        ref = lookup_neigh_parms(tbl, net, 0);
1450        if (!ref)
1451                return NULL;
1452
1453        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1454        if (p) {
1455                p->tbl            = tbl;
1456                atomic_set(&p->refcnt, 1);
1457                p->reachable_time =
1458                                neigh_rand_reach_time(p->base_reachable_time);
1459
1460                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1461                        kfree(p);
1462                        return NULL;
1463                }
1464
1465                dev_hold(dev);
1466                p->dev = dev;
1467                write_pnet(&p->net, hold_net(net));
1468                p->sysctl_table = NULL;
1469                write_lock_bh(&tbl->lock);
1470                p->next         = tbl->parms.next;
1471                tbl->parms.next = p;
1472                write_unlock_bh(&tbl->lock);
1473        }
1474        return p;
1475}
1476EXPORT_SYMBOL(neigh_parms_alloc);
1477
1478static void neigh_rcu_free_parms(struct rcu_head *head)
1479{
1480        struct neigh_parms *parms =
1481                container_of(head, struct neigh_parms, rcu_head);
1482
1483        neigh_parms_put(parms);
1484}
1485
1486void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1487{
1488        struct neigh_parms **p;
1489
1490        if (!parms || parms == &tbl->parms)
1491                return;
1492        write_lock_bh(&tbl->lock);
1493        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1494                if (*p == parms) {
1495                        *p = parms->next;
1496                        parms->dead = 1;
1497                        write_unlock_bh(&tbl->lock);
1498                        if (parms->dev)
1499                                dev_put(parms->dev);
1500                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1501                        return;
1502                }
1503        }
1504        write_unlock_bh(&tbl->lock);
1505        NEIGH_PRINTK1("neigh_parms_release: not found\n");
1506}
1507EXPORT_SYMBOL(neigh_parms_release);
1508
/* Release the namespace reference held by @parms and free the structure. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1514
1515static struct lock_class_key neigh_table_proxy_queue_class;
1516
1517static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1518{
1519        unsigned long now = jiffies;
1520        unsigned long phsize;
1521
1522        write_pnet(&tbl->parms.net, &init_net);
1523        atomic_set(&tbl->parms.refcnt, 1);
1524        tbl->parms.reachable_time =
1525                          neigh_rand_reach_time(tbl->parms.base_reachable_time);
1526
1527        tbl->stats = alloc_percpu(struct neigh_statistics);
1528        if (!tbl->stats)
1529                panic("cannot create neighbour cache statistics");
1530
1531#ifdef CONFIG_PROC_FS
1532        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1533                              &neigh_stat_seq_fops, tbl))
1534                panic("cannot create neighbour proc dir entry");
1535#endif
1536
1537        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1538
1539        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1540        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1541
1542        if (!tbl->nht || !tbl->phash_buckets)
1543                panic("cannot allocate neighbour cache hashes");
1544
1545        rwlock_init(&tbl->lock);
1546        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1547        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1548        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1549        skb_queue_head_init_class(&tbl->proxy_queue,
1550                        &neigh_table_proxy_queue_class);
1551
1552        tbl->last_flush = now;
1553        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1554}
1555
1556void neigh_table_init(struct neigh_table *tbl)
1557{
1558        struct neigh_table *tmp;
1559
1560        neigh_table_init_no_netlink(tbl);
1561        write_lock(&neigh_tbl_lock);
1562        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1563                if (tmp->family == tbl->family)
1564                        break;
1565        }
1566        tbl->next       = neigh_tables;
1567        neigh_tables    = tbl;
1568        write_unlock(&neigh_tbl_lock);
1569
1570        if (unlikely(tmp)) {
1571                pr_err("Registering multiple tables for family %d\n",
1572                       tbl->family);
1573                dump_stack();
1574        }
1575}
1576EXPORT_SYMBOL(neigh_table_init);
1577
1578int neigh_table_clear(struct neigh_table *tbl)
1579{
1580        struct neigh_table **tp;
1581
1582        /* It is not clean... Fix it to unload IPv6 module safely */
1583        cancel_delayed_work_sync(&tbl->gc_work);
1584        del_timer_sync(&tbl->proxy_timer);
1585        pneigh_queue_purge(&tbl->proxy_queue);
1586        neigh_ifdown(tbl, NULL);
1587        if (atomic_read(&tbl->entries))
1588                pr_crit("neighbour leakage\n");
1589        write_lock(&neigh_tbl_lock);
1590        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1591                if (*tp == tbl) {
1592                        *tp = tbl->next;
1593                        break;
1594                }
1595        }
1596        write_unlock(&neigh_tbl_lock);
1597
1598        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1599                 neigh_hash_free_rcu);
1600        tbl->nht = NULL;
1601
1602        kfree(tbl->phash_buckets);
1603        tbl->phash_buckets = NULL;
1604
1605        remove_proc_entry(tbl->id, init_net.proc_net_stat);
1606
1607        free_percpu(tbl->stats);
1608        tbl->stats = NULL;
1609
1610        return 0;
1611}
1612EXPORT_SYMBOL(neigh_table_clear);
1613
1614static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1615{
1616        struct net *net = sock_net(skb->sk);
1617        struct ndmsg *ndm;
1618        struct nlattr *dst_attr;
1619        struct neigh_table *tbl;
1620        struct net_device *dev = NULL;
1621        int err = -EINVAL;
1622
1623        ASSERT_RTNL();
1624        if (nlmsg_len(nlh) < sizeof(*ndm))
1625                goto out;
1626
1627        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1628        if (dst_attr == NULL)
1629                goto out;
1630
1631        ndm = nlmsg_data(nlh);
1632        if (ndm->ndm_ifindex) {
1633                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1634                if (dev == NULL) {
1635                        err = -ENODEV;
1636                        goto out;
1637                }
1638        }
1639
1640        read_lock(&neigh_tbl_lock);
1641        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1642                struct neighbour *neigh;
1643
1644                if (tbl->family != ndm->ndm_family)
1645                        continue;
1646                read_unlock(&neigh_tbl_lock);
1647
1648                if (nla_len(dst_attr) < tbl->key_len)
1649                        goto out;
1650
1651                if (ndm->ndm_flags & NTF_PROXY) {
1652                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1653                        goto out;
1654                }
1655
1656                if (dev == NULL)
1657                        goto out;
1658
1659                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1660                if (neigh == NULL) {
1661                        err = -ENOENT;
1662                        goto out;
1663                }
1664
1665                err = neigh_update(neigh, NULL, NUD_FAILED,
1666                                   NEIGH_UPDATE_F_OVERRIDE |
1667                                   NEIGH_UPDATE_F_ADMIN);
1668                neigh_release(neigh);
1669                goto out;
1670        }
1671        read_unlock(&neigh_tbl_lock);
1672        err = -EAFNOSUPPORT;
1673
1674out:
1675        return err;
1676}
1677
1678static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1679{
1680        struct net *net = sock_net(skb->sk);
1681        struct ndmsg *ndm;
1682        struct nlattr *tb[NDA_MAX+1];
1683        struct neigh_table *tbl;
1684        struct net_device *dev = NULL;
1685        int err;
1686
1687        ASSERT_RTNL();
1688        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1689        if (err < 0)
1690                goto out;
1691
1692        err = -EINVAL;
1693        if (tb[NDA_DST] == NULL)
1694                goto out;
1695
1696        ndm = nlmsg_data(nlh);
1697        if (ndm->ndm_ifindex) {
1698                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1699                if (dev == NULL) {
1700                        err = -ENODEV;
1701                        goto out;
1702                }
1703
1704                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1705                        goto out;
1706        }
1707
1708        read_lock(&neigh_tbl_lock);
1709        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1710                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1711                struct neighbour *neigh;
1712                void *dst, *lladdr;
1713
1714                if (tbl->family != ndm->ndm_family)
1715                        continue;
1716                read_unlock(&neigh_tbl_lock);
1717
1718                if (nla_len(tb[NDA_DST]) < tbl->key_len)
1719                        goto out;
1720                dst = nla_data(tb[NDA_DST]);
1721                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1722
1723                if (ndm->ndm_flags & NTF_PROXY) {
1724                        struct pneigh_entry *pn;
1725
1726                        err = -ENOBUFS;
1727                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
1728                        if (pn) {
1729                                pn->flags = ndm->ndm_flags;
1730                                err = 0;
1731                        }
1732                        goto out;
1733                }
1734
1735                if (dev == NULL)
1736                        goto out;
1737
1738                neigh = neigh_lookup(tbl, dst, dev);
1739                if (neigh == NULL) {
1740                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1741                                err = -ENOENT;
1742                                goto out;
1743                        }
1744
1745                        neigh = __neigh_lookup_errno(tbl, dst, dev);
1746                        if (IS_ERR(neigh)) {
1747                                err = PTR_ERR(neigh);
1748                                goto out;
1749                        }
1750                } else {
1751                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
1752                                err = -EEXIST;
1753                                neigh_release(neigh);
1754                                goto out;
1755                        }
1756
1757                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1758                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1759                }
1760
1761                if (ndm->ndm_flags & NTF_USE) {
1762                        neigh_event_send(neigh, NULL);
1763                        err = 0;
1764                } else
1765                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1766                neigh_release(neigh);
1767                goto out;
1768        }
1769
1770        read_unlock(&neigh_tbl_lock);
1771        err = -EAFNOSUPPORT;
1772out:
1773        return err;
1774}
1775
1776static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1777{
1778        struct nlattr *nest;
1779
1780        nest = nla_nest_start(skb, NDTA_PARMS);
1781        if (nest == NULL)
1782                return -ENOBUFS;
1783
1784        if ((parms->dev &&
1785             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1786            nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1787            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1788            /* approximative value for deprecated QUEUE_LEN (in packets) */
1789            nla_put_u32(skb, NDTPA_QUEUE_LEN,
1790                        DIV_ROUND_UP(parms->queue_len_bytes,
1791                                     SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1792            nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1793            nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1794            nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1795            nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1796            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1797            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1798                          parms->base_reachable_time) ||
1799            nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1800            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1801                          parms->delay_probe_time) ||
1802            nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1803            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1804            nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1805            nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1806                goto nla_put_failure;
1807        return nla_nest_end(skb, nest);
1808
1809nla_put_failure:
1810        nla_nest_cancel(skb, nest);
1811        return -EMSGSIZE;
1812}
1813
1814static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1815                              u32 pid, u32 seq, int type, int flags)
1816{
1817        struct nlmsghdr *nlh;
1818        struct ndtmsg *ndtmsg;
1819
1820        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1821        if (nlh == NULL)
1822                return -EMSGSIZE;
1823
1824        ndtmsg = nlmsg_data(nlh);
1825
1826        read_lock_bh(&tbl->lock);
1827        ndtmsg->ndtm_family = tbl->family;
1828        ndtmsg->ndtm_pad1   = 0;
1829        ndtmsg->ndtm_pad2   = 0;
1830
1831        if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1832            nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1833            nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1834            nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1835            nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1836                goto nla_put_failure;
1837        {
1838                unsigned long now = jiffies;
1839                unsigned int flush_delta = now - tbl->last_flush;
1840                unsigned int rand_delta = now - tbl->last_rand;
1841                struct neigh_hash_table *nht;
1842                struct ndt_config ndc = {
1843                        .ndtc_key_len           = tbl->key_len,
1844                        .ndtc_entry_size        = tbl->entry_size,
1845                        .ndtc_entries           = atomic_read(&tbl->entries),
1846                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1847                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1848                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1849                };
1850
1851                rcu_read_lock_bh();
1852                nht = rcu_dereference_bh(tbl->nht);
1853                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1854                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1855                rcu_read_unlock_bh();
1856
1857                if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1858                        goto nla_put_failure;
1859        }
1860
1861        {
1862                int cpu;
1863                struct ndt_stats ndst;
1864
1865                memset(&ndst, 0, sizeof(ndst));
1866
1867                for_each_possible_cpu(cpu) {
1868                        struct neigh_statistics *st;
1869
1870                        st = per_cpu_ptr(tbl->stats, cpu);
1871                        ndst.ndts_allocs                += st->allocs;
1872                        ndst.ndts_destroys              += st->destroys;
1873                        ndst.ndts_hash_grows            += st->hash_grows;
1874                        ndst.ndts_res_failed            += st->res_failed;
1875                        ndst.ndts_lookups               += st->lookups;
1876                        ndst.ndts_hits                  += st->hits;
1877                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1878                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1879                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1880                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1881                }
1882
1883                if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1884                        goto nla_put_failure;
1885        }
1886
1887        BUG_ON(tbl->parms.dev);
1888        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1889                goto nla_put_failure;
1890
1891        read_unlock_bh(&tbl->lock);
1892        return nlmsg_end(skb, nlh);
1893
1894nla_put_failure:
1895        read_unlock_bh(&tbl->lock);
1896        nlmsg_cancel(skb, nlh);
1897        return -EMSGSIZE;
1898}
1899
1900static int neightbl_fill_param_info(struct sk_buff *skb,
1901                                    struct neigh_table *tbl,
1902                                    struct neigh_parms *parms,
1903                                    u32 pid, u32 seq, int type,
1904                                    unsigned int flags)
1905{
1906        struct ndtmsg *ndtmsg;
1907        struct nlmsghdr *nlh;
1908
1909        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1910        if (nlh == NULL)
1911                return -EMSGSIZE;
1912
1913        ndtmsg = nlmsg_data(nlh);
1914
1915        read_lock_bh(&tbl->lock);
1916        ndtmsg->ndtm_family = tbl->family;
1917        ndtmsg->ndtm_pad1   = 0;
1918        ndtmsg->ndtm_pad2   = 0;
1919
1920        if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1921            neightbl_fill_parms(skb, parms) < 0)
1922                goto errout;
1923
1924        read_unlock_bh(&tbl->lock);
1925        return nlmsg_end(skb, nlh);
1926errout:
1927        read_unlock_bh(&tbl->lock);
1928        nlmsg_cancel(skb, nlh);
1929        return -EMSGSIZE;
1930}
1931
1932static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1933        [NDTA_NAME]             = { .type = NLA_STRING },
1934        [NDTA_THRESH1]          = { .type = NLA_U32 },
1935        [NDTA_THRESH2]          = { .type = NLA_U32 },
1936        [NDTA_THRESH3]          = { .type = NLA_U32 },
1937        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1938        [NDTA_PARMS]            = { .type = NLA_NESTED },
1939};
1940
1941static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1942        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1943        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1944        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1945        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1946        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1947        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1948        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1949        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1950        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1951        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1952        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1953        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1954        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1955};
1956
1957static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1958{
1959        struct net *net = sock_net(skb->sk);
1960        struct neigh_table *tbl;
1961        struct ndtmsg *ndtmsg;
1962        struct nlattr *tb[NDTA_MAX+1];
1963        int err;
1964
1965        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1966                          nl_neightbl_policy);
1967        if (err < 0)
1968                goto errout;
1969
1970        if (tb[NDTA_NAME] == NULL) {
1971                err = -EINVAL;
1972                goto errout;
1973        }
1974
1975        ndtmsg = nlmsg_data(nlh);
1976        read_lock(&neigh_tbl_lock);
1977        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1978                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1979                        continue;
1980
1981                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1982                        break;
1983        }
1984
1985        if (tbl == NULL) {
1986                err = -ENOENT;
1987                goto errout_locked;
1988        }
1989
1990        /*
1991         * We acquire tbl->lock to be nice to the periodic timers and
1992         * make sure they always see a consistent set of values.
1993         */
1994        write_lock_bh(&tbl->lock);
1995
1996        if (tb[NDTA_PARMS]) {
1997                struct nlattr *tbp[NDTPA_MAX+1];
1998                struct neigh_parms *p;
1999                int i, ifindex = 0;
2000
2001                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2002                                       nl_ntbl_parm_policy);
2003                if (err < 0)
2004                        goto errout_tbl_lock;
2005
2006                if (tbp[NDTPA_IFINDEX])
2007                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2008
2009                p = lookup_neigh_parms(tbl, net, ifindex);
2010                if (p == NULL) {
2011                        err = -ENOENT;
2012                        goto errout_tbl_lock;
2013                }
2014
2015                for (i = 1; i <= NDTPA_MAX; i++) {
2016                        if (tbp[i] == NULL)
2017                                continue;
2018
2019                        switch (i) {
2020                        case NDTPA_QUEUE_LEN:
2021                                p->queue_len_bytes = nla_get_u32(tbp[i]) *
2022                                                     SKB_TRUESIZE(ETH_FRAME_LEN);
2023                                break;
2024                        case NDTPA_QUEUE_LENBYTES:
2025                                p->queue_len_bytes = nla_get_u32(tbp[i]);
2026                                break;
2027                        case NDTPA_PROXY_QLEN:
2028                                p->proxy_qlen = nla_get_u32(tbp[i]);
2029                                break;
2030                        case NDTPA_APP_PROBES:
2031                                p->app_probes = nla_get_u32(tbp[i]);
2032                                break;
2033                        case NDTPA_UCAST_PROBES:
2034                                p->ucast_probes = nla_get_u32(tbp[i]);
2035                                break;
2036                        case NDTPA_MCAST_PROBES:
2037                                p->mcast_probes = nla_get_u32(tbp[i]);
2038                                break;
2039                        case NDTPA_BASE_REACHABLE_TIME:
2040                                p->base_reachable_time = nla_get_msecs(tbp[i]);
2041                                break;
2042                        case NDTPA_GC_STALETIME:
2043                                p->gc_staletime = nla_get_msecs(tbp[i]);
2044                                break;
2045                        case NDTPA_DELAY_PROBE_TIME:
2046                                p->delay_probe_time = nla_get_msecs(tbp[i]);
2047                                break;
2048                        case NDTPA_RETRANS_TIME:
2049                                p->retrans_time = nla_get_msecs(tbp[i]);
2050                                break;
2051                        case NDTPA_ANYCAST_DELAY:
2052                                p->anycast_delay = nla_get_msecs(tbp[i]);
2053                                break;
2054                        case NDTPA_PROXY_DELAY:
2055                                p->proxy_delay = nla_get_msecs(tbp[i]);
2056                                break;
2057                        case NDTPA_LOCKTIME:
2058                                p->locktime = nla_get_msecs(tbp[i]);
2059                                break;
2060                        }
2061                }
2062        }
2063
2064        if (tb[NDTA_THRESH1])
2065                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2066
2067        if (tb[NDTA_THRESH2])
2068                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2069
2070        if (tb[NDTA_THRESH3])
2071                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2072
2073        if (tb[NDTA_GC_INTERVAL])
2074                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2075
2076        err = 0;
2077
2078errout_tbl_lock:
2079        write_unlock_bh(&tbl->lock);
2080errout_locked:
2081        read_unlock(&neigh_tbl_lock);
2082errout:
2083        return err;
2084}
2085
2086static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2087{
2088        struct net *net = sock_net(skb->sk);
2089        int family, tidx, nidx = 0;
2090        int tbl_skip = cb->args[0];
2091        int neigh_skip = cb->args[1];
2092        struct neigh_table *tbl;
2093
2094        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2095
2096        read_lock(&neigh_tbl_lock);
2097        for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2098                struct neigh_parms *p;
2099
2100                if (tidx < tbl_skip || (family && tbl->family != family))
2101                        continue;
2102
2103                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2104                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2105                                       NLM_F_MULTI) <= 0)
2106                        break;
2107
2108                for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2109                        if (!net_eq(neigh_parms_net(p), net))
2110                                continue;
2111
2112                        if (nidx < neigh_skip)
2113                                goto next;
2114
2115                        if (neightbl_fill_param_info(skb, tbl, p,
2116                                                     NETLINK_CB(cb->skb).portid,
2117                                                     cb->nlh->nlmsg_seq,
2118                                                     RTM_NEWNEIGHTBL,
2119                                                     NLM_F_MULTI) <= 0)
2120                                goto out;
2121                next:
2122                        nidx++;
2123                }
2124
2125                neigh_skip = 0;
2126        }
2127out:
2128        read_unlock(&neigh_tbl_lock);
2129        cb->args[0] = tidx;
2130        cb->args[1] = nidx;
2131
2132        return skb->len;
2133}
2134
2135static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2136                           u32 pid, u32 seq, int type, unsigned int flags)
2137{
2138        unsigned long now = jiffies;
2139        struct nda_cacheinfo ci;
2140        struct nlmsghdr *nlh;
2141        struct ndmsg *ndm;
2142
2143        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2144        if (nlh == NULL)
2145                return -EMSGSIZE;
2146
2147        ndm = nlmsg_data(nlh);
2148        ndm->ndm_family  = neigh->ops->family;
2149        ndm->ndm_pad1    = 0;
2150        ndm->ndm_pad2    = 0;
2151        ndm->ndm_flags   = neigh->flags;
2152        ndm->ndm_type    = neigh->type;
2153        ndm->ndm_ifindex = neigh->dev->ifindex;
2154
2155        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2156                goto nla_put_failure;
2157
2158        read_lock_bh(&neigh->lock);
2159        ndm->ndm_state   = neigh->nud_state;
2160        if (neigh->nud_state & NUD_VALID) {
2161                char haddr[MAX_ADDR_LEN];
2162
2163                neigh_ha_snapshot(haddr, neigh, neigh->dev);
2164                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2165                        read_unlock_bh(&neigh->lock);
2166                        goto nla_put_failure;
2167                }
2168        }
2169
2170        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2171        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2172        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2173        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2174        read_unlock_bh(&neigh->lock);
2175
2176        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2177            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2178                goto nla_put_failure;
2179
2180        return nlmsg_end(skb, nlh);
2181
2182nla_put_failure:
2183        nlmsg_cancel(skb, nlh);
2184        return -EMSGSIZE;
2185}
2186
2187static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2188                            u32 pid, u32 seq, int type, unsigned int flags,
2189                            struct neigh_table *tbl)
2190{
2191        struct nlmsghdr *nlh;
2192        struct ndmsg *ndm;
2193
2194        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2195        if (nlh == NULL)
2196                return -EMSGSIZE;
2197
2198        ndm = nlmsg_data(nlh);
2199        ndm->ndm_family  = tbl->family;
2200        ndm->ndm_pad1    = 0;
2201        ndm->ndm_pad2    = 0;
2202        ndm->ndm_flags   = pn->flags | NTF_PROXY;
2203        ndm->ndm_type    = NDA_DST;
2204        ndm->ndm_ifindex = pn->dev->ifindex;
2205        ndm->ndm_state   = NUD_NONE;
2206
2207        if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2208                goto nla_put_failure;
2209
2210        return nlmsg_end(skb, nlh);
2211
2212nla_put_failure:
2213        nlmsg_cancel(skb, nlh);
2214        return -EMSGSIZE;
2215}
2216
2217static void neigh_update_notify(struct neighbour *neigh)
2218{
2219        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2220        __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2221}
2222
2223static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2224                            struct netlink_callback *cb)
2225{
2226        struct net *net = sock_net(skb->sk);
2227        struct neighbour *n;
2228        int rc, h, s_h = cb->args[1];
2229        int idx, s_idx = idx = cb->args[2];
2230        struct neigh_hash_table *nht;
2231
2232        rcu_read_lock_bh();
2233        nht = rcu_dereference_bh(tbl->nht);
2234
2235        for (h = s_h; h < (1 << nht->hash_shift); h++) {
2236                if (h > s_h)
2237                        s_idx = 0;
2238                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2239                     n != NULL;
2240                     n = rcu_dereference_bh(n->next)) {
2241                        if (!net_eq(dev_net(n->dev), net))
2242                                continue;
2243                        if (idx < s_idx)
2244                                goto next;
2245                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2246                                            cb->nlh->nlmsg_seq,
2247                                            RTM_NEWNEIGH,
2248                                            NLM_F_MULTI) <= 0) {
2249                                rc = -1;
2250                                goto out;
2251                        }
2252next:
2253                        idx++;
2254                }
2255        }
2256        rc = skb->len;
2257out:
2258        rcu_read_unlock_bh();
2259        cb->args[1] = h;
2260        cb->args[2] = idx;
2261        return rc;
2262}
2263
2264static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2265                             struct netlink_callback *cb)
2266{
2267        struct pneigh_entry *n;
2268        struct net *net = sock_net(skb->sk);
2269        int rc, h, s_h = cb->args[3];
2270        int idx, s_idx = idx = cb->args[4];
2271
2272        read_lock_bh(&tbl->lock);
2273
2274        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2275                if (h > s_h)
2276                        s_idx = 0;
2277                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2278                        if (dev_net(n->dev) != net)
2279                                continue;
2280                        if (idx < s_idx)
2281                                goto next;
2282                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2283                                            cb->nlh->nlmsg_seq,
2284                                            RTM_NEWNEIGH,
2285                                            NLM_F_MULTI, tbl) <= 0) {
2286                                read_unlock_bh(&tbl->lock);
2287                                rc = -1;
2288                                goto out;
2289                        }
2290                next:
2291                        idx++;
2292                }
2293        }
2294
2295        read_unlock_bh(&tbl->lock);
2296        rc = skb->len;
2297out:
2298        cb->args[3] = h;
2299        cb->args[4] = idx;
2300        return rc;
2301
2302}
2303
2304static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2305{
2306        struct neigh_table *tbl;
2307        int t, family, s_t;
2308        int proxy = 0;
2309        int err;
2310
2311        read_lock(&neigh_tbl_lock);
2312        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2313
2314        /* check for full ndmsg structure presence, family member is
2315         * the same for both structures
2316         */
2317        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2318            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2319                proxy = 1;
2320
2321        s_t = cb->args[0];
2322
2323        for (tbl = neigh_tables, t = 0; tbl;
2324             tbl = tbl->next, t++) {
2325                if (t < s_t || (family && tbl->family != family))
2326                        continue;
2327                if (t > s_t)
2328                        memset(&cb->args[1], 0, sizeof(cb->args) -
2329                                                sizeof(cb->args[0]));
2330                if (proxy)
2331                        err = pneigh_dump_table(tbl, skb, cb);
2332                else
2333                        err = neigh_dump_table(tbl, skb, cb);
2334                if (err < 0)
2335                        break;
2336        }
2337        read_unlock(&neigh_tbl_lock);
2338
2339        cb->args[0] = t;
2340        return skb->len;
2341}
2342
2343void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2344{
2345        int chain;
2346        struct neigh_hash_table *nht;
2347
2348        rcu_read_lock_bh();
2349        nht = rcu_dereference_bh(tbl->nht);
2350
2351        read_lock(&tbl->lock); /* avoid resizes */
2352        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2353                struct neighbour *n;
2354
2355                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2356                     n != NULL;
2357                     n = rcu_dereference_bh(n->next))
2358                        cb(n, cookie);
2359        }
2360        read_unlock(&tbl->lock);
2361        rcu_read_unlock_bh();
2362}
2363EXPORT_SYMBOL(neigh_for_each);
2364
2365/* The tbl->lock must be held as a writer and BH disabled. */
2366void __neigh_for_each_release(struct neigh_table *tbl,
2367                              int (*cb)(struct neighbour *))
2368{
2369        int chain;
2370        struct neigh_hash_table *nht;
2371
2372        nht = rcu_dereference_protected(tbl->nht,
2373                                        lockdep_is_held(&tbl->lock));
2374        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2375                struct neighbour *n;
2376                struct neighbour __rcu **np;
2377
2378                np = &nht->hash_buckets[chain];
2379                while ((n = rcu_dereference_protected(*np,
2380                                        lockdep_is_held(&tbl->lock))) != NULL) {
2381                        int release;
2382
2383                        write_lock(&n->lock);
2384                        release = cb(n);
2385                        if (release) {
2386                                rcu_assign_pointer(*np,
2387                                        rcu_dereference_protected(n->next,
2388                                                lockdep_is_held(&tbl->lock)));
2389                                n->dead = 1;
2390                        } else
2391                                np = &n->next;
2392                        write_unlock(&n->lock);
2393                        if (release)
2394                                neigh_cleanup_and_release(n);
2395                }
2396        }
2397}
2398EXPORT_SYMBOL(__neigh_for_each_release);
2399
2400#ifdef CONFIG_PROC_FS
2401
2402static struct neighbour *neigh_get_first(struct seq_file *seq)
2403{
2404        struct neigh_seq_state *state = seq->private;
2405        struct net *net = seq_file_net(seq);
2406        struct neigh_hash_table *nht = state->nht;
2407        struct neighbour *n = NULL;
2408        int bucket = state->bucket;
2409
2410        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2411        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2412                n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2413
2414                while (n) {
2415                        if (!net_eq(dev_net(n->dev), net))
2416                                goto next;
2417                        if (state->neigh_sub_iter) {
2418                                loff_t fakep = 0;
2419                                void *v;
2420
2421                                v = state->neigh_sub_iter(state, n, &fakep);
2422                                if (!v)
2423                                        goto next;
2424                        }
2425                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2426                                break;
2427                        if (n->nud_state & ~NUD_NOARP)
2428                                break;
2429next:
2430                        n = rcu_dereference_bh(n->next);
2431                }
2432
2433                if (n)
2434                        break;
2435        }
2436        state->bucket = bucket;
2437
2438        return n;
2439}
2440
2441static struct neighbour *neigh_get_next(struct seq_file *seq,
2442                                        struct neighbour *n,
2443                                        loff_t *pos)
2444{
2445        struct neigh_seq_state *state = seq->private;
2446        struct net *net = seq_file_net(seq);
2447        struct neigh_hash_table *nht = state->nht;
2448
2449        if (state->neigh_sub_iter) {
2450                void *v = state->neigh_sub_iter(state, n, pos);
2451                if (v)
2452                        return n;
2453        }
2454        n = rcu_dereference_bh(n->next);
2455
2456        while (1) {
2457                while (n) {
2458                        if (!net_eq(dev_net(n->dev), net))
2459                                goto next;
2460                        if (state->neigh_sub_iter) {
2461                                void *v = state->neigh_sub_iter(state, n, pos);
2462                                if (v)
2463                                        return n;
2464                                goto next;
2465                        }
2466                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2467                                break;
2468
2469                        if (n->nud_state & ~NUD_NOARP)
2470                                break;
2471next:
2472                        n = rcu_dereference_bh(n->next);
2473                }
2474
2475                if (n)
2476                        break;
2477
2478                if (++state->bucket >= (1 << nht->hash_shift))
2479                        break;
2480
2481                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2482        }
2483
2484        if (n && pos)
2485                --(*pos);
2486        return n;
2487}
2488
2489static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2490{
2491        struct neighbour *n = neigh_get_first(seq);
2492
2493        if (n) {
2494                --(*pos);
2495                while (*pos) {
2496                        n = neigh_get_next(seq, n, pos);
2497                        if (!n)
2498                                break;
2499                }
2500        }
2501        return *pos ? NULL : n;
2502}
2503
2504static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2505{
2506        struct neigh_seq_state *state = seq->private;
2507        struct net *net = seq_file_net(seq);
2508        struct neigh_table *tbl = state->tbl;
2509        struct pneigh_entry *pn = NULL;
2510        int bucket = state->bucket;
2511
2512        state->flags |= NEIGH_SEQ_IS_PNEIGH;
2513        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2514                pn = tbl->phash_buckets[bucket];
2515                while (pn && !net_eq(pneigh_net(pn), net))
2516                        pn = pn->next;
2517                if (pn)
2518                        break;
2519        }
2520        state->bucket = bucket;
2521
2522        return pn;
2523}
2524
2525static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2526                                            struct pneigh_entry *pn,
2527                                            loff_t *pos)
2528{
2529        struct neigh_seq_state *state = seq->private;
2530        struct net *net = seq_file_net(seq);
2531        struct neigh_table *tbl = state->tbl;
2532
2533        do {
2534                pn = pn->next;
2535        } while (pn && !net_eq(pneigh_net(pn), net));
2536
2537        while (!pn) {
2538                if (++state->bucket > PNEIGH_HASHMASK)
2539                        break;
2540                pn = tbl->phash_buckets[state->bucket];
2541                while (pn && !net_eq(pneigh_net(pn), net))
2542                        pn = pn->next;
2543                if (pn)
2544                        break;
2545        }
2546
2547        if (pn && pos)
2548                --(*pos);
2549
2550        return pn;
2551}
2552
2553static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2554{
2555        struct pneigh_entry *pn = pneigh_get_first(seq);
2556
2557        if (pn) {
2558                --(*pos);
2559                while (*pos) {
2560                        pn = pneigh_get_next(seq, pn, pos);
2561                        if (!pn)
2562                                break;
2563                }
2564        }
2565        return *pos ? NULL : pn;
2566}
2567
2568static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2569{
2570        struct neigh_seq_state *state = seq->private;
2571        void *rc;
2572        loff_t idxpos = *pos;
2573
2574        rc = neigh_get_idx(seq, &idxpos);
2575        if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2576                rc = pneigh_get_idx(seq, &idxpos);
2577
2578        return rc;
2579}
2580
2581void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2582        __acquires(rcu_bh)
2583{
2584        struct neigh_seq_state *state = seq->private;
2585
2586        state->tbl = tbl;
2587        state->bucket = 0;
2588        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2589
2590        rcu_read_lock_bh();
2591        state->nht = rcu_dereference_bh(tbl->nht);
2592
2593        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2594}
2595EXPORT_SYMBOL(neigh_seq_start);
2596
2597void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2598{
2599        struct neigh_seq_state *state;
2600        void *rc;
2601
2602        if (v == SEQ_START_TOKEN) {
2603                rc = neigh_get_first(seq);
2604                goto out;
2605        }
2606
2607        state = seq->private;
2608        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2609                rc = neigh_get_next(seq, v, NULL);
2610                if (rc)
2611                        goto out;
2612                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2613                        rc = pneigh_get_first(seq);
2614        } else {
2615                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2616                rc = pneigh_get_next(seq, v, NULL);
2617        }
2618out:
2619        ++(*pos);
2620        return rc;
2621}
2622EXPORT_SYMBOL(neigh_seq_next);
2623
2624void neigh_seq_stop(struct seq_file *seq, void *v)
2625        __releases(rcu_bh)
2626{
2627        rcu_read_unlock_bh();
2628}
2629EXPORT_SYMBOL(neigh_seq_stop);
2630
2631/* statistics via seq_file */
2632
2633static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2634{
2635        struct neigh_table *tbl = seq->private;
2636        int cpu;
2637
2638        if (*pos == 0)
2639                return SEQ_START_TOKEN;
2640
2641        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2642                if (!cpu_possible(cpu))
2643                        continue;
2644                *pos = cpu+1;
2645                return per_cpu_ptr(tbl->stats, cpu);
2646        }
2647        return NULL;
2648}
2649
2650static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2651{
2652        struct neigh_table *tbl = seq->private;
2653        int cpu;
2654
2655        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2656                if (!cpu_possible(cpu))
2657                        continue;
2658                *pos = cpu+1;
2659                return per_cpu_ptr(tbl->stats, cpu);
2660        }
2661        return NULL;
2662}
2663
/*
 * ->stop for the per-table statistics file.  The start/next callbacks of
 * this iterator take no locks and allocate nothing, so there is nothing
 * to undo.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
	/* intentionally empty */
}
2668
2669static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2670{
2671        struct neigh_table *tbl = seq->private;
2672        struct neigh_statistics *st = v;
2673
2674        if (v == SEQ_START_TOKEN) {
2675                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2676                return 0;
2677        }
2678
2679        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2680                        "%08lx %08lx  %08lx %08lx %08lx\n",
2681                   atomic_read(&tbl->entries),
2682
2683                   st->allocs,
2684                   st->destroys,
2685                   st->hash_grows,
2686
2687                   st->lookups,
2688                   st->hits,
2689
2690                   st->res_failed,
2691
2692                   st->rcv_probes_mcast,
2693                   st->rcv_probes_ucast,
2694
2695                   st->periodic_gc_runs,
2696                   st->forced_gc_runs,
2697                   st->unres_discards
2698                   );
2699
2700        return 0;
2701}
2702
2703static const struct seq_operations neigh_stat_seq_ops = {
2704        .start  = neigh_stat_seq_start,
2705        .next   = neigh_stat_seq_next,
2706        .stop   = neigh_stat_seq_stop,
2707        .show   = neigh_stat_seq_show,
2708};
2709
2710static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2711{
2712        int ret = seq_open(file, &neigh_stat_seq_ops);
2713
2714        if (!ret) {
2715                struct seq_file *sf = file->private_data;
2716                sf->private = PDE(inode)->data;
2717        }
2718        return ret;
2719};
2720
2721static const struct file_operations neigh_stat_seq_fops = {
2722        .owner   = THIS_MODULE,
2723        .open    = neigh_stat_seq_open,
2724        .read    = seq_read,
2725        .llseek  = seq_lseek,
2726        .release = seq_release,
2727};
2728
2729#endif /* CONFIG_PROC_FS */
2730
2731static inline size_t neigh_nlmsg_size(void)
2732{
2733        return NLMSG_ALIGN(sizeof(struct ndmsg))
2734               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2735               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2736               + nla_total_size(sizeof(struct nda_cacheinfo))
2737               + nla_total_size(4); /* NDA_PROBES */
2738}
2739
2740static void __neigh_notify(struct neighbour *n, int type, int flags)
2741{
2742        struct net *net = dev_net(n->dev);
2743        struct sk_buff *skb;
2744        int err = -ENOBUFS;
2745
2746        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2747        if (skb == NULL)
2748                goto errout;
2749
2750        err = neigh_fill_info(skb, n, 0, 0, type, flags);
2751        if (err < 0) {
2752                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2753                WARN_ON(err == -EMSGSIZE);
2754                kfree_skb(skb);
2755                goto errout;
2756        }
2757        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2758        return;
2759errout:
2760        if (err < 0)
2761                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2762}
2763
2764#ifdef CONFIG_ARPD
2765void neigh_app_ns(struct neighbour *n)
2766{
2767        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2768}
2769EXPORT_SYMBOL(neigh_app_ns);
2770#endif /* CONFIG_ARPD */
2771
2772#ifdef CONFIG_SYSCTL
2773
2774static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2775                           size_t *lenp, loff_t *ppos)
2776{
2777        int size, ret;
2778        ctl_table tmp = *ctl;
2779
2780        tmp.data = &size;
2781        size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2782        ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2783        if (write && !ret)
2784                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2785        return ret;
2786}
2787
/*
 * Indices into neigh_sysctl_table.neigh_vars.  The order is significant:
 * the GC_* entries come last so the table can be cut short at
 * NEIGH_VAR_GC_INTERVAL.  Comments give the sysctl file name of each slot.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,		/* mcast_solicit */
	NEIGH_VAR_UCAST_PROBE,			/* ucast_solicit */
	NEIGH_VAR_APP_PROBE,			/* app_solicit */
	NEIGH_VAR_RETRANS_TIME,			/* retrans_time */
	NEIGH_VAR_BASE_REACHABLE_TIME,		/* base_reachable_time */
	NEIGH_VAR_DELAY_PROBE_TIME,		/* delay_first_probe_time */
	NEIGH_VAR_GC_STALETIME,			/* gc_stale_time */
	NEIGH_VAR_QUEUE_LEN,			/* unres_qlen */
	NEIGH_VAR_QUEUE_LEN_BYTES,		/* unres_qlen_bytes */
	NEIGH_VAR_PROXY_QLEN,			/* proxy_qlen */
	NEIGH_VAR_ANYCAST_DELAY,		/* anycast_delay */
	NEIGH_VAR_PROXY_DELAY,			/* proxy_delay */
	NEIGH_VAR_LOCKTIME,			/* locktime */
	NEIGH_VAR_RETRANS_TIME_MS,		/* retrans_time_ms */
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,	/* base_reachable_time_ms */
	NEIGH_VAR_GC_INTERVAL,			/* gc_interval */
	NEIGH_VAR_GC_THRESH1,			/* gc_thresh1 */
	NEIGH_VAR_GC_THRESH2,			/* gc_thresh2 */
	NEIGH_VAR_GC_THRESH3,			/* gc_thresh3 */
	NEIGH_VAR_MAX				/* number of variables */
};
2810
2811static struct neigh_sysctl_table {
2812        struct ctl_table_header *sysctl_header;
2813        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2814} neigh_sysctl_template __read_mostly = {
2815        .neigh_vars = {
2816                [NEIGH_VAR_MCAST_PROBE] = {
2817                        .procname       = "mcast_solicit",
2818                        .maxlen         = sizeof(int),
2819                        .mode           = 0644,
2820                        .proc_handler   = proc_dointvec,
2821                },
2822                [NEIGH_VAR_UCAST_PROBE] = {
2823                        .procname       = "ucast_solicit",
2824                        .maxlen         = sizeof(int),
2825                        .mode           = 0644,
2826                        .proc_handler   = proc_dointvec,
2827                },
2828                [NEIGH_VAR_APP_PROBE] = {
2829                        .procname       = "app_solicit",
2830                        .maxlen         = sizeof(int),
2831                        .mode           = 0644,
2832                        .proc_handler   = proc_dointvec,
2833                },
2834                [NEIGH_VAR_RETRANS_TIME] = {
2835                        .procname       = "retrans_time",
2836                        .maxlen         = sizeof(int),
2837                        .mode           = 0644,
2838                        .proc_handler   = proc_dointvec_userhz_jiffies,
2839                },
2840                [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2841                        .procname       = "base_reachable_time",
2842                        .maxlen         = sizeof(int),
2843                        .mode           = 0644,
2844                        .proc_handler   = proc_dointvec_jiffies,
2845                },
2846                [NEIGH_VAR_DELAY_PROBE_TIME] = {
2847                        .procname       = "delay_first_probe_time",
2848                        .maxlen         = sizeof(int),
2849                        .mode           = 0644,
2850                        .proc_handler   = proc_dointvec_jiffies,
2851                },
2852                [NEIGH_VAR_GC_STALETIME] = {
2853                        .procname       = "gc_stale_time",
2854                        .maxlen         = sizeof(int),
2855                        .mode           = 0644,
2856                        .proc_handler   = proc_dointvec_jiffies,
2857                },
2858                [NEIGH_VAR_QUEUE_LEN] = {
2859                        .procname       = "unres_qlen",
2860                        .maxlen         = sizeof(int),
2861                        .mode           = 0644,
2862                        .proc_handler   = proc_unres_qlen,
2863                },
2864                [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2865                        .procname       = "unres_qlen_bytes",
2866                        .maxlen         = sizeof(int),
2867                        .mode           = 0644,
2868                        .proc_handler   = proc_dointvec,
2869                },
2870                [NEIGH_VAR_PROXY_QLEN] = {
2871                        .procname       = "proxy_qlen",
2872                        .maxlen         = sizeof(int),
2873                        .mode           = 0644,
2874                        .proc_handler   = proc_dointvec,
2875                },
2876                [NEIGH_VAR_ANYCAST_DELAY] = {
2877                        .procname       = "anycast_delay",
2878                        .maxlen         = sizeof(int),
2879                        .mode           = 0644,
2880                        .proc_handler   = proc_dointvec_userhz_jiffies,
2881                },
2882                [NEIGH_VAR_PROXY_DELAY] = {
2883                        .procname       = "proxy_delay",
2884                        .maxlen         = sizeof(int),
2885                        .mode           = 0644,
2886                        .proc_handler   = proc_dointvec_userhz_jiffies,
2887                },
2888                [NEIGH_VAR_LOCKTIME] = {
2889                        .procname       = "locktime",
2890                        .maxlen         = sizeof(int),
2891                        .mode           = 0644,
2892                        .proc_handler   = proc_dointvec_userhz_jiffies,
2893                },
2894                [NEIGH_VAR_RETRANS_TIME_MS] = {
2895                        .procname       = "retrans_time_ms",
2896                        .maxlen         = sizeof(int),
2897                        .mode           = 0644,
2898                        .proc_handler   = proc_dointvec_ms_jiffies,
2899                },
2900                [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2901                        .procname       = "base_reachable_time_ms",
2902                        .maxlen         = sizeof(int),
2903                        .mode           = 0644,
2904                        .proc_handler   = proc_dointvec_ms_jiffies,
2905                },
2906                [NEIGH_VAR_GC_INTERVAL] = {
2907                        .procname       = "gc_interval",
2908                        .maxlen         = sizeof(int),
2909                        .mode           = 0644,
2910                        .proc_handler   = proc_dointvec_jiffies,
2911                },
2912                [NEIGH_VAR_GC_THRESH1] = {
2913                        .procname       = "gc_thresh1",
2914                        .maxlen         = sizeof(int),
2915                        .mode           = 0644,
2916                        .proc_handler   = proc_dointvec,
2917                },
2918                [NEIGH_VAR_GC_THRESH2] = {
2919                        .procname       = "gc_thresh2",
2920                        .maxlen         = sizeof(int),
2921                        .mode           = 0644,
2922                        .proc_handler   = proc_dointvec,
2923                },
2924                [NEIGH_VAR_GC_THRESH3] = {
2925                        .procname       = "gc_thresh3",
2926                        .maxlen         = sizeof(int),
2927                        .mode           = 0644,
2928                        .proc_handler   = proc_dointvec,
2929                },
2930                {},
2931        },
2932};
2933
2934int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2935                          char *p_name, proc_handler *handler)
2936{
2937        struct neigh_sysctl_table *t;
2938        const char *dev_name_source = NULL;
2939        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2940
2941        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2942        if (!t)
2943                goto err;
2944
2945        t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2946        t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2947        t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2948        t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2949        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2950        t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2951        t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2952        t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2953        t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2954        t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2955        t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2956        t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2957        t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2958        t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2959        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2960
2961        if (dev) {
2962                dev_name_source = dev->name;
2963                /* Terminate the table early */
2964                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2965                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2966        } else {
2967                dev_name_source = "default";
2968                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2969                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2970                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2971                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2972        }
2973
2974
2975        if (handler) {
2976                /* RetransTime */
2977                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2978                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2979                /* ReachableTime */
2980                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2981                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2982                /* RetransTime (in milliseconds)*/
2983                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2984                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2985                /* ReachableTime (in milliseconds) */
2986                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2987                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2988        }
2989
2990        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2991                p_name, dev_name_source);
2992        t->sysctl_header =
2993                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2994        if (!t->sysctl_header)
2995                goto free;
2996
2997        p->sysctl_table = t;
2998        return 0;
2999
3000free:
3001        kfree(t);
3002err:
3003        return -ENOBUFS;
3004}
3005EXPORT_SYMBOL(neigh_sysctl_register);
3006
3007void neigh_sysctl_unregister(struct neigh_parms *p)
3008{
3009        if (p->sysctl_table) {
3010                struct neigh_sysctl_table *t = p->sysctl_table;
3011                p->sysctl_table = NULL;
3012                unregister_net_sysctl_table(t->sysctl_header);
3013                kfree(t);
3014        }
3015}
3016EXPORT_SYMBOL(neigh_sysctl_unregister);
3017
3018#endif  /* CONFIG_SYSCTL */
3019
3020static int __init neigh_init(void)
3021{
3022        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3023        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3024        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3025
3026        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3027                      NULL);
3028        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3029
3030        return 0;
3031}
3032
3033subsys_initcall(neigh_init);
3034
3035