/* linux/net/core/neighbour.c */
   1/*
   2 *      Generic address resolution entity
   3 *
   4 *      Authors:
   5 *      Pedro Roque             <roque@di.fc.ul.pt>
   6 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 *      Fixes:
  14 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
  15 *      Harald Welte            Add neighbour cache statistics like rtstat
  16 */
  17
  18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19
  20#include <linux/slab.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/socket.h>
  25#include <linux/netdevice.h>
  26#include <linux/proc_fs.h>
  27#ifdef CONFIG_SYSCTL
  28#include <linux/sysctl.h>
  29#endif
  30#include <linux/times.h>
  31#include <net/net_namespace.h>
  32#include <net/neighbour.h>
  33#include <net/dst.h>
  34#include <net/sock.h>
  35#include <net/netevent.h>
  36#include <net/netlink.h>
  37#include <linux/rtnetlink.h>
  38#include <linux/random.h>
  39#include <linux/string.h>
  40#include <linux/log2.h>
  41
/* Enable pr_debug() output in this file. */
#define DEBUG
#define NEIGH_DEBUG 1
/* Emit fmt via pr_debug() when level does not exceed NEIGH_DEBUG. */
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
	if (level <= NEIGH_DEBUG)               \
		pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)

/* Proxy-neighbour hash table has PNEIGH_HASHMASK + 1 (= 16) buckets. */
#define PNEIGH_HASHMASK         0xF
  51
  52static void neigh_timer_handler(unsigned long arg);
  53static void __neigh_notify(struct neighbour *n, int type, int flags);
  54static void neigh_update_notify(struct neighbour *neigh);
  55static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
  56
  57static struct neigh_table *neigh_tables;
  58#ifdef CONFIG_PROC_FS
  59static const struct file_operations neigh_stat_seq_fops;
  60#endif
  61
  62/*
  63   Neighbour hash table buckets are protected with rwlock tbl->lock.
  64
  65   - All the scans/updates to hash buckets MUST be made under this lock.
  66   - NOTHING clever should be made under this lock: no callbacks
  67     to protocol backends, no attempts to send something to network.
  68     It will result in deadlocks, if backend/driver wants to use neighbour
  69     cache.
  70   - If the entry requires some non-trivial actions, increase
  71     its reference count and release table lock.
  72
  73   Neighbour entries are protected:
  74   - with reference count.
  75   - with rwlock neigh->lock
  76
  77   Reference count prevents destruction.
  78
  79   neigh->lock mainly serializes ll address data and its validity state.
  80   However, the same lock is used to protect another entry fields:
  81    - timer
  82    - resolution queue
  83
  84   Again, nothing clever shall be made under neigh->lock,
  85   the most complicated procedure, which we allow is dev->hard_header.
  86   It is supposed, that dev->hard_header is simplistic and does
  87   not make callbacks to neighbour tables.
  88
  89   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
   list of neighbour tables. This list is used only in process context.
 */
  92
  93static DEFINE_RWLOCK(neigh_tbl_lock);
  94
/* Fallback output function for dead/stray entries: drop the packet and
 * report the network as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
 100
/* Final teardown for an entry just unlinked from the hash table: run the
 * per-parms cleanup hook (if any), broadcast an RTM_DELNEIGH netlink
 * notification, and drop the table's reference on the entry.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
 109
 110/*
 111 * It is random distribution in the interval (1/2)*base...(3/2)*base.
 112 * It corresponds to default IPv6 settings and is not overridable,
 113 * because it is really reasonable choice.
 114 */
 115
 116unsigned long neigh_rand_reach_time(unsigned long base)
 117{
 118        return base ? (net_random() % base) + (base >> 1) : 0;
 119}
 120EXPORT_SYMBOL(neigh_rand_reach_time);
 121
 122
/* Synchronously shrink the table: walk every bucket and unlink entries
 * that are unreferenced (refcnt == 1, i.e. only the table's own
 * reference is left) and not NUD_PERMANENT.
 *
 * Takes tbl->lock (BH-safe, write) for the whole scan; called from
 * neigh_alloc() when the table exceeds its gc thresholds.
 * Returns 1 if at least one entry was removed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink with rcu_assign_pointer() so
				 * lockless RCU readers see a consistent list.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
 168
/* Arm @n's timer to fire at @when, taking a reference that the timer
 * handler (or neigh_del_timer()) is responsible for dropping.
 *
 * NOTE(review): if mod_timer() returns nonzero the timer was already
 * pending, and the reference taken above is never released; this path
 * is treated as a bug (hence the dump_stack()) rather than handled.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
 178
 179static int neigh_del_timer(struct neighbour *n)
 180{
 181        if ((n->nud_state & NUD_IN_TIMER) &&
 182            del_timer(&n->timer)) {
 183                neigh_release(n);
 184                return 1;
 185        }
 186        return 0;
 187}
 188
 189static void pneigh_queue_purge(struct sk_buff_head *list)
 190{
 191        struct sk_buff *skb;
 192
 193        while ((skb = skb_dequeue(list)) != NULL) {
 194                dev_put(skb->dev);
 195                kfree_skb(skb);
 196        }
 197}
 198
/* Unlink every entry belonging to @dev (or every entry when @dev is
 * NULL).  Caller must hold tbl->lock write-locked.
 *
 * Entries still referenced elsewhere cannot be freed yet: they are
 * neutered in place (arp_queue purged, output redirected to
 * neigh_blackhole, state forced to NUD_NOARP/NUD_NONE) and get
 * destroyed when their last reference is dropped.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
 248
/* Device hardware address changed: flush all of @dev's cached entries
 * under tbl->lock so stale link-layer addresses are discarded.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
 256
/* Device is going down: flush its neighbour entries and proxy entries,
 * then stop the proxy timer and drain the proxy queue.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Timer and queue teardown must happen outside tbl->lock. */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
 269
/* Allocate and minimally initialize a new neighbour entry for @tbl/@dev.
 *
 * Bumps tbl->entries first; past gc_thresh3, or past gc_thresh2 with the
 * last flush more than 5s ago, a synchronous neigh_forced_gc() is tried,
 * and the allocation is refused (NULL) if the table is still over
 * gc_thresh3.  entry_size + dev->neigh_priv_len bytes are allocated so
 * the driver's private area sits behind the neighbour struct.
 *
 * The entry is returned unhashed, with refcnt = 1 and dead = 1; the
 * caller (__neigh_create()) finishes setup and links it into the table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not yet linked into the hash table */
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
 310
/* Fill *x with random bits for hash keying; forcing the low bit on
 * guarantees the value is odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
 316
/* Allocate a hash table with 2^shift zeroed buckets and fresh random
 * hash seeds.  Small bucket arrays come from kzalloc(); larger ones
 * from whole zeroed pages (mirrored by neigh_hash_free_rcu()).
 * Returns NULL on allocation failure.  GFP_ATOMIC throughout, so this
 * is safe to call under tbl->lock.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
 343
/* RCU callback that frees a retired hash table once no readers can
 * still reference it.  Must mirror neigh_hash_alloc()'s size-dependent
 * choice between kzalloc() and __get_free_pages().
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
 358
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry (the new table has its own random hash seeds).  Caller must
 * hold tbl->lock write-locked.
 *
 * On allocation failure the old table is returned unchanged; otherwise
 * the old table is retired via call_rcu() so in-flight lockless readers
 * can finish, and the new table is returned.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
 399
/* Find the entry for @pkey on @dev in @tbl.
 *
 * Lockless: walks the bucket under rcu_read_lock_bh().  On a key match
 * the entry's refcount is taken with atomic_inc_not_zero(); if the
 * entry is already being torn down (refcnt hit zero) NULL is returned
 * even though the key matched.  On success the caller owns a reference
 * and must neigh_release() it.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			/* "hits" counts key matches, even when the
			 * refcount grab fails and NULL is returned.
			 */
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
 429
/* Like neigh_lookup(), but matches on key and network namespace only,
 * ignoring the device (the key is hashed with dev == NULL).  Returns a
 * referenced entry or NULL; caller must neigh_release() on success.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
 460
/* Create the neighbour entry for @pkey/@dev and link it into @tbl.
 *
 * The protocol constructor, the device's ndo_neigh_construct and the
 * per-parms neigh_setup hook all run before tbl->lock is taken.  Under
 * the lock the table is grown if entries outnumber buckets, and the
 * target bucket is re-scanned: if a concurrent creator already inserted
 * the same key, that existing entry is returned instead and the new one
 * released.  @want_ref selects whether the returned entry carries an
 * extra reference for the caller.
 *
 * Returns the (possibly pre-existing) entry, or an ERR_PTR() on
 * failure.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate the confirmation stamp so the new entry does not
	 * start out looking recently confirmed.
	 */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms was marked dead while we were setting up — presumably
	 * its owner is being torn down; refuse the insert.
	 */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Lost a race with a concurrent creator?  Return its entry. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the bucket head for lockless RCU readers. */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
 548
 549static u32 pneigh_hash(const void *pkey, int key_len)
 550{
 551        u32 hash_val = *(u32 *)(pkey + key_len - 4);
 552        hash_val ^= (hash_val >> 16);
 553        hash_val ^= hash_val >> 8;
 554        hash_val ^= hash_val >> 4;
 555        hash_val &= PNEIGH_HASHMASK;
 556        return hash_val;
 557}
 558
 559static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 560                                              struct net *net,
 561                                              const void *pkey,
 562                                              int key_len,
 563                                              struct net_device *dev)
 564{
 565        while (n) {
 566                if (!memcmp(n->key, pkey, key_len) &&
 567                    net_eq(pneigh_net(n), net) &&
 568                    (n->dev == dev || !n->dev))
 569                        return n;
 570                n = n->next;
 571        }
 572        return NULL;
 573}
 574
 575struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
 576                struct net *net, const void *pkey, struct net_device *dev)
 577{
 578        int key_len = tbl->key_len;
 579        u32 hash_val = pneigh_hash(pkey, key_len);
 580
 581        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 582                                 net, pkey, key_len, dev);
 583}
 584EXPORT_SYMBOL_GPL(__pneigh_lookup);
 585
/* Look up a proxy-neighbour entry, optionally creating it.
 *
 * The lookup takes tbl->lock for reading.  When @creat is nonzero and
 * no entry exists, a new one is allocated with GFP_KERNEL under the
 * RTNL (asserted), initialized via the table's pconstructor (if any),
 * and linked into the bucket under tbl->lock.
 *
 * Returns the entry, or NULL on lookup miss (!creat) or on
 * allocation/constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the device/netns holds. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
 631
 632
/* Remove and free the proxy entry matching @pkey/@dev/@net.
 *
 * The entry is unlinked under tbl->lock; the destructor and the
 * device/netns reference drops then run outside the lock.
 * Returns 0 on success, -ENOENT if no entry matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
 659
/* Drop every proxy entry for @dev (or all entries when @dev is NULL).
 *
 * Caller must hold tbl->lock write-locked — the only visible caller is
 * neigh_ifdown(), which runs this under write_lock_bh(&tbl->lock).
 * Always returns -ENOENT; that caller ignores the value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
 683
 684static void neigh_parms_destroy(struct neigh_parms *parms);
 685
/* Drop a reference on @parms; destroy it when the last one goes away. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
 691
/*
 *	neighbour must already be out of the table (dead == 1);
 *	releases every resource the entry still holds and frees it
 *	via RCU.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	/* Refusing to free a still-hashed entry avoids use-after-free
	 * for lockless readers; warn and deliberately leak instead.
	 */
	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* Whoever unlinked us should already have stopped the timer. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);	/* readers may still be traversing ->next */
}
EXPORT_SYMBOL(neigh_destroy);
 728
/* Neighbour state is suspicious;
   disable fast path: route output through the generic (resolving)
   ops->output handler instead of the connected fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
 740
/* Neighbour state is OK;
   enable fast path: route output through ops->connected_output, the
   counterpart of neigh_suspect().

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
 752
 753static void neigh_periodic_work(struct work_struct *work)
 754{
 755        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
 756        struct neighbour *n;
 757        struct neighbour __rcu **np;
 758        unsigned int i;
 759        struct neigh_hash_table *nht;
 760
 761        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 762
 763        write_lock_bh(&tbl->lock);
 764        nht = rcu_dereference_protected(tbl->nht,
 765                                        lockdep_is_held(&tbl->lock));
 766
 767        if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
 768                goto out;
 769
 770        /*
 771         *      periodically recompute ReachableTime from random function
 772         */
 773
 774        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
 775                struct neigh_parms *p;
 776                tbl->last_rand = jiffies;
 777                for (p = &tbl->parms; p; p = p->next)
 778                        p->reachable_time =
 779                                neigh_rand_reach_time(p->base_reachable_time);
 780        }
 781
 782        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
 783                np = &nht->hash_buckets[i];
 784
 785                while ((n = rcu_dereference_protected(*np,
 786                                lockdep_is_held(&tbl->lock))) != NULL) {
 787                        unsigned int state;
 788
 789                        write_lock(&n->lock);
 790
 791                        state = n->nud_state;
 792                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
 793                                write_unlock(&n->lock);
 794                                goto next_elt;
 795                        }
 796
 797                        if (time_before(n->used, n->confirmed))
 798                                n->used = n->confirmed;
 799
 800                        if (atomic_read(&n->refcnt) == 1 &&
 801                            (state == NUD_FAILED ||
 802                             time_after(jiffies, n->used + n->parms->gc_staletime))) {
 803                                *np = n->next;
 804                                n->dead = 1;
 805                                write_unlock(&n->lock);
 806                                neigh_cleanup_and_release(n);
 807                                continue;
 808                        }
 809                        write_unlock(&n->lock);
 810
 811next_elt:
 812                        np = &n->next;
 813                }
 814                /*
 815                 * It's fine to release lock here, even if hash table
 816                 * grows while we are preempted.
 817                 */
 818                write_unlock_bh(&tbl->lock);
 819                cond_resched();
 820                write_lock_bh(&tbl->lock);
 821                nht = rcu_dereference_protected(tbl->nht,
 822                                                lockdep_is_held(&tbl->lock));
 823        }
 824out:
 825        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
 826         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
 827         * base_reachable_time.
 828         */
 829        schedule_delayed_work(&tbl->gc_work,
 830                              tbl->parms.base_reachable_time >> 1);
 831        write_unlock_bh(&tbl->lock);
 832}
 833
 834static __inline__ int neigh_max_probes(struct neighbour *n)
 835{
 836        struct neigh_parms *p = n->parms;
 837        return (n->nud_state & NUD_PROBE) ?
 838                p->ucast_probes :
 839                p->ucast_probes + p->app_probes + p->mcast_probes;
 840}
 841
/* Entry reached NUD_FAILED: report every queued packet as unreachable
 * via ops->error_report() and flush the arp_queue.
 *
 * Called with neigh->lock write-held; the lock is dropped and re-taken
 * around each error_report() call (see the comment below).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
 866
/* Send one solicitation for @neigh via ops->solicit(), based on a
 * copy of the oldest queued skb (if any), and count the attempt in
 * neigh->probes.
 *
 * Entered with neigh->lock write-held; the lock is released before
 * calling into ops->solicit() (hence the __releases annotation).
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
 879
/* Called when a timer expires for a neighbour entry. */

/* Per-neighbour state-machine timer: moves the entry through
 * REACHABLE -> DELAY -> PROBE (or STALE) transitions, retransmits
 * probes, and fails the entry once neigh_max_probes() is exceeded.
 * Runs in timer context, hence write_lock() without the _bh variant.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The timer can fire after the entry already left the timed states. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed recently enough; just re-arm. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			/* Recently used but unconfirmed: delay before probing. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			/* A confirmation arrived while we were delaying. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	/* Probe budget exhausted: give up on this entry. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	/* Re-arm, no sooner than HZ/2 from now.  mod_timer() returning 0
	 * means the timer was inactive, so take an extra reference for it.
	 */
	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock itself. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	/* Drop the reference held for this timer invocation. */
	neigh_release(neigh);
}
 963
/* Slow path of neigh_event_send(): the entry is not currently usable.
 * Start resolution if needed and, while it is in progress, queue @skb
 * on arp_queue (bounded by queue_len_bytes, oldest dropped first).
 *
 * Return: 0 if the caller may transmit immediately, 1 if the skb was
 * queued (or dropped) pending resolution.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or being verified: nothing to do here. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe this neighbour: fail at once. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Keep the unresolved queue within its byte
			 * budget by discarding the oldest packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases neigh->lock itself; either way BH was
	 * disabled by write_lock_bh() above and is re-enabled here.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
1028EXPORT_SYMBOL(__neigh_event_send);
1029
1030static void neigh_update_hhs(struct neighbour *neigh)
1031{
1032        struct hh_cache *hh;
1033        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034                = NULL;
1035
1036        if (neigh->dev->header_ops)
1037                update = neigh->dev->header_ops->cache_update;
1038
1039        if (update) {
1040                hh = &neigh->hh;
1041                if (hh->hh_len) {
1042                        write_seqlock_bh(&hh->hh_lock);
1043                        update(hh, neigh->dev, neigh->ha);
1044                        write_sequnlock_bh(&hh->hh_lock);
1045                }
1046        }
1047}
1048
1049
1050
1051/* Generic update routine.
1052   -- lladdr is new lladdr or NULL, if it is not supplied.
1053   -- new    is new state.
1054   -- flags
1055        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1056                                if it is different.
1057        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1058                                lladdr instead of overriding it
1059                                if it is different.
1060                                It also allows to retain current state
1061                                if lladdr is unchanged.
1062        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1063
1064        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1065                                NTF_ROUTER flag.
1066        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1067                                a router.
1068
1069   Caller MUST hold reference count on the entry.
1070 */
1071
/* Apply a state/lladdr change to @neigh per the flags documented in
 * the comment block above.  Returns 0 on success, -EPERM for a
 * non-admin change to a NOARP/PERMANENT entry, -EINVAL when no lladdr
 * is available.  Caller must hold a reference on the entry.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* NOARP/PERMANENT entries may only be changed administratively. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	/* Transition to an invalid state: stop the timer and, when
	 * resolution was in progress and the new state is FAILED,
	 * flush the pending queue via neigh_invalidate().
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached lladdr but mark the
				 * entry STALE instead of overriding.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new hardware address under ha_lock so
		 * seqlock readers see a consistent value, then refresh
		 * any cached hardware headers.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate 'confirmed' so a non-CONNECTED entry is due
		 * for re-confirmation immediately.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
1231EXPORT_SYMBOL(neigh_update);
1232
1233struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1234                                 u8 *lladdr, void *saddr,
1235                                 struct net_device *dev)
1236{
1237        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1238                                                 lladdr || !dev->addr_len);
1239        if (neigh)
1240                neigh_update(neigh, lladdr, NUD_STALE,
1241                             NEIGH_UPDATE_F_OVERRIDE);
1242        return neigh;
1243}
1244EXPORT_SYMBOL(neigh_event_ns);
1245
/* Takes n->lock itself (write_lock_bh below) - callers must NOT hold it. */
/* Populate the neighbour's cached hardware header (n->hh) via the
 * device's header_ops->cache() callback, exactly once per entry.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1263
1264/* This function can be used in contexts, where only old dev_queue_xmit
1265 * worked, f.e. if you want to override normal output path (eql, shaper),
1266 * but resolution is not made yet.
1267 */
1268
int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* Position skb->data at the network header so the link-layer
	 * header is built in front of it.
	 */
	__skb_pull(skb, skb_network_offset(skb));

	/* Build the header without a destination address; if that
	 * fails, fall back to the device's rebuild hook.  A non-zero
	 * rebuild() return aborts the transmit with 0.
	 */
	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
1282EXPORT_SYMBOL(neigh_compat_output);
1283
1284/* Slow and careful. */
1285
/* Output path for entries that may still need resolution: kick off or
 * wait on resolution via neigh_event_send(), and only build the
 * link-layer header and transmit once the entry is usable.  Packets
 * without a dst are dropped with -EINVAL.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	/* neigh_event_send() returns 0 when the entry is usable now;
	 * otherwise the skb was queued or dropped by it.
	 */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Populate the cached hardware header once, if the
		 * device supports header caching.
		 */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Retry until we built the header from a consistent
		 * snapshot of neigh->ha (seqlock read side).
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	/* Intentionally falls through to out_kfree_skb below. */
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1323EXPORT_SYMBOL(neigh_resolve_output);
1324
1325/* As fast as possible without hh cache */
1326
/* Output path for already-resolved entries: build the link-layer
 * header from neigh->ha and transmit, without consulting the hh
 * cache.  Returns the dev_queue_xmit() result, or -EINVAL when
 * header construction fails (skb is freed in that case).
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	/* Retry until the header was built from a consistent snapshot
	 * of neigh->ha (seqlock read side).
	 */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1348EXPORT_SYMBOL(neigh_connected_output);
1349
/* Trivial output path: no header work here, the skb goes straight to
 * dev_queue_xmit(); the neighbour argument is unused.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1354EXPORT_SYMBOL(neigh_direct_output);
1355
/* Timer callback for the per-table proxy queue: hand every queued skb
 * whose scheduled time has passed to tbl->proxy_redo (or free it if
 * the device went down), then re-arm the timer for the earliest
 * remaining packet.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;	/* earliest future packet */
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1389
/* Queue @skb for delayed proxy processing at a random point within
 * p->proxy_delay from now (spreading out proxy replies).  The skb is
 * dropped when the queue already exceeds p->proxy_qlen.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	/* NOTE(review): p->proxy_delay == 0 would be a modulo by zero
	 * here - presumably callers only enqueue with a positive
	 * proxy_delay; verify against the callers.
	 */
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* Keep the earlier expiry if the timer was already pending. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1415EXPORT_SYMBOL(pneigh_enqueue);
1416
1417static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418                                                      struct net *net, int ifindex)
1419{
1420        struct neigh_parms *p;
1421
1422        for (p = &tbl->parms; p; p = p->next) {
1423                if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424                    (!p->dev && !ifindex && net_eq(net, &init_net)))
1425                        return p;
1426        }
1427
1428        return NULL;
1429}
1430
/* Allocate a per-device copy of @tbl's default parameters, let the
 * driver's ndo_neigh_setup() adjust (or veto) it, and link it into
 * the table's parms list.  Holds references on @dev and its netns for
 * the lifetime of the parms.  Returns NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Start from the table-wide defaults. */
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		/* Let the driver customise or reject the new parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
1463EXPORT_SYMBOL(neigh_parms_alloc);
1464
/* RCU callback scheduled by neigh_parms_release(): drop the unlink
 * reference via neigh_parms_put() once readers are done.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1472
/* Unlink @parms from @tbl's list and schedule its release after an
 * RCU grace period.  The table default (&tbl->parms) is never
 * released.  Logs a debug message if @parms was not on the list.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			/* Unlink under the lock, then do the reference
			 * drops outside it.
			 */
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
1494EXPORT_SYMBOL(neigh_parms_release);
1495
/* Final destructor for a neigh_parms: drop the netns reference and
 * free the structure.  Presumably reached when the last reference is
 * put (see neigh_parms_put(), defined elsewhere).
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1501
1502static struct lock_class_key neigh_table_proxy_queue_class;
1503
/* Core initialisation of a neighbour table: default parms, percpu
 * statistics, proc entry, hash tables, periodic GC work and the proxy
 * timer.  Panics on allocation failure (early-init path).
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	/* Randomise reachable_time around its base value. */
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash with shift 3; it can be grown later. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* Entry size covers the header up to and including the key,
	 * unless the protocol pre-set a (properly aligned) size.
	 */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1548
/* Initialise @tbl and register it on the global neigh_tables list.
 * Only one table per address family is expected; a duplicate is still
 * linked but reported loudly with a stack dump.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* Scan for an existing table of the same family (tmp != NULL
	 * afterwards signals a duplicate).
	 */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
1569EXPORT_SYMBOL(neigh_table_init);
1570
/* Tear a neighbour table down (module unload): stop the GC work and
 * proxy timer, flush all entries, unlink from the global list, and
 * free the hashes, proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	/* Any remaining entries indicate a refcount leak somewhere. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Free the hash table only after an RCU grace period. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
1605EXPORT_SYMBOL(neigh_table_clear);
1606
/* RTM_DELNEIGH netlink handler: locate the table for ndm_family, then
 * either remove a proxy entry (NTF_PROXY) or administratively force
 * the matching neighbour to NUD_FAILED via neigh_update().
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	/* The destination address attribute is mandatory. */
	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Found the family's table; drop the list lock before
		 * operating on it.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		/* "Delete" == administratively fail the entry. */
		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1670
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * from a netlink request.  Called with RTNL held.
 *
 * Returns 0 on success or a negative errno: -EINVAL for malformed
 * attributes, -ENODEV for an unknown ifindex, -ENOENT when the entry
 * does not exist and NLM_F_CREATE was not given, -EEXIST when it does
 * and NLM_F_EXCL was given, -EAFNOSUPPORT when no table matches
 * ndm_family.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A link-layer address shorter than the device's address
		 * length cannot be valid for this device.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		/* Admin requests override existing entries by default;
		 * the NLM_F_REPLACE handling below may clear the bit.
		 */
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found: drop the table-list lock before
		 * the heavier lookup/update work below.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			/* Proxy entries live in the pneigh hash: create
			 * (or refresh the flags of) the entry and finish.
			 */
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE an existing entry may be
			 * updated but not overridden.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE only marks the entry as used, kicking
			 * resolution if necessary; no state is forced.
			 */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1768
1769static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1770{
1771        struct nlattr *nest;
1772
1773        nest = nla_nest_start(skb, NDTA_PARMS);
1774        if (nest == NULL)
1775                return -ENOBUFS;
1776
1777        if ((parms->dev &&
1778             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1779            nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1780            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1781            /* approximative value for deprecated QUEUE_LEN (in packets) */
1782            nla_put_u32(skb, NDTPA_QUEUE_LEN,
1783                        parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1784            nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1785            nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1786            nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1787            nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1788            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1789            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1790                          parms->base_reachable_time) ||
1791            nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1792            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1793                          parms->delay_probe_time) ||
1794            nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1795            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1796            nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1797            nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1798                goto nla_put_failure;
1799        return nla_nest_end(skb, nest);
1800
1801nla_put_failure:
1802        nla_nest_cancel(skb, nest);
1803        return -EMSGSIZE;
1804}
1805
/* Build an RTM_NEWNEIGHTBL message describing a neighbour table:
 * GC thresholds/interval, a binary NDTA_CONFIG snapshot, aggregated
 * per-CPU NDTA_STATS, and the table's default parms.
 * Returns the message length on success, -EMSGSIZE if skb is full.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold tbl->lock so the dumped values are mutually consistent. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* hash_rnd/hash_mask live in the RCU-managed hash table
		 * and must be read under rcu_read_lock_bh().
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-CPU counters into one ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parms are device-independent. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1891
1892static int neightbl_fill_param_info(struct sk_buff *skb,
1893                                    struct neigh_table *tbl,
1894                                    struct neigh_parms *parms,
1895                                    u32 pid, u32 seq, int type,
1896                                    unsigned int flags)
1897{
1898        struct ndtmsg *ndtmsg;
1899        struct nlmsghdr *nlh;
1900
1901        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1902        if (nlh == NULL)
1903                return -EMSGSIZE;
1904
1905        ndtmsg = nlmsg_data(nlh);
1906
1907        read_lock_bh(&tbl->lock);
1908        ndtmsg->ndtm_family = tbl->family;
1909        ndtmsg->ndtm_pad1   = 0;
1910        ndtmsg->ndtm_pad2   = 0;
1911
1912        if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1913            neightbl_fill_parms(skb, parms) < 0)
1914                goto errout;
1915
1916        read_unlock_bh(&tbl->lock);
1917        return nlmsg_end(skb, nlh);
1918errout:
1919        read_unlock_bh(&tbl->lock);
1920        nlmsg_cancel(skb, nlh);
1921        return -EMSGSIZE;
1922}
1923
/* Validation policy for top-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1932
1933static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1934        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1935        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1936        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1937        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1938        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1939        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1940        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1941        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1942        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1943        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1944        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1945        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1946        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1947};
1948
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC thresholds /
 * interval and/or one of its per-device parameter sets (NDTA_PARMS).
 * The table is selected by NDTA_NAME (and optionally family).
 * Returns 0 on success or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied NDTPA_* attribute to the parms. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* deprecated: given in packets, stored
				 * internally in bytes
				 */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	/* Table-global settings may only be changed from init_net. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2083
/* Dump all neighbour tables (and their per-device parms) matching the
 * requested family.  cb->args[0] is the table index to resume at,
 * cb->args[1] the parms index within that table.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms.next starts the list of per-device parms;
		 * the default parms were dumped by neightbl_fill_info().
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* only skip parms in the table we resumed inside */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2132
/* Build an RTM_NEWNEIGH message describing one neighbour entry.
 * Returns the message length on success, -EMSGSIZE if skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* State, hardware address and timestamps are snapshotted under
	 * the neighbour lock so they are mutually consistent.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* -1: presumably excludes one internally-held reference so only
	 * external users are reported — NOTE(review): confirm intent
	 */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2184
2185static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2186                            u32 pid, u32 seq, int type, unsigned int flags,
2187                            struct neigh_table *tbl)
2188{
2189        struct nlmsghdr *nlh;
2190        struct ndmsg *ndm;
2191
2192        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2193        if (nlh == NULL)
2194                return -EMSGSIZE;
2195
2196        ndm = nlmsg_data(nlh);
2197        ndm->ndm_family  = tbl->family;
2198        ndm->ndm_pad1    = 0;
2199        ndm->ndm_pad2    = 0;
2200        ndm->ndm_flags   = pn->flags | NTF_PROXY;
2201        ndm->ndm_type    = NDA_DST;
2202        ndm->ndm_ifindex = pn->dev->ifindex;
2203        ndm->ndm_state   = NUD_NONE;
2204
2205        if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2206                goto nla_put_failure;
2207
2208        return nlmsg_end(skb, nlh);
2209
2210nla_put_failure:
2211        nlmsg_cancel(skb, nlh);
2212        return -EMSGSIZE;
2213}
2214
/* Notify both the in-kernel netevent chain and rtnetlink listeners
 * that a neighbour entry has changed.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2220
/* Dump one table's neighbour entries into an RTM_GETNEIGH reply.
 * Resume state (hash bucket and index within the bucket) is kept in
 * cb->args[1] and cb->args[2].  Returns skb->len on completion or -1
 * when the skb filled up (dump continues in the next call).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* only skip entries inside the bucket we resumed in */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				/* skb full: record where to resume */
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2261
2262static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2263                             struct netlink_callback *cb)
2264{
2265        struct pneigh_entry *n;
2266        struct net *net = sock_net(skb->sk);
2267        int rc, h, s_h = cb->args[3];
2268        int idx, s_idx = idx = cb->args[4];
2269
2270        read_lock_bh(&tbl->lock);
2271
2272        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2273                if (h > s_h)
2274                        s_idx = 0;
2275                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2276                        if (dev_net(n->dev) != net)
2277                                continue;
2278                        if (idx < s_idx)
2279                                goto next;
2280                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2281                                            cb->nlh->nlmsg_seq,
2282                                            RTM_NEWNEIGH,
2283                                            NLM_F_MULTI, tbl) <= 0) {
2284                                read_unlock_bh(&tbl->lock);
2285                                rc = -1;
2286                                goto out;
2287                        }
2288                next:
2289                        idx++;
2290                }
2291        }
2292
2293        read_unlock_bh(&tbl->lock);
2294        rc = skb->len;
2295out:
2296        cb->args[3] = h;
2297        cb->args[4] = idx;
2298        return rc;
2299
2300}
2301
/* RTM_GETNEIGH dump handler: walk every neighbour table matching the
 * requested family and dump either proxy entries (request's ndm_flags
 * == NTF_PROXY) or regular neighbours.  cb->args[0] is the table index
 * to resume at; cb->args[1..] belong to the per-table dumpers.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* entering a new table: reset the per-table resume state */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2340
2341void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2342{
2343        int chain;
2344        struct neigh_hash_table *nht;
2345
2346        rcu_read_lock_bh();
2347        nht = rcu_dereference_bh(tbl->nht);
2348
2349        read_lock(&tbl->lock); /* avoid resizes */
2350        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2351                struct neighbour *n;
2352
2353                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2354                     n != NULL;
2355                     n = rcu_dereference_bh(n->next))
2356                        cb(n, cookie);
2357        }
2358        read_unlock(&tbl->lock);
2359        rcu_read_unlock_bh();
2360}
2361EXPORT_SYMBOL(neigh_for_each);
2362
/* Unlink and release every neighbour for which cb(n) returns non-zero.
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* unlink n from the chain and mark it dead
				 * while still holding its lock
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* release after dropping n->lock to avoid freeing
			 * under the entry's own lock
			 */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2397
2398#ifdef CONFIG_PROC_FS
2399
/* seq_file helper: position the walk at the first visible neighbour,
 * scanning from bucket 0, honouring the current net namespace, the
 * NEIGH_SEQ_SKIP_NOARP flag and any protocol sub-iterator.  Updates
 * state->bucket; returns NULL if no entry qualifies.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* the sub-iterator decides whether this
				 * entry yields a first sub-position
				 */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* skip pure-NOARP entries when asked to */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2438
/* seq_file helper: advance from @n to the next visible neighbour,
 * applying the same filters as neigh_get_first() and moving on to the
 * following hash bucket(s) when a chain is exhausted.  Decrements *pos
 * when an entry is found (sub-iterators manage *pos themselves).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* let the protocol sub-iterator advance within this entry first */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			/* skip pure-NOARP entries when asked to */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* chain exhausted: move on to the next hash bucket */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2486
2487static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2488{
2489        struct neighbour *n = neigh_get_first(seq);
2490
2491        if (n) {
2492                --(*pos);
2493                while (*pos) {
2494                        n = neigh_get_next(seq, n, pos);
2495                        if (!n)
2496                                break;
2497                }
2498        }
2499        return *pos ? NULL : n;
2500}
2501
2502static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2503{
2504        struct neigh_seq_state *state = seq->private;
2505        struct net *net = seq_file_net(seq);
2506        struct neigh_table *tbl = state->tbl;
2507        struct pneigh_entry *pn = NULL;
2508        int bucket = state->bucket;
2509
2510        state->flags |= NEIGH_SEQ_IS_PNEIGH;
2511        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2512                pn = tbl->phash_buckets[bucket];
2513                while (pn && !net_eq(pneigh_net(pn), net))
2514                        pn = pn->next;
2515                if (pn)
2516                        break;
2517        }
2518        state->bucket = bucket;
2519
2520        return pn;
2521}
2522
2523static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2524                                            struct pneigh_entry *pn,
2525                                            loff_t *pos)
2526{
2527        struct neigh_seq_state *state = seq->private;
2528        struct net *net = seq_file_net(seq);
2529        struct neigh_table *tbl = state->tbl;
2530
2531        do {
2532                pn = pn->next;
2533        } while (pn && !net_eq(pneigh_net(pn), net));
2534
2535        while (!pn) {
2536                if (++state->bucket > PNEIGH_HASHMASK)
2537                        break;
2538                pn = tbl->phash_buckets[state->bucket];
2539                while (pn && !net_eq(pneigh_net(pn), net))
2540                        pn = pn->next;
2541                if (pn)
2542                        break;
2543        }
2544
2545        if (pn && pos)
2546                --(*pos);
2547
2548        return pn;
2549}
2550
2551static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2552{
2553        struct pneigh_entry *pn = pneigh_get_first(seq);
2554
2555        if (pn) {
2556                --(*pos);
2557                while (*pos) {
2558                        pn = pneigh_get_next(seq, pn, pos);
2559                        if (!pn)
2560                                break;
2561                }
2562        }
2563        return *pos ? NULL : pn;
2564}
2565
2566static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2567{
2568        struct neigh_seq_state *state = seq->private;
2569        void *rc;
2570        loff_t idxpos = *pos;
2571
2572        rc = neigh_get_idx(seq, &idxpos);
2573        if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2574                rc = pneigh_get_idx(seq, &idxpos);
2575
2576        return rc;
2577}
2578
/*
 * seq_file ->start handler for neighbour tables, exported for protocol
 * procfs code.  Takes rcu_read_lock_bh(), which stays held for the
 * whole dump and is released in neigh_seq_stop().  Returns
 * SEQ_START_TOKEN for the header line, or the *pos'th entry.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        /* IS_PNEIGH is iterator-internal state; callers must not set it. */
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2594
/*
 * seq_file ->next handler.  After the header token comes the neighbour
 * hash table; once that is exhausted (and unless NEIGH_SEQ_NEIGH_ONLY)
 * the walk hands over to the proxy (pneigh) table.  The handover is
 * tracked by the NEIGH_SEQ_IS_PNEIGH flag, set by pneigh_get_first().
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                /* Still walking the neighbour table proper. */
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                /* Already crossed over into the proxy table. */
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2621
/* Drop the BH-disabled RCU read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2628
2629/* statistics via seq_file */
2630
2631static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2632{
2633        struct neigh_table *tbl = seq->private;
2634        int cpu;
2635
2636        if (*pos == 0)
2637                return SEQ_START_TOKEN;
2638
2639        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2640                if (!cpu_possible(cpu))
2641                        continue;
2642                *pos = cpu+1;
2643                return per_cpu_ptr(tbl->stats, cpu);
2644        }
2645        return NULL;
2646}
2647
2648static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2649{
2650        struct neigh_table *tbl = seq->private;
2651        int cpu;
2652
2653        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2654                if (!cpu_possible(cpu))
2655                        continue;
2656                *pos = cpu+1;
2657                return per_cpu_ptr(tbl->stats, cpu);
2658        }
2659        return NULL;
2660}
2661
/* Nothing to release: per-CPU stats are static and need no locking here. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2666
2667static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2668{
2669        struct neigh_table *tbl = seq->private;
2670        struct neigh_statistics *st = v;
2671
2672        if (v == SEQ_START_TOKEN) {
2673                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2674                return 0;
2675        }
2676
2677        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2678                        "%08lx %08lx  %08lx %08lx %08lx\n",
2679                   atomic_read(&tbl->entries),
2680
2681                   st->allocs,
2682                   st->destroys,
2683                   st->hash_grows,
2684
2685                   st->lookups,
2686                   st->hits,
2687
2688                   st->res_failed,
2689
2690                   st->rcv_probes_mcast,
2691                   st->rcv_probes_ucast,
2692
2693                   st->periodic_gc_runs,
2694                   st->forced_gc_runs,
2695                   st->unres_discards
2696                   );
2697
2698        return 0;
2699}
2700
/* seq_file operations for the per-table statistics file in /proc. */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2707
2708static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2709{
2710        int ret = seq_open(file, &neigh_stat_seq_ops);
2711
2712        if (!ret) {
2713                struct seq_file *sf = file->private_data;
2714                sf->private = PDE_DATA(inode);
2715        }
2716        return ret;
2717};
2718
/* file_operations for the read-only per-table statistics proc file. */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2726
2727#endif /* CONFIG_PROC_FS */
2728
2729static inline size_t neigh_nlmsg_size(void)
2730{
2731        return NLMSG_ALIGN(sizeof(struct ndmsg))
2732               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2733               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2734               + nla_total_size(sizeof(struct nda_cacheinfo))
2735               + nla_total_size(4); /* NDA_PROBES */
2736}
2737
2738static void __neigh_notify(struct neighbour *n, int type, int flags)
2739{
2740        struct net *net = dev_net(n->dev);
2741        struct sk_buff *skb;
2742        int err = -ENOBUFS;
2743
2744        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2745        if (skb == NULL)
2746                goto errout;
2747
2748        err = neigh_fill_info(skb, n, 0, 0, type, flags);
2749        if (err < 0) {
2750                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2751                WARN_ON(err == -EMSGSIZE);
2752                kfree_skb(skb);
2753                goto errout;
2754        }
2755        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2756        return;
2757errout:
2758        if (err < 0)
2759                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2760}
2761
2762#ifdef CONFIG_ARPD
/*
 * Ask user-space (e.g. an ARP daemon) to resolve this neighbour by
 * multicasting an RTM_GETNEIGH request on the RTNLGRP_NEIGH group.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2768#endif /* CONFIG_ARPD */
2769
2770#ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the sysctl tables below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen (packets) so scaling it to bytes cannot overflow an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2774
2775static int proc_unres_qlen(struct ctl_table *ctl, int write,
2776                           void __user *buffer, size_t *lenp, loff_t *ppos)
2777{
2778        int size, ret;
2779        struct ctl_table tmp = *ctl;
2780
2781        tmp.extra1 = &zero;
2782        tmp.extra2 = &unres_qlen_max;
2783        tmp.data = &size;
2784
2785        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2786        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2787
2788        if (write && !ret)
2789                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2790        return ret;
2791}
2792
/*
 * Indices into neigh_sysctl_template.neigh_vars[].  The GC_* entries
 * exist only in the table-wide "default" directory; for per-device
 * registrations neigh_sysctl_register() terminates the table at
 * NEIGH_VAR_GC_INTERVAL.
 */
enum {
        NEIGH_VAR_MCAST_PROBE,
        NEIGH_VAR_UCAST_PROBE,
        NEIGH_VAR_APP_PROBE,
        NEIGH_VAR_RETRANS_TIME,
        NEIGH_VAR_BASE_REACHABLE_TIME,
        NEIGH_VAR_DELAY_PROBE_TIME,
        NEIGH_VAR_GC_STALETIME,
        NEIGH_VAR_QUEUE_LEN,
        NEIGH_VAR_QUEUE_LEN_BYTES,
        NEIGH_VAR_PROXY_QLEN,
        NEIGH_VAR_ANYCAST_DELAY,
        NEIGH_VAR_PROXY_DELAY,
        NEIGH_VAR_LOCKTIME,
        NEIGH_VAR_RETRANS_TIME_MS,
        NEIGH_VAR_BASE_REACHABLE_TIME_MS,
        NEIGH_VAR_GC_INTERVAL,
        NEIGH_VAR_GC_THRESH1,
        NEIGH_VAR_GC_THRESH2,
        NEIGH_VAR_GC_THRESH3,
        NEIGH_VAR_MAX
};
2815
/*
 * Template for one "net/<proto>/neigh/<name>/" sysctl directory.
 * neigh_sysctl_register() kmemdup()s this template and rebinds each
 * entry's .data pointer to the matching neigh_parms field (or to the
 * table-wide gc_* values for the "default" directory).
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                [NEIGH_VAR_MCAST_PROBE] = {
                        .procname       = "mcast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_UCAST_PROBE] = {
                        .procname       = "ucast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_APP_PROBE] = {
                        .procname       = "app_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_RETRANS_TIME] = {
                        .procname       = "retrans_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME] = {
                        .procname       = "base_reachable_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_DELAY_PROBE_TIME] = {
                        .procname       = "delay_first_probe_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_STALETIME] = {
                        .procname       = "gc_stale_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                /* Legacy packet-count view of unres_qlen_bytes; the
                 * handler converts between packets and bytes. */
                [NEIGH_VAR_QUEUE_LEN] = {
                        .procname       = "unres_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_unres_qlen,
                },
                [NEIGH_VAR_QUEUE_LEN_BYTES] = {
                        .procname       = "unres_qlen_bytes",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_PROXY_QLEN] = {
                        .procname       = "proxy_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_ANYCAST_DELAY] = {
                        .procname       = "anycast_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_PROXY_DELAY] = {
                        .procname       = "proxy_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_LOCKTIME] = {
                        .procname       = "locktime",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_RETRANS_TIME_MS] = {
                        .procname       = "retrans_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
                        .procname       = "base_reachable_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                /* Table-wide GC knobs: present only in the "default"
                 * directory; zapped for per-device registrations. */
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                {},     /* sentinel */
        },
};
2953
/*
 * Create the "net/<p_name>/neigh/<dev-or-default>/" sysctl directory
 * for one neigh_parms instance: duplicate the template, bind its .data
 * pointers to @p's fields, optionally override the time-value handlers
 * with a protocol-specific @handler, then register with the net
 * namespace's sysctl tree.  Returns 0 or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          char *p_name, proc_handler *handler)
{
        struct neigh_sysctl_table *t;
        const char *dev_name_source = NULL;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        /* NEIGH_VAR_QUEUE_LEN deliberately shares queue_len_bytes with
         * NEIGH_VAR_QUEUE_LEN_BYTES: proc_unres_qlen() converts between
         * the packet and byte views of the same storage. */
        t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
        t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
        t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
        t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
        t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
        t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
        t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
        t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                dev_name_source = "default";
                /* NOTE(review): relies on gc_interval/gc_thresh1..3 ints
                 * being laid out directly after the default neigh_parms
                 * inside struct neigh_table — confirm against the
                 * neigh_table definition if that layout changes. */
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
        }


        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
        }

        /* Don't export sysctls to unprivileged users */
        if (neigh_parms_net(p)->user_ns != &init_user_ns)
                t->neigh_vars[0].procname = NULL;

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3030
3031void neigh_sysctl_unregister(struct neigh_parms *p)
3032{
3033        if (p->sysctl_table) {
3034                struct neigh_sysctl_table *t = p->sysctl_table;
3035                p->sysctl_table = NULL;
3036                unregister_net_sysctl_table(t->sysctl_header);
3037                kfree(t);
3038        }
3039}
3040EXPORT_SYMBOL(neigh_sysctl_unregister);
3041
3042#endif  /* CONFIG_SYSCTL */
3043
/*
 * Register rtnetlink handlers for neighbour (RTM_*NEIGH) and
 * neighbour-table (RTM_*NEIGHTBL) messages; the doit/dumpit callbacks
 * are defined earlier in this file.
 */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      NULL);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

        return 0;
}
3056
3057subsys_initcall(neigh_init);
3058
3059