linux/drivers/net/cxgb4/l2t.c
/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <net/neighbour.h>
#include "cxgb4.h"
#include "l2t.h"
#include "t4_msg.h"
#include "t4fw_api.h"

#define VLAN_NONE 0xfff

/* identifies sync vs async L2T_WRITE_REQs */
#define F_SYNC_WR    (1 << 12)
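
/*
 * The sync flag rides in the CPL TID field alongside the L2T index.
 * Assuming L2T_SIZE is 4096 (its value in l2t.h), the index occupies
 * bits 0-11, leaving bit 12 free to mark synchronous writes; the reply
 * handler recovers both by masking.  A sketch of the encoding used by
 * write_l2e() and do_l2t_write_rpl() below:
 *
 *        tid  = e->idx | F_SYNC_WR | TID_QID(qid);   compose for the request
 *        idx  = tid & (L2T_SIZE - 1);                recover the L2T index
 *        sync = tid & F_SYNC_WR;                     recover the sync flag
 */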

enum {
        L2T_STATE_VALID,      /* entry is up to date */
        L2T_STATE_STALE,      /* entry may be used but needs revalidation */
        L2T_STATE_RESOLVING,  /* entry needs address resolution */
        L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */

        /* when state is one of the below the entry is not hashed */
        L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
        L2T_STATE_UNUSED      /* entry not in use */
};
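
/*
 * A rough sketch of the entry lifecycle, pieced together from the
 * handlers below (not an authoritative state diagram):
 *
 *   UNUSED --cxgb4_l2t_get()--> RESOLVING --write_l2e(sync)--> SYNC_WRITE
 *   SYNC_WRITE --CPL_L2T_WRITE_RPL--> VALID or STALE (per the NUD state)
 *   VALID <--> STALE as the neighbour entry ages and is revalidated
 *
 * SWITCHING entries are managed by switching filters (outside this file)
 * and, like UNUSED ones, are never on a hash chain.
 */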

struct l2t_data {
        rwlock_t lock;
        atomic_t nfree;             /* number of free entries */
        struct l2t_entry *rover;    /* starting point for next allocation */
        struct l2t_entry l2tab[L2T_SIZE];
};

static inline unsigned int vlan_prio(const struct l2t_entry *e)
{
        return e->vlan >> 13;
}

static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
        if (atomic_add_return(1, &e->refcnt) == 1)  /* 0 -> 1 transition */
                atomic_dec(&d->nfree);
}

/*
 * To avoid having to check address families we do not allow v4 and v6
 * neighbors to be on the same hash chain.  We keep v4 entries in the first
 * half of available hash buckets and v6 in the second.
 */
enum {
        L2T_SZ_HALF = L2T_SIZE / 2,
        L2T_HASH_MASK = L2T_SZ_HALF - 1
};

static inline unsigned int arp_hash(const u32 *key, int ifindex)
{
        return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
}

static inline unsigned int ipv6_hash(const u32 *key, int ifindex)
{
        u32 xor = key[0] ^ key[1] ^ key[2] ^ key[3];

        return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
}

static unsigned int addr_hash(const u32 *addr, int addr_len, int ifindex)
{
        return addr_len == 4 ? arp_hash(addr, ifindex) :
                               ipv6_hash(addr, ifindex);
}
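
/*
 * addr_len comes from neigh->tbl->key_len: 4 bytes for IPv4 (arp_tbl)
 * and 16 for IPv6 (nd_tbl), which is what steers v4 and v6 keys into
 * their respective bucket halves.  Illustrative only:
 *
 *        hash = addr_hash(addr, 4, ifidx);    lands in [0, L2T_SZ_HALF)
 *        hash = addr_hash(addr, 16, ifidx);   lands in [L2T_SZ_HALF, L2T_SIZE)
 */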

/*
 * Checks if an L2T entry is for the given IP/IPv6 address.  It does not check
 * whether the L2T entry and the address are of the same address family.
 * Callers ensure an address is only checked against L2T entries of the same
 * family, something made trivial by the separation of IP and IPv6 hash chains
 * mentioned above.  Returns 0 if there's a match, nonzero otherwise.
 */
static int addreq(const struct l2t_entry *e, const u32 *addr)
{
        if (e->v6)
                return (e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
                       (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]);
        return e->addr[0] ^ addr[0];
}

static void neigh_replace(struct l2t_entry *e, struct neighbour *n)
{
        neigh_hold(n);
        if (e->neigh)
                neigh_release(e->neigh);
        e->neigh = n;
}

/*
 * Write an L2T entry.  Must be called with the entry locked.
 * The write may be synchronous or asynchronous.
 */
static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
{
        struct sk_buff *skb;
        struct cpl_l2t_write_req *req;

        skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
        if (!skb)
                return -ENOMEM;

        req = (struct cpl_l2t_write_req *)__skb_put(skb, sizeof(*req));
        INIT_TP_WR(req, 0);

        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ,
                                        e->idx | (sync ? F_SYNC_WR : 0) |
                                        TID_QID(adap->sge.fw_evtq.abs_id)));
        req->params = htons(L2T_W_PORT(e->lport) | L2T_W_NOREPLY(!sync));
        req->l2t_idx = htons(e->idx);
        req->vlan = htons(e->vlan);
        if (e->neigh)
                memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
        memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));

        set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
        t4_ofld_send(adap, skb);

        if (sync && e->state != L2T_STATE_SWITCHING)
                e->state = L2T_STATE_SYNC_WRITE;
        return 0;
}
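
/*
 * Note on the sync flag above: L2T_W_NOREPLY(!sync) requests a
 * CPL_L2T_WRITE_RPL only for synchronous writes, and the entry parks in
 * L2T_STATE_SYNC_WRITE until do_l2t_write_rpl() sees that reply and
 * releases any packets queued on the entry.  Asynchronous writes, as
 * issued from t4_l2t_update() for a MAC change, are fire-and-forget.
 */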

/*
 * Send packets waiting in an L2T entry's ARP queue.  Must be called with the
 * entry locked.
 */
static void send_pending(struct adapter *adap, struct l2t_entry *e)
{
        while (e->arpq_head) {
                struct sk_buff *skb = e->arpq_head;

                e->arpq_head = skb->next;
                skb->next = NULL;
                t4_ofld_send(adap, skb);
        }
        e->arpq_tail = NULL;
}

/*
 * Process a CPL_L2T_WRITE_RPL.  Wake up the ARP queue if it completes a
 * synchronous L2T_WRITE.  Note that the TID in the reply is really the L2T
 * index it refers to.
 */
void do_l2t_write_rpl(struct adapter *adap, const struct cpl_l2t_write_rpl *rpl)
{
        unsigned int tid = GET_TID(rpl);
        unsigned int idx = tid & (L2T_SIZE - 1);

        if (unlikely(rpl->status != CPL_ERR_NONE)) {
                dev_err(adap->pdev_dev,
                        "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
                        rpl->status, idx);
                return;
        }

        if (tid & F_SYNC_WR) {
                struct l2t_entry *e = &adap->l2t->l2tab[idx];

                spin_lock(&e->lock);
                if (e->state != L2T_STATE_SWITCHING) {
                        send_pending(adap, e);
                        e->state = (e->neigh->nud_state & NUD_STALE) ?
                                        L2T_STATE_STALE : L2T_STATE_VALID;
                }
                spin_unlock(&e->lock);
        }
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void arpq_enqueue(struct l2t_entry *e, struct sk_buff *skb)
{
        skb->next = NULL;
        if (e->arpq_head)
                e->arpq_tail->next = skb;
        else
                e->arpq_head = skb;
        e->arpq_tail = skb;
}

int cxgb4_l2t_send(struct net_device *dev, struct sk_buff *skb,
                   struct l2t_entry *e)
{
        struct adapter *adap = netdev2adap(dev);

again:
        switch (e->state) {
        case L2T_STATE_STALE:     /* entry is stale, kick off revalidation */
                neigh_event_send(e->neigh, NULL);
                spin_lock_bh(&e->lock);
                if (e->state == L2T_STATE_STALE)
                        e->state = L2T_STATE_VALID;
                spin_unlock_bh(&e->lock);
                /* fall through to the VALID case and send the packet now */
        case L2T_STATE_VALID:     /* fast-path, send the packet on */
                return t4_ofld_send(adap, skb);
        case L2T_STATE_RESOLVING:
        case L2T_STATE_SYNC_WRITE:
                spin_lock_bh(&e->lock);
                if (e->state != L2T_STATE_SYNC_WRITE &&
                    e->state != L2T_STATE_RESOLVING) {
                        spin_unlock_bh(&e->lock);
                        goto again;
                }
                arpq_enqueue(e, skb);
                spin_unlock_bh(&e->lock);

                if (e->state == L2T_STATE_RESOLVING &&
                    !neigh_event_send(e->neigh, NULL)) {
                        spin_lock_bh(&e->lock);
                        if (e->state == L2T_STATE_RESOLVING && e->arpq_head)
                                write_l2e(adap, e, 1);
                        spin_unlock_bh(&e->lock);
                }
        }
        return 0;
}
EXPORT_SYMBOL(cxgb4_l2t_send);

/*
 * Allocate a free L2T entry.  Must be called with l2t_data.lock held.
 */
static struct l2t_entry *alloc_l2e(struct l2t_data *d)
{
        struct l2t_entry *end, *e, **p;

        if (!atomic_read(&d->nfree))
                return NULL;

        /* there's definitely a free entry */
        for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
                if (atomic_read(&e->refcnt) == 0)
                        goto found;

        for (e = d->l2tab; atomic_read(&e->refcnt); ++e)
                ;
found:
        d->rover = e + 1;
        atomic_dec(&d->nfree);

        /*
         * The entry we found may be an inactive entry that is
         * presently in the hash table.  We need to remove it.
         */
        if (e->state < L2T_STATE_SWITCHING)
                for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next)
                        if (*p == e) {
                                *p = e->next;
                                e->next = NULL;
                                break;
                        }

        e->state = L2T_STATE_UNUSED;
        return e;
}
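
/*
 * The allocator above is a simple clock hand: scan from the rover to the
 * end of the table, then wrap to the start.  Since nfree was checked
 * first and allocation happens under d->lock, a free entry is guaranteed
 * to exist, so the second loop needs no bounds check.
 */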

/*
 * Called when an L2T entry has no more users.
 */
static void t4_l2e_free(struct l2t_entry *e)
{
        struct l2t_data *d;

        spin_lock_bh(&e->lock);
        if (atomic_read(&e->refcnt) == 0) {  /* hasn't been recycled */
                if (e->neigh) {
                        neigh_release(e->neigh);
                        e->neigh = NULL;
                }
                while (e->arpq_head) {
                        struct sk_buff *skb = e->arpq_head;

                        e->arpq_head = skb->next;
                        kfree_skb(skb);
                }
                e->arpq_tail = NULL;
        }
        spin_unlock_bh(&e->lock);

        d = container_of(e, struct l2t_data, l2tab[e->idx]);
        atomic_inc(&d->nfree);
}

void cxgb4_l2t_release(struct l2t_entry *e)
{
        if (atomic_dec_and_test(&e->refcnt))
                t4_l2e_free(e);
}
EXPORT_SYMBOL(cxgb4_l2t_release);

/*
 * Update an L2T entry that was previously used for the same next hop as neigh.
 * Must be called with softirqs disabled.
 */
static void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
{
        unsigned int nud_state;

        spin_lock(&e->lock);                /* avoid race with t4_l2e_free */
        if (neigh != e->neigh)
                neigh_replace(e, neigh);
        nud_state = neigh->nud_state;
        if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)) ||
            !(nud_state & NUD_VALID))
                e->state = L2T_STATE_RESOLVING;
        else if (nud_state & NUD_CONNECTED)
                e->state = L2T_STATE_VALID;
        else
                e->state = L2T_STATE_STALE;
        spin_unlock(&e->lock);
}

struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh,
                                const struct net_device *physdev,
                                unsigned int priority)
{
        u8 lport;
        u16 vlan;
        struct l2t_entry *e;
        int addr_len = neigh->tbl->key_len;
        u32 *addr = (u32 *)neigh->primary_key;
        int ifidx = neigh->dev->ifindex;
        int hash = addr_hash(addr, addr_len, ifidx);

        if (neigh->dev->flags & IFF_LOOPBACK)
                lport = netdev2pinfo(physdev)->tx_chan + 4;
        else
                lport = netdev2pinfo(physdev)->lport;

        if (neigh->dev->priv_flags & IFF_802_1Q_VLAN)
                vlan = vlan_dev_vlan_id(neigh->dev);
        else
                vlan = VLAN_NONE;

        write_lock_bh(&d->lock);
        for (e = d->l2tab[hash].first; e; e = e->next)
                if (!addreq(e, addr) && e->ifindex == ifidx &&
                    e->vlan == vlan && e->lport == lport) {
                        l2t_hold(d, e);
                        if (atomic_read(&e->refcnt) == 1)
                                reuse_entry(e, neigh);
                        goto done;
                }

        /* Need to allocate a new entry */
        e = alloc_l2e(d);
        if (e) {
                spin_lock(&e->lock);          /* avoid race with t4_l2e_free */
                e->state = L2T_STATE_RESOLVING;
                memcpy(e->addr, addr, addr_len);
                e->ifindex = ifidx;
                e->hash = hash;
                e->lport = lport;
                e->v6 = addr_len == 16;
                atomic_set(&e->refcnt, 1);
                neigh_replace(e, neigh);
                e->vlan = vlan;
                e->next = d->l2tab[hash].first;
                d->l2tab[hash].first = e;
                spin_unlock(&e->lock);
        }
done:
        write_unlock_bh(&d->lock);
        return e;
}
EXPORT_SYMBOL(cxgb4_l2t_get);
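
/*
 * A minimal usage sketch for the exported API, as an upper-layer offload
 * driver might call it (hypothetical caller; names and error handling
 * are illustrative only):
 *
 *        e = cxgb4_l2t_get(adap->l2t, neigh, physdev, prio);
 *        if (!e)
 *                return -ENOMEM;
 *        err = cxgb4_l2t_send(dev, skb, e);
 *
 * cxgb4_l2t_send() transmits immediately when the entry is VALID and
 * queues the skb on the entry otherwise; the caller keeps its reference
 * until it is done with the next hop and then drops it:
 *
 *        cxgb4_l2t_release(e);
 */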

/*
 * Called when address resolution fails for an L2T entry to handle packets
 * on the arpq head.  If a packet specifies a failure handler it is invoked,
 * otherwise the packet is sent to the device.
 */
static void handle_failed_resolution(struct adapter *adap, struct sk_buff *arpq)
{
        while (arpq) {
                struct sk_buff *skb = arpq;
                const struct l2t_skb_cb *cb = L2T_SKB_CB(skb);

                arpq = skb->next;
                skb->next = NULL;
                if (cb->arp_err_handler)
                        cb->arp_err_handler(cb->handle, skb);
                else
                        t4_ofld_send(adap, skb);
        }
}

/*
 * Called when the host's neighbor layer makes a change to some entry that is
 * loaded into the HW L2 table.
 */
void t4_l2t_update(struct adapter *adap, struct neighbour *neigh)
{
        struct l2t_entry *e;
        struct sk_buff *arpq = NULL;
        struct l2t_data *d = adap->l2t;
        int addr_len = neigh->tbl->key_len;
        u32 *addr = (u32 *) neigh->primary_key;
        int ifidx = neigh->dev->ifindex;
        int hash = addr_hash(addr, addr_len, ifidx);

        read_lock_bh(&d->lock);
        for (e = d->l2tab[hash].first; e; e = e->next)
                if (!addreq(e, addr) && e->ifindex == ifidx) {
                        spin_lock(&e->lock);
                        if (atomic_read(&e->refcnt))
                                goto found;
                        spin_unlock(&e->lock);
                        break;
                }
        read_unlock_bh(&d->lock);
        return;

 found:
        read_unlock(&d->lock);

        if (neigh != e->neigh)
                neigh_replace(e, neigh);

        if (e->state == L2T_STATE_RESOLVING) {
                if (neigh->nud_state & NUD_FAILED) {
                        arpq = e->arpq_head;
                        e->arpq_head = e->arpq_tail = NULL;
                } else if ((neigh->nud_state & (NUD_CONNECTED | NUD_STALE)) &&
                           e->arpq_head) {
                        write_l2e(adap, e, 1);
                }
        } else {
                e->state = neigh->nud_state & NUD_CONNECTED ?
                        L2T_STATE_VALID : L2T_STATE_STALE;
                if (memcmp(e->dmac, neigh->ha, sizeof(e->dmac)))
                        write_l2e(adap, e, 0);
        }

        spin_unlock_bh(&e->lock);

        if (arpq)
                handle_failed_resolution(adap, arpq);
}
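
/*
 * Summary of the policy above: while an entry is RESOLVING, NUD_FAILED
 * detaches the pending queue for handle_failed_resolution(), while
 * NUD_CONNECTED/NUD_STALE with queued packets triggers a synchronous
 * hardware write.  For already-resolved entries the state tracks
 * NUD_CONNECTED (VALID vs STALE) and a changed MAC address is pushed to
 * hardware with an asynchronous write.
 */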

struct l2t_data *t4_init_l2t(void)
{
        int i;
        struct l2t_data *d;

        d = t4_alloc_mem(sizeof(*d));
        if (!d)
                return NULL;

        d->rover = d->l2tab;
        atomic_set(&d->nfree, L2T_SIZE);
        rwlock_init(&d->lock);

        for (i = 0; i < L2T_SIZE; ++i) {
                d->l2tab[i].idx = i;
                d->l2tab[i].state = L2T_STATE_UNUSED;
                spin_lock_init(&d->l2tab[i].lock);
                atomic_set(&d->l2tab[i].refcnt, 0);
        }
        return d;
}
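
/*
 * A sketch of the expected call site during adapter initialization
 * (simplified; the real caller lives in cxgb4_main.c):
 *
 *        adap->l2t = t4_init_l2t();
 *        if (!adap->l2t)
 *                goto out_free;        treat as -ENOMEM
 */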

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static inline void *l2t_get_idx(struct seq_file *seq, loff_t pos)
{
        struct l2t_entry *l2tab = seq->private;

        return pos >= L2T_SIZE ? NULL : &l2tab[pos];
}

static void *l2t_seq_start(struct seq_file *seq, loff_t *pos)
{
        return *pos ? l2t_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        v = l2t_get_idx(seq, *pos);
        if (v)
                ++*pos;
        return v;
}

static void l2t_seq_stop(struct seq_file *seq, void *v)
{
}

static char l2e_state(const struct l2t_entry *e)
{
        switch (e->state) {
        case L2T_STATE_VALID: return 'V';
        case L2T_STATE_STALE: return 'S';
        case L2T_STATE_SYNC_WRITE: return 'W';
        case L2T_STATE_RESOLVING: return e->arpq_head ? 'A' : 'R';
        case L2T_STATE_SWITCHING: return 'X';
        default:
                return 'U';
        }
}

static int l2t_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN)
                seq_puts(seq, " Idx IP address                "
                         "Ethernet address  VLAN/P LP State Users Port\n");
        else {
                char ip[60];
                struct l2t_entry *e = v;

                spin_lock_bh(&e->lock);
                if (e->state == L2T_STATE_SWITCHING)
                        ip[0] = '\0';
                else
                        sprintf(ip, e->v6 ? "%pI6c" : "%pI4", e->addr);
                seq_printf(seq, "%4u %-25s %17pM %4d %u %2u   %c   %5u %s\n",
                           e->idx, ip, e->dmac,
                           e->vlan & VLAN_VID_MASK, vlan_prio(e), e->lport,
                           l2e_state(e), atomic_read(&e->refcnt),
                           e->neigh ? e->neigh->dev->name : "");
                spin_unlock_bh(&e->lock);
        }
        return 0;
}

static const struct seq_operations l2t_seq_ops = {
        .start = l2t_seq_start,
        .next = l2t_seq_next,
        .stop = l2t_seq_stop,
        .show = l2t_seq_show
};

static int l2t_seq_open(struct inode *inode, struct file *file)
{
        int rc = seq_open(file, &l2t_seq_ops);

        if (!rc) {
                struct adapter *adap = inode->i_private;
                struct seq_file *seq = file->private_data;

                seq->private = adap->l2t->l2tab;
        }
        return rc;
}

const struct file_operations t4_l2t_fops = {
        .owner = THIS_MODULE,
        .open = l2t_seq_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = seq_release,
};
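
/*
 * t4_l2t_fops backs the driver's "l2t" debugfs file.  Registration
 * happens elsewhere in the driver; roughly (a sketch, assuming the
 * adapter's debugfs root directory already exists):
 *
 *        debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root,
 *                            adap, &t4_l2t_fops);
 */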