linux/drivers/net/iseries_veth.c
<<
>>
Prefs
   1/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
   2/*
   3 * IBM eServer iSeries Virtual Ethernet Device Driver
   4 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
   5 * Substantially cleaned up by:
   6 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
   7 * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation.
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License as
  11 * published by the Free Software Foundation; either version 2 of the
  12 * License, or (at your option) any later version.
  13 *
  14 * This program is distributed in the hope that it will be useful, but
  15 * WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public License
  20 * along with this program; if not, write to the Free Software
  21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  22 * USA
  23 *
  24 *
  25 * This module implements the virtual ethernet device for iSeries LPAR
  26 * Linux.  It uses hypervisor message passing to implement an
  27 * ethernet-like network device communicating between partitions on
  28 * the iSeries.
  29 *
  30 * The iSeries LPAR hypervisor currently allows for up to 16 different
  31 * virtual ethernets.  These are all dynamically configurable on
  32 * OS/400 partitions, but dynamic configuration is not supported under
  33 * Linux yet.  An ethXX network device will be created for each
  34 * virtual ethernet this partition is connected to.
  35 *
  36 * - This driver is responsible for routing packets to and from other
  37 *   partitions.  The MAC addresses used by the virtual ethernets
  38 *   carry meaning and must not be modified.
  39 *
  40 * - Having 2 virtual ethernets to the same remote partition DOES NOT
  41 *   double the available bandwidth.  The 2 devices will share the
  42 *   available hypervisor bandwidth.
  43 *
  44 * - If you send a packet to your own mac address, it will just be
  45 *   dropped, you won't get it on the receive side.
  46 *
  47 * - Multicast is implemented by sending the frame to every
  48 *   other partition.  It is the responsibility of the receiving
  49 *   partition to filter the addresses desired.
  50 *
  51 * Tunable parameters:
  52 *
  53 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
  54 * controls how much memory Linux will allocate per remote partition
  55 * it is communicating with.  It can be thought of as the maximum
  56 * number of packets outstanding to a remote partition at a time.
  57 */
  58
  59#include <linux/module.h>
  60#include <linux/types.h>
  61#include <linux/errno.h>
  62#include <linux/ioport.h>
  63#include <linux/kernel.h>
  64#include <linux/netdevice.h>
  65#include <linux/etherdevice.h>
  66#include <linux/skbuff.h>
  67#include <linux/init.h>
  68#include <linux/delay.h>
  69#include <linux/mm.h>
  70#include <linux/ethtool.h>
  71#include <linux/if_ether.h>
  72#include <linux/slab.h>
  73
  74#include <asm/abs_addr.h>
  75#include <asm/iseries/mf.h>
  76#include <asm/uaccess.h>
  77#include <asm/firmware.h>
  78#include <asm/iseries/hv_lp_config.h>
  79#include <asm/iseries/hv_types.h>
  80#include <asm/iseries/hv_lp_event.h>
  81#include <asm/iommu.h>
  82#include <asm/vio.h>
  83
  84#undef DEBUG
  85
  86MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
  87MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
  88MODULE_LICENSE("GPL");
  89
  90#define VETH_EVENT_CAP  (0)     /* capabilities exchange; payload is struct veth_cap_data */
  91#define VETH_EVENT_FRAMES       (1)     /* incoming frames; passed to veth_receive() */
  92#define VETH_EVENT_MONITOR      (2)     /* sent with a deferred ack; its ack is handled as a lost connection */
  93#define VETH_EVENT_FRAMES_ACK   (3)     /* carries tokens of messages that can be recycled */
  94
  95#define VETH_MAX_ACKS_PER_MSG   (20)    /* number of tokens in one frames-ack event */
  96#define VETH_MAX_FRAMES_PER_MSG (6)     /* number of addr/len slots in one frames event */
  97
  98struct veth_frames_data {       /* payload of a frames event: 24 + 12 + 4 = 40 bytes, i.e. five u64 words */
  99        u32 addr[VETH_MAX_FRAMES_PER_MSG];      /* one address per slot */
 100        u16 len[VETH_MAX_FRAMES_PER_MSG];       /* one length per slot */
 101        u32 eofmask;    /* NOTE(review): presumably per-slot end-of-frame bits - confirm against the tx/rx code */
 102};
 103#define VETH_EOF_SHIFT          (32-VETH_MAX_FRAMES_PER_MSG)    /* 26 with the current limit of 6 slots */
 104
 105struct veth_frames_ack_data {   /* payload of a VETH_EVENT_FRAMES_ACK event (20 * 2 = 40 bytes) */
 106        u16 token[VETH_MAX_ACKS_PER_MSG];       /* veth_handle_int() ignores entries >= VETH_NUMBUFFERS */
 107};
 108
 109struct veth_cap_data {  /* capabilities payload of VETH_EVENT_CAP; 40 bytes, sent as five u64 words */
 110        u8 caps_version;
 111        u8 rsvd1;
 112        u16 num_buffers;        /* veth_process_caps() rejects 0 */
 113        u16 ack_threshold;      /* must be 1..VETH_MAX_ACKS_PER_MSG to be accepted */
 114        u16 rsvd2;
 115        u32 ack_timeout;        /* microseconds; converted to jiffies in veth_process_caps() */
 116        u32 rsvd3;
 117        u64 rsvd4[3];
 118};
 119
 120struct veth_lpevent {   /* an HvLpEvent followed by the subtype-specific veth payload */
 121        struct HvLpEvent base_event;    /* must stay first: code casts between struct HvLpEvent * and this type */
 122        union {
 123                struct veth_cap_data caps_data;         /* VETH_EVENT_CAP */
 124                struct veth_frames_data frames_data;    /* VETH_EVENT_FRAMES */
 125                struct veth_frames_ack_data frames_ack_data;    /* VETH_EVENT_FRAMES_ACK */
 126        } u;
 127
 128};
 129
 130#define DRV_NAME        "iseries_veth"  /* prefix of every veth_info/veth_error/veth_debug message */
 131#define DRV_VERSION     "2.0"
 132
 133#define VETH_NUMBUFFERS         (120)   /* veth_msg slots per connection; advertised as local num_buffers */
 134#define VETH_ACKTIMEOUT         (1000000) /* microseconds */
 135#define VETH_MAX_MCAST          (12)    /* size of veth_port.mcast_addr[] */
 136
 137#define VETH_MAX_MTU            (9000)
 138
 139#if VETH_NUMBUFFERS < 10        /* ACK_THRESHOLD (advertised as local ack_threshold) scales with VETH_NUMBUFFERS */
 140#define ACK_THRESHOLD           (1)
 141#elif VETH_NUMBUFFERS < 20
 142#define ACK_THRESHOLD           (4)
 143#elif VETH_NUMBUFFERS < 40
 144#define ACK_THRESHOLD           (10)
 145#else
 146#define ACK_THRESHOLD           (20)    /* the value in effect for the default of 120 buffers */
 147#endif
 148
 149#define VETH_STATE_SHUTDOWN     (0x0001)        /* connection is finished; the state machine does nothing more */
 150#define VETH_STATE_OPEN         (0x0002)        /* the LP event path has been opened */
 151#define VETH_STATE_RESET        (0x0004)        /* a reset was requested; handled first by veth_statemachine() */
 152#define VETH_STATE_SENTMON      (0x0008)        /* the monitor event was sent successfully */
 153#define VETH_STATE_SENTCAPS     (0x0010)        /* our capabilities were sent successfully */
 154#define VETH_STATE_GOTCAPACK    (0x0020)        /* the ack for our capabilities arrived (veth_take_cap_ack) */
 155#define VETH_STATE_GOTCAPS      (0x0040)        /* the remote capabilities arrived (veth_take_cap) */
 156#define VETH_STATE_SENTCAPACK   (0x0080)        /* we acked the remote capabilities with HvLpEvent_Rc_Good */
 157#define VETH_STATE_READY        (0x0100)        /* handshake complete; the ack timer has been started */
 158
 159struct veth_msg {       /* one message slot; VETH_NUMBUFFERS of them are allocated per connection */
 160        struct veth_msg *next;  /* link used by veth_stack_push()/veth_stack_pop() */
 161        struct veth_frames_data data;
 162        int token;      /* index of this slot in cnx->msgs (set in veth_init_connection) */
 163        int in_use;
 164        struct sk_buff *skb;
 165        struct device *dev;
 166};
 167
 168struct veth_lpar_connection {   /* state of the link to one remote partition; instances live in veth_cnx[] */
 169        HvLpIndex remote_lp;    /* index of the remote partition; also the index into veth_cnx[] */
 170        struct delayed_work statemachine_wq;    /* runs veth_statemachine() */
 171        struct veth_msg *msgs;  /* array of VETH_NUMBUFFERS message slots */
 172        int num_events;         /* events obtained from veth_allocate_events(); needs >= 2 + VETH_NUMBUFFERS */
 173        struct veth_cap_data local_caps;        /* capabilities we send with VETH_EVENT_CAP */
 174
 175        struct kobject kobject; /* sysfs object; type is veth_lpar_connection_ktype */
 176        struct timer_list ack_timer;    /* callback: veth_timed_ack() */
 177
 178        struct timer_list reset_timer;  /* callback: veth_timed_reset() */
 179        unsigned int reset_timeout;     /* in jiffies */
 180        unsigned long last_contact;     /* jiffies value stored when a frames-ack recycled a message */
 181        int outstanding_tx;     /* decremented per acked token, zeroed on reset */
 182
 183        spinlock_t lock;        /* taken around state changes and message-stack updates in this file */
 184        unsigned long state;    /* bitmask of VETH_STATE_* */
 185        HvLpInstanceId src_inst;
 186        HvLpInstanceId dst_inst;        /* reloaded whenever capabilities arrive */
 187        struct veth_lpevent cap_event, cap_ack_event;   /* copies of the received caps event and caps ack */
 188        u16 pending_acks[VETH_MAX_ACKS_PER_MSG];        /* filled with 0xff bytes at init and on reset */
 189        u32 num_pending_acks;
 190
 191        int num_ack_events;     /* ack events allocated so far by veth_process_caps() */
 192        struct veth_cap_data remote_caps;       /* copy of the peer's capabilities */
 193        u32 ack_timeout;        /* peer's ack timeout converted to jiffies */
 194
 195        struct veth_msg *msg_stack_head;        /* top of the message stack; NULL when empty */
 196};
 197
 198struct veth_port {      /* NOTE(review): presumably the per-net_device private state - confirm against the probe code */
 199        struct device *dev;
 200        u64 mac_addr;           /* shown in sysfs as a hex value */
 201        HvLpIndexMap lpar_map;
 202
 203        /* queue_lock protects the stopped_map and dev's queue. */
 204        spinlock_t queue_lock;
 205        HvLpIndexMap stopped_map;
 206
 207        /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
 208        rwlock_t mcast_gate;
 209        int promiscuous;
 210        int num_mcast;
 211        u64 mcast_addr[VETH_MAX_MCAST];
 212
 213        struct kobject kobject; /* sysfs object; type is veth_port_ktype */
 214};
 215
 216static HvLpIndex this_lp;       /* index of the local partition; veth_init_connection() skips it */
 217static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */ /* indexed by remote partition index */
 218static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */ /* one slot per virtual LAN */
 219
 220static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
 221static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
 222static void veth_wake_queues(struct veth_lpar_connection *cnx);
 223static void veth_stop_queues(struct veth_lpar_connection *cnx);
 224static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);
 225static void veth_release_connection(struct kobject *kobject);
 226static void veth_timed_ack(unsigned long ptr);
 227static void veth_timed_reset(unsigned long ptr);
 228
 229/*
 230 * Utility functions
 231 */
 232
 233#define veth_info(fmt, args...) /* KERN_INFO message prefixed with DRV_NAME; fmt must be a string literal */ \
 234        printk(KERN_INFO DRV_NAME ": " fmt, ## args)
 235
 236#define veth_error(fmt, args...) /* KERN_ERR message prefixed with DRV_NAME ": Error: " */ \
 237        printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)
 238
 239#ifdef DEBUG    /* DEBUG is #undef'd near the top of this file, so the no-op variant below is used */
 240#define veth_debug(fmt, args...) \
 241        printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)
 242#else
 243#define veth_debug(fmt, args...) do {} while (0)        /* arguments are discarded unevaluated */
 244#endif
 245
 246/* You must hold the connection's lock when you call this function. */
 247static inline void veth_stack_push(struct veth_lpar_connection *cnx,
 248                                   struct veth_msg *msg)
 249{
 250        msg->next = cnx->msg_stack_head;
 251        cnx->msg_stack_head = msg;
 252}
 253
 254/* You must hold the connection's lock when you call this function. */
 255static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
 256{
 257        struct veth_msg *msg;
 258
 259        msg = cnx->msg_stack_head;
 260        if (msg)
 261                cnx->msg_stack_head = cnx->msg_stack_head->next;
 262
 263        return msg;
 264}
 265
 266/* You must hold the connection's lock when you call this function. */
 267static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
 268{
 269        return cnx->msg_stack_head == NULL;
 270}
 271
 272static inline HvLpEvent_Rc
 273veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
 274                 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
 275                 u64 token,
 276                 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
 277{
 278        return HvCallEvent_signalLpEventFast(cnx->remote_lp,
 279                                             HvLpEvent_Type_VirtualLan,
 280                                             subtype, ackind, acktype,
 281                                             cnx->src_inst,
 282                                             cnx->dst_inst,
 283                                             token, data1, data2, data3,
 284                                             data4, data5);
 285}
 286
 287static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
 288                                           u16 subtype, u64 token, void *data)
 289{
 290        u64 *p = (u64 *) data;
 291
 292        return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
 293                                HvLpEvent_AckType_ImmediateAck,
 294                                token, p[0], p[1], p[2], p[3], p[4]);
 295}
 296
 297struct veth_allocation {        /* context shared by veth_allocate_events() and its completion callback */
 298        struct completion c;    /* completed by veth_complete_allocation() */
 299        int num;                /* value reported to the callback; returned by veth_allocate_events() */
 300};
 301
 302static void veth_complete_allocation(void *parm, int number)
 303{
 304        struct veth_allocation *vc = (struct veth_allocation *)parm;
 305
 306        vc->num = number;
 307        complete(&vc->c);
 308}
 309
 310static int veth_allocate_events(HvLpIndex rlp, int number)
 311{
 312        struct veth_allocation vc =
 313                { COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 };
 314
 315        mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
 316                            sizeof(struct veth_lpevent), number,
 317                            &veth_complete_allocation, &vc);
 318        wait_for_completion(&vc.c);
 319
 320        return vc.num;
 321}
 322
 323/*
 324 * sysfs support
 325 */
 326
 327struct veth_cnx_attribute {     /* sysfs attribute bound to a struct veth_lpar_connection */
 328        struct attribute attr;
 329        ssize_t (*show)(struct veth_lpar_connection *, char *buf);      /* may be NULL; the dispatcher then returns -EIO */
 330        ssize_t (*store)(struct veth_lpar_connection *, const char *buf);       /* not dispatched: veth_cnx_sysfs_ops sets only .show */
 331};
 332
 333static ssize_t veth_cnx_attribute_show(struct kobject *kobj,
 334                struct attribute *attr, char *buf)
 335{
 336        struct veth_cnx_attribute *cnx_attr;
 337        struct veth_lpar_connection *cnx;
 338
 339        cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);
 340        cnx = container_of(kobj, struct veth_lpar_connection, kobject);
 341
 342        if (!cnx_attr->show)
 343                return -EIO;
 344
 345        return cnx_attr->show(cnx, buf);
 346}
 347
 348#define CUSTOM_CNX_ATTR(_name, _format, _expression)                    \
 349static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\
 350{                                                                       \
 351        return sprintf(buf, _format, _expression);                      \
 352}                                                                       \
 353struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)
 354
 355#define SIMPLE_CNX_ATTR(_name)  \
 356        CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)
 357
 358SIMPLE_CNX_ATTR(outstanding_tx);
 359SIMPLE_CNX_ATTR(remote_lp);
 360SIMPLE_CNX_ATTR(num_events);
 361SIMPLE_CNX_ATTR(src_inst);
 362SIMPLE_CNX_ATTR(dst_inst);
 363SIMPLE_CNX_ATTR(num_pending_acks);
 364SIMPLE_CNX_ATTR(num_ack_events);
 365CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));
 366CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));
 367CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);
 368CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?
 369                jiffies_to_msecs(jiffies - cnx->last_contact) : 0);
 370
 371#define GET_CNX_ATTR(_name)     (&veth_cnx_attr_##_name.attr)
 372
 373static struct attribute *veth_cnx_default_attrs[] = {   /* NULL-terminated; used as .default_attrs of veth_lpar_connection_ktype */
 374        GET_CNX_ATTR(outstanding_tx),
 375        GET_CNX_ATTR(remote_lp),
 376        GET_CNX_ATTR(num_events),
 377        GET_CNX_ATTR(reset_timeout),
 378        GET_CNX_ATTR(last_contact),
 379        GET_CNX_ATTR(state),
 380        GET_CNX_ATTR(src_inst),
 381        GET_CNX_ATTR(dst_inst),
 382        GET_CNX_ATTR(num_pending_acks),
 383        GET_CNX_ATTR(num_ack_events),
 384        GET_CNX_ATTR(ack_timeout),
 385        NULL
 386};
 387
 388static const struct sysfs_ops veth_cnx_sysfs_ops = {    /* read-only: no .store is provided */
 389                .show = veth_cnx_attribute_show
 390};
 391
 392static struct kobj_type veth_lpar_connection_ktype = {  /* kobject type passed to kobject_init() for each connection */
 393        .release        = veth_release_connection,
 394        .sysfs_ops      = &veth_cnx_sysfs_ops,
 395        .default_attrs  = veth_cnx_default_attrs
 396};
 397
 398struct veth_port_attribute {    /* sysfs attribute bound to a struct veth_port */
 399        struct attribute attr;
 400        ssize_t (*show)(struct veth_port *, char *buf); /* may be NULL; the dispatcher then returns -EIO */
 401        ssize_t (*store)(struct veth_port *, const char *buf);  /* not dispatched: veth_port_sysfs_ops sets only .show */
 402};
 403
 404static ssize_t veth_port_attribute_show(struct kobject *kobj,
 405                struct attribute *attr, char *buf)
 406{
 407        struct veth_port_attribute *port_attr;
 408        struct veth_port *port;
 409
 410        port_attr = container_of(attr, struct veth_port_attribute, attr);
 411        port = container_of(kobj, struct veth_port, kobject);
 412
 413        if (!port_attr->show)
 414                return -EIO;
 415
 416        return port_attr->show(port, buf);
 417}
 418
 419#define CUSTOM_PORT_ATTR(_name, _format, _expression) /* defines veth_port_attr_<name>, a read-only attribute */ \
 420static ssize_t _name##_show(struct veth_port *port, char *buf) /* _expression may refer to `port` */ \
 421{                                                                       \
 422        return sprintf(buf, _format, _expression);                      \
 423}                                                                       \
 424struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)
 425
 426#define SIMPLE_PORT_ATTR(_name) /* prints port-><name> as an unsigned long */ \
 427        CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)
 428
 429SIMPLE_PORT_ATTR(promiscuous);
 430SIMPLE_PORT_ATTR(num_mcast);
 431CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);
 432CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);
 433CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr);        /* NOTE(review): %llX assumes u64 is unsigned long long - confirm for this arch */
 434
 435#define GET_PORT_ATTR(_name)    (&veth_port_attr_##_name.attr)  /* address of the embedded struct attribute */
 436static struct attribute *veth_port_default_attrs[] = {  /* NULL-terminated; used as .default_attrs of veth_port_ktype */
 437        GET_PORT_ATTR(mac_addr),
 438        GET_PORT_ATTR(lpar_map),
 439        GET_PORT_ATTR(stopped_map),
 440        GET_PORT_ATTR(promiscuous),
 441        GET_PORT_ATTR(num_mcast),
 442        NULL
 443};
 444
 445static const struct sysfs_ops veth_port_sysfs_ops = {   /* read-only: no .store is provided */
 446        .show = veth_port_attribute_show
 447};
 448
 449static struct kobj_type veth_port_ktype = {     /* NOTE(review): unlike the connection ktype there is no .release here - confirm the port's lifetime is managed elsewhere */
 450        .sysfs_ops      = &veth_port_sysfs_ops,
 451        .default_attrs  = veth_port_default_attrs
 452};
 453
 454/*
 455 * LPAR connection code
 456 */
 457
 458static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
 459{
 460        schedule_delayed_work(&cnx->statemachine_wq, 0);
 461}
 462
 463static void veth_take_cap(struct veth_lpar_connection *cnx,
 464                          struct veth_lpevent *event)
 465{
 466        unsigned long flags;
 467
 468        spin_lock_irqsave(&cnx->lock, flags);
 469        /* Receiving caps may mean the other end has just come up, so
 470         * we need to reload the instance ID of the far end */
 471        cnx->dst_inst =
 472                HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
 473                                                  HvLpEvent_Type_VirtualLan);
 474
 475        if (cnx->state & VETH_STATE_GOTCAPS) {
 476                veth_error("Received a second capabilities from LPAR %d.\n",
 477                           cnx->remote_lp);
 478                event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
 479                HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
 480        } else {
 481                memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
 482                cnx->state |= VETH_STATE_GOTCAPS;
 483                veth_kick_statemachine(cnx);
 484        }
 485        spin_unlock_irqrestore(&cnx->lock, flags);
 486}
 487
 488static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
 489                              struct veth_lpevent *event)
 490{
 491        unsigned long flags;
 492
 493        spin_lock_irqsave(&cnx->lock, flags);
 494        if (cnx->state & VETH_STATE_GOTCAPACK) {
 495                veth_error("Received a second capabilities ack from LPAR %d.\n",
 496                           cnx->remote_lp);
 497        } else {
 498                memcpy(&cnx->cap_ack_event, event,
 499                       sizeof(cnx->cap_ack_event));
 500                cnx->state |= VETH_STATE_GOTCAPACK;
 501                veth_kick_statemachine(cnx);
 502        }
 503        spin_unlock_irqrestore(&cnx->lock, flags);
 504}
 505
 506static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
 507                                  struct veth_lpevent *event)
 508{
 509        unsigned long flags;
 510
 511        spin_lock_irqsave(&cnx->lock, flags);
 512        veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);
 513
 514        /* Avoid kicking the statemachine once we're shutdown.
 515         * It's unnecessary and it could break veth_stop_connection(). */
 516
 517        if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
 518                cnx->state |= VETH_STATE_RESET;
 519                veth_kick_statemachine(cnx);
 520        }
 521        spin_unlock_irqrestore(&cnx->lock, flags);
 522}
 523
 524static void veth_handle_ack(struct veth_lpevent *event)
 525{
 526        HvLpIndex rlp = event->base_event.xTargetLp;
 527        struct veth_lpar_connection *cnx = veth_cnx[rlp];
 528
 529        BUG_ON(! cnx);
 530
 531        switch (event->base_event.xSubtype) {
 532        case VETH_EVENT_CAP:
 533                veth_take_cap_ack(cnx, event);
 534                break;
 535        case VETH_EVENT_MONITOR:
 536                veth_take_monitor_ack(cnx, event);
 537                break;
 538        default:
 539                veth_error("Unknown ack type %d from LPAR %d.\n",
 540                                event->base_event.xSubtype, rlp);
 541        };
 542}
 543
 544static void veth_handle_int(struct veth_lpevent *event)
 545{
 546        HvLpIndex rlp = event->base_event.xSourceLp;
 547        struct veth_lpar_connection *cnx = veth_cnx[rlp];
 548        unsigned long flags;
 549        int i, acked = 0;
 550
 551        BUG_ON(! cnx);
 552
 553        switch (event->base_event.xSubtype) {
 554        case VETH_EVENT_CAP:
 555                veth_take_cap(cnx, event);
 556                break;
 557        case VETH_EVENT_MONITOR:
 558                /* do nothing... this'll hang out here til we're dead,
 559                 * and the hypervisor will return it for us. */
 560                break;
 561        case VETH_EVENT_FRAMES_ACK:
 562                spin_lock_irqsave(&cnx->lock, flags);
 563
 564                for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
 565                        u16 msgnum = event->u.frames_ack_data.token[i];
 566
 567                        if (msgnum < VETH_NUMBUFFERS) {
 568                                veth_recycle_msg(cnx, cnx->msgs + msgnum);
 569                                cnx->outstanding_tx--;
 570                                acked++;
 571                        }
 572                }
 573
 574                if (acked > 0) {
 575                        cnx->last_contact = jiffies;
 576                        veth_wake_queues(cnx);
 577                }
 578
 579                spin_unlock_irqrestore(&cnx->lock, flags);
 580                break;
 581        case VETH_EVENT_FRAMES:
 582                veth_receive(cnx, event);
 583                break;
 584        default:
 585                veth_error("Unknown interrupt type %d from LPAR %d.\n",
 586                                event->base_event.xSubtype, rlp);
 587        };
 588}
 589
 590static void veth_handle_event(struct HvLpEvent *event)
 591{
 592        struct veth_lpevent *veth_event = (struct veth_lpevent *)event;
 593
 594        if (hvlpevent_is_ack(event))
 595                veth_handle_ack(veth_event);
 596        else
 597                veth_handle_int(veth_event);
 598}
 599
 600static int veth_process_caps(struct veth_lpar_connection *cnx)
 601{
 602        struct veth_cap_data *remote_caps = &cnx->remote_caps;
 603        int num_acks_needed;
 604
 605        /* Convert timer to jiffies */
 606        cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;
 607
 608        if ( (remote_caps->num_buffers == 0) ||
 609             (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) ||
 610             (remote_caps->ack_threshold == 0) ||
 611             (cnx->ack_timeout == 0) ) {
 612                veth_error("Received incompatible capabilities from LPAR %d.\n",
 613                                cnx->remote_lp);
 614                return HvLpEvent_Rc_InvalidSubtypeData;
 615        }
 616
 617        num_acks_needed = (remote_caps->num_buffers
 618                           / remote_caps->ack_threshold) + 1;
 619
 620        /* FIXME: locking on num_ack_events? */
 621        if (cnx->num_ack_events < num_acks_needed) {
 622                int num;
 623
 624                num = veth_allocate_events(cnx->remote_lp,
 625                                           num_acks_needed-cnx->num_ack_events);
 626                if (num > 0)
 627                        cnx->num_ack_events += num;
 628
 629                if (cnx->num_ack_events < num_acks_needed) {
 630                        veth_error("Couldn't allocate enough ack events "
 631                                        "for LPAR %d.\n", cnx->remote_lp);
 632
 633                        return HvLpEvent_Rc_BufferNotAvailable;
 634                }
 635        }
 636
 637
 638        return HvLpEvent_Rc_Good;
 639}
 640
 641/* FIXME: The gotos here are a bit dubious */
 642static void veth_statemachine(struct work_struct *work)
 643{
 644        struct veth_lpar_connection *cnx =
 645                container_of(work, struct veth_lpar_connection,
 646                             statemachine_wq.work);
 647        int rlp = cnx->remote_lp;
 648        int rc;
 649
 650        spin_lock_irq(&cnx->lock);
 651
 652 restart:
 653        if (cnx->state & VETH_STATE_RESET) {
 654                if (cnx->state & VETH_STATE_OPEN)
 655                        HvCallEvent_closeLpEventPath(cnx->remote_lp,
 656                                                     HvLpEvent_Type_VirtualLan);
 657
 658                /*
 659                 * Reset ack data. This prevents the ack_timer actually
 660                 * doing anything, even if it runs one more time when
 661                 * we drop the lock below.
 662                 */
 663                memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
 664                cnx->num_pending_acks = 0;
 665
 666                cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
 667                                | VETH_STATE_OPEN | VETH_STATE_SENTCAPS
 668                                | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
 669                                | VETH_STATE_SENTCAPACK | VETH_STATE_READY);
 670
 671                /* Clean up any leftover messages */
 672                if (cnx->msgs) {
 673                        int i;
 674                        for (i = 0; i < VETH_NUMBUFFERS; ++i)
 675                                veth_recycle_msg(cnx, cnx->msgs + i);
 676                }
 677
 678                cnx->outstanding_tx = 0;
 679                veth_wake_queues(cnx);
 680
 681                /* Drop the lock so we can do stuff that might sleep or
 682                 * take other locks. */
 683                spin_unlock_irq(&cnx->lock);
 684
 685                del_timer_sync(&cnx->ack_timer);
 686                del_timer_sync(&cnx->reset_timer);
 687
 688                spin_lock_irq(&cnx->lock);
 689
 690                if (cnx->state & VETH_STATE_RESET)
 691                        goto restart;
 692
 693                /* Hack, wait for the other end to reset itself. */
 694                if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
 695                        schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
 696                        goto out;
 697                }
 698        }
 699
 700        if (cnx->state & VETH_STATE_SHUTDOWN)
 701                /* It's all over, do nothing */
 702                goto out;
 703
 704        if ( !(cnx->state & VETH_STATE_OPEN) ) {
 705                if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
 706                        goto cant_cope;
 707
 708                HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
 709                cnx->src_inst =
 710                        HvCallEvent_getSourceLpInstanceId(rlp,
 711                                                          HvLpEvent_Type_VirtualLan);
 712                cnx->dst_inst =
 713                        HvCallEvent_getTargetLpInstanceId(rlp,
 714                                                          HvLpEvent_Type_VirtualLan);
 715                cnx->state |= VETH_STATE_OPEN;
 716        }
 717
 718        if ( (cnx->state & VETH_STATE_OPEN) &&
 719             !(cnx->state & VETH_STATE_SENTMON) ) {
 720                rc = veth_signalevent(cnx, VETH_EVENT_MONITOR,
 721                                      HvLpEvent_AckInd_DoAck,
 722                                      HvLpEvent_AckType_DeferredAck,
 723                                      0, 0, 0, 0, 0, 0);
 724
 725                if (rc == HvLpEvent_Rc_Good) {
 726                        cnx->state |= VETH_STATE_SENTMON;
 727                } else {
 728                        if ( (rc != HvLpEvent_Rc_PartitionDead) &&
 729                             (rc != HvLpEvent_Rc_PathClosed) )
 730                                veth_error("Error sending monitor to LPAR %d, "
 731                                                "rc = %d\n", rlp, rc);
 732
 733                        /* Oh well, hope we get a cap from the other
 734                         * end and do better when that kicks us */
 735                        goto out;
 736                }
 737        }
 738
 739        if ( (cnx->state & VETH_STATE_OPEN) &&
 740             !(cnx->state & VETH_STATE_SENTCAPS)) {
 741                u64 *rawcap = (u64 *)&cnx->local_caps;
 742
 743                rc = veth_signalevent(cnx, VETH_EVENT_CAP,
 744                                      HvLpEvent_AckInd_DoAck,
 745                                      HvLpEvent_AckType_ImmediateAck,
 746                                      0, rawcap[0], rawcap[1], rawcap[2],
 747                                      rawcap[3], rawcap[4]);
 748
 749                if (rc == HvLpEvent_Rc_Good) {
 750                        cnx->state |= VETH_STATE_SENTCAPS;
 751                } else {
 752                        if ( (rc != HvLpEvent_Rc_PartitionDead) &&
 753                             (rc != HvLpEvent_Rc_PathClosed) )
 754                                veth_error("Error sending caps to LPAR %d, "
 755                                                "rc = %d\n", rlp, rc);
 756
 757                        /* Oh well, hope we get a cap from the other
 758                         * end and do better when that kicks us */
 759                        goto out;
 760                }
 761        }
 762
 763        if ((cnx->state & VETH_STATE_GOTCAPS) &&
 764            !(cnx->state & VETH_STATE_SENTCAPACK)) {
 765                struct veth_cap_data *remote_caps = &cnx->remote_caps;
 766
 767                memcpy(remote_caps, &cnx->cap_event.u.caps_data,
 768                       sizeof(*remote_caps));
 769
 770                spin_unlock_irq(&cnx->lock);
 771                rc = veth_process_caps(cnx);
 772                spin_lock_irq(&cnx->lock);
 773
 774                /* We dropped the lock, so recheck for anything which
 775                 * might mess us up */
 776                if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
 777                        goto restart;
 778
 779                cnx->cap_event.base_event.xRc = rc;
 780                HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
 781                if (rc == HvLpEvent_Rc_Good)
 782                        cnx->state |= VETH_STATE_SENTCAPACK;
 783                else
 784                        goto cant_cope;
 785        }
 786
 787        if ((cnx->state & VETH_STATE_GOTCAPACK) &&
 788            (cnx->state & VETH_STATE_GOTCAPS) &&
 789            !(cnx->state & VETH_STATE_READY)) {
 790                if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
 791                        /* Start the ACK timer */
 792                        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
 793                        add_timer(&cnx->ack_timer);
 794                        cnx->state |= VETH_STATE_READY;
 795                } else {
 796                        veth_error("Caps rejected by LPAR %d, rc = %d\n",
 797                                        rlp, cnx->cap_ack_event.base_event.xRc);
 798                        goto cant_cope;
 799                }
 800        }
 801
 802 out:
 803        spin_unlock_irq(&cnx->lock);
 804        return;
 805
 806 cant_cope:
 807        /* FIXME: we get here if something happens we really can't
 808         * cope with.  The link will never work once we get here, and
 809         * all we can do is not lock the rest of the system up */
 810        veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
 811                        " (state = 0x%04lx)\n", rlp, cnx->state);
 812        cnx->state |= VETH_STATE_SHUTDOWN;
 813        spin_unlock_irq(&cnx->lock);
 814}
 815
 816static int veth_init_connection(u8 rlp)
 817{
 818        struct veth_lpar_connection *cnx;
 819        struct veth_msg *msgs;
 820        int i;
 821
 822        if ( (rlp == this_lp) ||
 823             ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
 824                return 0;
 825
 826        cnx = kzalloc(sizeof(*cnx), GFP_KERNEL);
 827        if (! cnx)
 828                return -ENOMEM;
 829
 830        cnx->remote_lp = rlp;
 831        spin_lock_init(&cnx->lock);
 832        INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine);
 833
 834        init_timer(&cnx->ack_timer);
 835        cnx->ack_timer.function = veth_timed_ack;
 836        cnx->ack_timer.data = (unsigned long) cnx;
 837
 838        init_timer(&cnx->reset_timer);
 839        cnx->reset_timer.function = veth_timed_reset;
 840        cnx->reset_timer.data = (unsigned long) cnx;
 841        cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);
 842
 843        memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
 844
 845        veth_cnx[rlp] = cnx;
 846
 847        /* This gets us 1 reference, which is held on behalf of the driver
 848         * infrastructure. It's released at module unload. */
 849        kobject_init(&cnx->kobject, &veth_lpar_connection_ktype);
 850
 851        msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL);
 852        if (! msgs) {
 853                veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
 854                return -ENOMEM;
 855        }
 856
 857        cnx->msgs = msgs;
 858
 859        for (i = 0; i < VETH_NUMBUFFERS; i++) {
 860                msgs[i].token = i;
 861                veth_stack_push(cnx, msgs + i);
 862        }
 863
 864        cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);
 865
 866        if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
 867                veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
 868                return -ENOMEM;
 869        }
 870
 871        cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
 872        cnx->local_caps.ack_threshold = ACK_THRESHOLD;
 873        cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;
 874
 875        return 0;
 876}
 877
 878static void veth_stop_connection(struct veth_lpar_connection *cnx)
 879{
 880        if (!cnx)
 881                return;
 882
 883        spin_lock_irq(&cnx->lock);
 884        cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
 885        veth_kick_statemachine(cnx);
 886        spin_unlock_irq(&cnx->lock);
 887
 888        /* ensure the statemachine runs now and waits for its completion */
 889        flush_delayed_work_sync(&cnx->statemachine_wq);
 890}
 891
 892static void veth_destroy_connection(struct veth_lpar_connection *cnx)
 893{
 894        if (!cnx)
 895                return;
 896
 897        if (cnx->num_events > 0)
 898                mf_deallocate_lp_events(cnx->remote_lp,
 899                                      HvLpEvent_Type_VirtualLan,
 900                                      cnx->num_events,
 901                                      NULL, NULL);
 902        if (cnx->num_ack_events > 0)
 903                mf_deallocate_lp_events(cnx->remote_lp,
 904                                      HvLpEvent_Type_VirtualLan,
 905                                      cnx->num_ack_events,
 906                                      NULL, NULL);
 907
 908        kfree(cnx->msgs);
 909        veth_cnx[cnx->remote_lp] = NULL;
 910        kfree(cnx);
 911}
 912
 913static void veth_release_connection(struct kobject *kobj)
 914{
 915        struct veth_lpar_connection *cnx;
 916        cnx = container_of(kobj, struct veth_lpar_connection, kobject);
 917        veth_stop_connection(cnx);
 918        veth_destroy_connection(cnx);
 919}
 920
 921/*
 922 * net_device code
 923 */
 924
 925static int veth_open(struct net_device *dev)
 926{
 927        netif_start_queue(dev);
 928        return 0;
 929}
 930
 931static int veth_close(struct net_device *dev)
 932{
 933        netif_stop_queue(dev);
 934        return 0;
 935}
 936
 937static int veth_change_mtu(struct net_device *dev, int new_mtu)
 938{
 939        if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
 940                return -EINVAL;
 941        dev->mtu = new_mtu;
 942        return 0;
 943}
 944
 945static void veth_set_multicast_list(struct net_device *dev)
 946{
 947        struct veth_port *port = netdev_priv(dev);
 948        unsigned long flags;
 949
 950        write_lock_irqsave(&port->mcast_gate, flags);
 951
 952        if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
 953                        (netdev_mc_count(dev) > VETH_MAX_MCAST)) {
 954                port->promiscuous = 1;
 955        } else {
 956                struct netdev_hw_addr *ha;
 957
 958                port->promiscuous = 0;
 959
 960                /* Update table */
 961                port->num_mcast = 0;
 962
 963                netdev_for_each_mc_addr(ha, dev) {
 964                        u8 *addr = ha->addr;
 965                        u64 xaddr = 0;
 966
 967                        if (addr[0] & 0x01) {/* multicast address? */
 968                                memcpy(&xaddr, addr, ETH_ALEN);
 969                                port->mcast_addr[port->num_mcast] = xaddr;
 970                                port->num_mcast++;
 971                        }
 972                }
 973        }
 974
 975        write_unlock_irqrestore(&port->mcast_gate, flags);
 976}
 977
 978static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 979{
 980        strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1);
 981        info->driver[sizeof(info->driver) - 1] = '\0';
 982        strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1);
 983        info->version[sizeof(info->version) - 1] = '\0';
 984}
 985
 986static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 987{
 988        ecmd->supported = (SUPPORTED_1000baseT_Full
 989                          | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
 990        ecmd->advertising = (SUPPORTED_1000baseT_Full
 991                            | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
 992        ecmd->port = PORT_FIBRE;
 993        ecmd->transceiver = XCVR_INTERNAL;
 994        ecmd->phy_address = 0;
 995        ecmd->speed = SPEED_1000;
 996        ecmd->duplex = DUPLEX_FULL;
 997        ecmd->autoneg = AUTONEG_ENABLE;
 998        ecmd->maxtxpkt = 120;
 999        ecmd->maxrxpkt = 120;
1000        return 0;
1001}
1002
1003static const struct ethtool_ops ops = {
1004        .get_drvinfo = veth_get_drvinfo,
1005        .get_settings = veth_get_settings,
1006        .get_link = ethtool_op_get_link,
1007};
1008
1009static const struct net_device_ops veth_netdev_ops = {
1010        .ndo_open               = veth_open,
1011        .ndo_stop               = veth_close,
1012        .ndo_start_xmit         = veth_start_xmit,
1013        .ndo_change_mtu         = veth_change_mtu,
1014        .ndo_set_multicast_list = veth_set_multicast_list,
1015        .ndo_set_mac_address    = NULL,
1016        .ndo_validate_addr      = eth_validate_addr,
1017};
1018
1019static struct net_device *veth_probe_one(int vlan,
1020                struct vio_dev *vio_dev)
1021{
1022        struct net_device *dev;
1023        struct veth_port *port;
1024        struct device *vdev = &vio_dev->dev;
1025        int i, rc;
1026        const unsigned char *mac_addr;
1027
1028        mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL);
1029        if (mac_addr == NULL)
1030                mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL);
1031        if (mac_addr == NULL) {
1032                veth_error("Unable to fetch MAC address from device tree.\n");
1033                return NULL;
1034        }
1035
1036        dev = alloc_etherdev(sizeof (struct veth_port));
1037        if (! dev) {
1038                veth_error("Unable to allocate net_device structure!\n");
1039                return NULL;
1040        }
1041
1042        port = netdev_priv(dev);
1043
1044        spin_lock_init(&port->queue_lock);
1045        rwlock_init(&port->mcast_gate);
1046        port->stopped_map = 0;
1047
1048        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1049                HvLpVirtualLanIndexMap map;
1050
1051                if (i == this_lp)
1052                        continue;
1053                map = HvLpConfig_getVirtualLanIndexMapForLp(i);
1054                if (map & (0x8000 >> vlan))
1055                        port->lpar_map |= (1 << i);
1056        }
1057        port->dev = vdev;
1058
1059        memcpy(dev->dev_addr, mac_addr, ETH_ALEN);
1060
1061        dev->mtu = VETH_MAX_MTU;
1062
1063        memcpy(&port->mac_addr, mac_addr, ETH_ALEN);
1064
1065        dev->netdev_ops = &veth_netdev_ops;
1066        SET_ETHTOOL_OPS(dev, &ops);
1067
1068        SET_NETDEV_DEV(dev, vdev);
1069
1070        rc = register_netdev(dev);
1071        if (rc != 0) {
1072                veth_error("Failed registering net device for vlan%d.\n", vlan);
1073                free_netdev(dev);
1074                return NULL;
1075        }
1076
1077        kobject_init(&port->kobject, &veth_port_ktype);
1078        if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port"))
1079                veth_error("Failed adding port for %s to sysfs.\n", dev->name);
1080
1081        veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
1082                        dev->name, vlan, port->lpar_map);
1083
1084        return dev;
1085}
1086
1087/*
1088 * Tx path
1089 */
1090
1091static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
1092                                struct net_device *dev)
1093{
1094        struct veth_lpar_connection *cnx = veth_cnx[rlp];
1095        struct veth_port *port = netdev_priv(dev);
1096        HvLpEvent_Rc rc;
1097        struct veth_msg *msg = NULL;
1098        unsigned long flags;
1099
1100        if (! cnx)
1101                return 0;
1102
1103        spin_lock_irqsave(&cnx->lock, flags);
1104
1105        if (! (cnx->state & VETH_STATE_READY))
1106                goto no_error;
1107
1108        if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
1109                goto drop;
1110
1111        msg = veth_stack_pop(cnx);
1112        if (! msg)
1113                goto drop;
1114
1115        msg->in_use = 1;
1116        msg->skb = skb_get(skb);
1117
1118        msg->data.addr[0] = dma_map_single(port->dev, skb->data,
1119                                skb->len, DMA_TO_DEVICE);
1120
1121        if (dma_mapping_error(port->dev, msg->data.addr[0]))
1122                goto recycle_and_drop;
1123
1124        msg->dev = port->dev;
1125        msg->data.len[0] = skb->len;
1126        msg->data.eofmask = 1 << VETH_EOF_SHIFT;
1127
1128        rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data);
1129
1130        if (rc != HvLpEvent_Rc_Good)
1131                goto recycle_and_drop;
1132
1133        /* If the timer's not already running, start it now. */
1134        if (0 == cnx->outstanding_tx)
1135                mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);
1136
1137        cnx->last_contact = jiffies;
1138        cnx->outstanding_tx++;
1139
1140        if (veth_stack_is_empty(cnx))
1141                veth_stop_queues(cnx);
1142
1143 no_error:
1144        spin_unlock_irqrestore(&cnx->lock, flags);
1145        return 0;
1146
1147 recycle_and_drop:
1148        veth_recycle_msg(cnx, msg);
1149 drop:
1150        spin_unlock_irqrestore(&cnx->lock, flags);
1151        return 1;
1152}
1153
1154static void veth_transmit_to_many(struct sk_buff *skb,
1155                                          HvLpIndexMap lpmask,
1156                                          struct net_device *dev)
1157{
1158        int i, success, error;
1159
1160        success = error = 0;
1161
1162        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1163                if ((lpmask & (1 << i)) == 0)
1164                        continue;
1165
1166                if (veth_transmit_to_one(skb, i, dev))
1167                        error = 1;
1168                else
1169                        success = 1;
1170        }
1171
1172        if (error)
1173                dev->stats.tx_errors++;
1174
1175        if (success) {
1176                dev->stats.tx_packets++;
1177                dev->stats.tx_bytes += skb->len;
1178        }
1179}
1180
1181static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
1182{
1183        unsigned char *frame = skb->data;
1184        struct veth_port *port = netdev_priv(dev);
1185        HvLpIndexMap lpmask;
1186
1187        if (! (frame[0] & 0x01)) {
1188                /* unicast packet */
1189                HvLpIndex rlp = frame[5];
1190
1191                if ( ! ((1 << rlp) & port->lpar_map) ) {
1192                        dev_kfree_skb(skb);
1193                        return NETDEV_TX_OK;
1194                }
1195
1196                lpmask = 1 << rlp;
1197        } else {
1198                lpmask = port->lpar_map;
1199        }
1200
1201        veth_transmit_to_many(skb, lpmask, dev);
1202
1203        dev_kfree_skb(skb);
1204
1205        return NETDEV_TX_OK;
1206}
1207
1208/* You must hold the connection's lock when you call this function. */
1209static void veth_recycle_msg(struct veth_lpar_connection *cnx,
1210                             struct veth_msg *msg)
1211{
1212        u32 dma_address, dma_length;
1213
1214        if (msg->in_use) {
1215                msg->in_use = 0;
1216                dma_address = msg->data.addr[0];
1217                dma_length = msg->data.len[0];
1218
1219                if (!dma_mapping_error(msg->dev, dma_address))
1220                        dma_unmap_single(msg->dev, dma_address, dma_length,
1221                                        DMA_TO_DEVICE);
1222
1223                if (msg->skb) {
1224                        dev_kfree_skb_any(msg->skb);
1225                        msg->skb = NULL;
1226                }
1227
1228                memset(&msg->data, 0, sizeof(msg->data));
1229                veth_stack_push(cnx, msg);
1230        } else if (cnx->state & VETH_STATE_OPEN) {
1231                veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
1232                                cnx->remote_lp, msg->token);
1233        }
1234}
1235
1236static void veth_wake_queues(struct veth_lpar_connection *cnx)
1237{
1238        int i;
1239
1240        for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1241                struct net_device *dev = veth_dev[i];
1242                struct veth_port *port;
1243                unsigned long flags;
1244
1245                if (! dev)
1246                        continue;
1247
1248                port = netdev_priv(dev);
1249
1250                if (! (port->lpar_map & (1<<cnx->remote_lp)))
1251                        continue;
1252
1253                spin_lock_irqsave(&port->queue_lock, flags);
1254
1255                port->stopped_map &= ~(1 << cnx->remote_lp);
1256
1257                if (0 == port->stopped_map && netif_queue_stopped(dev)) {
1258                        veth_debug("cnx %d: woke queue for %s.\n",
1259                                        cnx->remote_lp, dev->name);
1260                        netif_wake_queue(dev);
1261                }
1262                spin_unlock_irqrestore(&port->queue_lock, flags);
1263        }
1264}
1265
1266static void veth_stop_queues(struct veth_lpar_connection *cnx)
1267{
1268        int i;
1269
1270        for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
1271                struct net_device *dev = veth_dev[i];
1272                struct veth_port *port;
1273
1274                if (! dev)
1275                        continue;
1276
1277                port = netdev_priv(dev);
1278
1279                /* If this cnx is not on the vlan for this port, continue */
1280                if (! (port->lpar_map & (1 << cnx->remote_lp)))
1281                        continue;
1282
1283                spin_lock(&port->queue_lock);
1284
1285                netif_stop_queue(dev);
1286                port->stopped_map |= (1 << cnx->remote_lp);
1287
1288                veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
1289                                cnx->remote_lp, dev->name, port->stopped_map);
1290
1291                spin_unlock(&port->queue_lock);
1292        }
1293}
1294
1295static void veth_timed_reset(unsigned long ptr)
1296{
1297        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
1298        unsigned long trigger_time, flags;
1299
1300        /* FIXME is it possible this fires after veth_stop_connection()?
1301         * That would reschedule the statemachine for 5 seconds and probably
1302         * execute it after the module's been unloaded. Hmm. */
1303
1304        spin_lock_irqsave(&cnx->lock, flags);
1305
1306        if (cnx->outstanding_tx > 0) {
1307                trigger_time = cnx->last_contact + cnx->reset_timeout;
1308
1309                if (trigger_time < jiffies) {
1310                        cnx->state |= VETH_STATE_RESET;
1311                        veth_kick_statemachine(cnx);
1312                        veth_error("%d packets not acked by LPAR %d within %d "
1313                                        "seconds, resetting.\n",
1314                                        cnx->outstanding_tx, cnx->remote_lp,
1315                                        cnx->reset_timeout / HZ);
1316                } else {
1317                        /* Reschedule the timer */
1318                        trigger_time = jiffies + cnx->reset_timeout;
1319                        mod_timer(&cnx->reset_timer, trigger_time);
1320                }
1321        }
1322
1323        spin_unlock_irqrestore(&cnx->lock, flags);
1324}
1325
1326/*
1327 * Rx path
1328 */
1329
1330static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
1331{
1332        int wanted = 0;
1333        int i;
1334        unsigned long flags;
1335
1336        if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
1337                return 1;
1338
1339        read_lock_irqsave(&port->mcast_gate, flags);
1340
1341        if (port->promiscuous) {
1342                wanted = 1;
1343                goto out;
1344        }
1345
1346        for (i = 0; i < port->num_mcast; ++i) {
1347                if (port->mcast_addr[i] == mac_addr) {
1348                        wanted = 1;
1349                        break;
1350                }
1351        }
1352
1353 out:
1354        read_unlock_irqrestore(&port->mcast_gate, flags);
1355
1356        return wanted;
1357}
1358
1359struct dma_chunk {
1360        u64 addr;
1361        u64 size;
1362};
1363
1364#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 )
1365
1366static inline void veth_build_dma_list(struct dma_chunk *list,
1367                                       unsigned char *p, unsigned long length)
1368{
1369        unsigned long done;
1370        int i = 1;
1371
1372        /* FIXME: skbs are contiguous in real addresses.  Do we
1373         * really need to break it into PAGE_SIZE chunks, or can we do
1374         * it just at the granularity of iSeries real->absolute
1375         * mapping?  Indeed, given the way the allocator works, can we
1376         * count on them being absolutely contiguous? */
1377        list[0].addr = iseries_hv_addr(p);
1378        list[0].size = min(length,
1379                           PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));
1380
1381        done = list[0].size;
1382        while (done < length) {
1383                list[i].addr = iseries_hv_addr(p + done);
1384                list[i].size = min(length-done, PAGE_SIZE);
1385                done += list[i].size;
1386                i++;
1387        }
1388}
1389
1390static void veth_flush_acks(struct veth_lpar_connection *cnx)
1391{
1392        HvLpEvent_Rc rc;
1393
1394        rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK,
1395                             0, &cnx->pending_acks);
1396
1397        if (rc != HvLpEvent_Rc_Good)
1398                veth_error("Failed acking frames from LPAR %d, rc = %d\n",
1399                                cnx->remote_lp, (int)rc);
1400
1401        cnx->num_pending_acks = 0;
1402        memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
1403}
1404
1405static void veth_receive(struct veth_lpar_connection *cnx,
1406                         struct veth_lpevent *event)
1407{
1408        struct veth_frames_data *senddata = &event->u.frames_data;
1409        int startchunk = 0;
1410        int nchunks;
1411        unsigned long flags;
1412        HvLpDma_Rc rc;
1413
1414        do {
1415                u16 length = 0;
1416                struct sk_buff *skb;
1417                struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
1418                struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
1419                u64 dest;
1420                HvLpVirtualLanIndex vlan;
1421                struct net_device *dev;
1422                struct veth_port *port;
1423
1424                /* FIXME: do we need this? */
1425                memset(local_list, 0, sizeof(local_list));
1426                memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG));
1427
1428                /* a 0 address marks the end of the valid entries */
1429                if (senddata->addr[startchunk] == 0)
1430                        break;
1431
1432                /* make sure that we have at least 1 EOF entry in the
1433                 * remaining entries */
1434                if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
1435                        veth_error("Missing EOF fragment in event "
1436                                        "eofmask = 0x%x startchunk = %d\n",
1437                                        (unsigned)senddata->eofmask,
1438                                        startchunk);
1439                        break;
1440                }
1441
1442                /* build list of chunks in this frame */
1443                nchunks = 0;
1444                do {
1445                        remote_list[nchunks].addr =
1446                                (u64) senddata->addr[startchunk+nchunks] << 32;
1447                        remote_list[nchunks].size =
1448                                senddata->len[startchunk+nchunks];
1449                        length += remote_list[nchunks].size;
1450                } while (! (senddata->eofmask &
1451                            (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));
1452
1453                /* length == total length of all chunks */
1454                /* nchunks == # of chunks in this frame */
1455
1456                if ((length - ETH_HLEN) > VETH_MAX_MTU) {
1457                        veth_error("Received oversize frame from LPAR %d "
1458                                        "(length = %d)\n",
1459                                        cnx->remote_lp, length);
1460                        continue;
1461                }
1462
1463                skb = alloc_skb(length, GFP_ATOMIC);
1464                if (!skb)
1465                        continue;
1466
1467                veth_build_dma_list(local_list, skb->data, length);
1468
1469                rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
1470                                            event->base_event.xSourceLp,
1471                                            HvLpDma_Direction_RemoteToLocal,
1472                                            cnx->src_inst,
1473                                            cnx->dst_inst,
1474                                            HvLpDma_AddressType_RealAddress,
1475                                            HvLpDma_AddressType_TceIndex,
1476                                            iseries_hv_addr(&local_list),
1477                                            iseries_hv_addr(&remote_list),
1478                                            length);
1479                if (rc != HvLpDma_Rc_Good) {
1480                        dev_kfree_skb_irq(skb);
1481                        continue;
1482                }
1483
1484                vlan = skb->data[9];
1485                dev = veth_dev[vlan];
1486                if (! dev) {
1487                        /*
1488                         * Some earlier versions of the driver sent
1489                         * broadcasts down all connections, even to lpars
1490                         * that weren't on the relevant vlan. So ignore
1491                         * packets belonging to a vlan we're not on.
1492                         * We can also be here if we receive packets while
1493                         * the driver is going down, because then dev is NULL.
1494                         */
1495                        dev_kfree_skb_irq(skb);
1496                        continue;
1497                }
1498
1499                port = netdev_priv(dev);
1500                dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;
1501
1502                if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) {
1503                        dev_kfree_skb_irq(skb);
1504                        continue;
1505                }
1506                if (! veth_frame_wanted(port, dest)) {
1507                        dev_kfree_skb_irq(skb);
1508                        continue;
1509                }
1510
1511                skb_put(skb, length);
1512                skb->protocol = eth_type_trans(skb, dev);
1513                skb_checksum_none_assert(skb);
1514                netif_rx(skb);  /* send it up */
1515                dev->stats.rx_packets++;
1516                dev->stats.rx_bytes += length;
1517        } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);
1518
1519        /* Ack it */
1520        spin_lock_irqsave(&cnx->lock, flags);
1521        BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);
1522
1523        cnx->pending_acks[cnx->num_pending_acks++] =
1524                event->base_event.xCorrelationToken;
1525
1526        if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) ||
1527             (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
1528                veth_flush_acks(cnx);
1529
1530        spin_unlock_irqrestore(&cnx->lock, flags);
1531}
1532
1533static void veth_timed_ack(unsigned long ptr)
1534{
1535        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
1536        unsigned long flags;
1537
1538        /* Ack all the events */
1539        spin_lock_irqsave(&cnx->lock, flags);
1540        if (cnx->num_pending_acks > 0)
1541                veth_flush_acks(cnx);
1542
1543        /* Reschedule the timer */
1544        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
1545        add_timer(&cnx->ack_timer);
1546        spin_unlock_irqrestore(&cnx->lock, flags);
1547}
1548
1549static int veth_remove(struct vio_dev *vdev)
1550{
1551        struct veth_lpar_connection *cnx;
1552        struct net_device *dev;
1553        struct veth_port *port;
1554        int i;
1555
1556        dev = veth_dev[vdev->unit_address];
1557
1558        if (! dev)
1559                return 0;
1560
1561        port = netdev_priv(dev);
1562
1563        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1564                cnx = veth_cnx[i];
1565
1566                if (cnx && (port->lpar_map & (1 << i))) {
1567                        /* Drop our reference to connections on our VLAN */
1568                        kobject_put(&cnx->kobject);
1569                }
1570        }
1571
1572        veth_dev[vdev->unit_address] = NULL;
1573        kobject_del(&port->kobject);
1574        kobject_put(&port->kobject);
1575        unregister_netdev(dev);
1576        free_netdev(dev);
1577
1578        return 0;
1579}
1580
1581static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1582{
1583        int i = vdev->unit_address;
1584        struct net_device *dev;
1585        struct veth_port *port;
1586
1587        dev = veth_probe_one(i, vdev);
1588        if (dev == NULL) {
1589                veth_remove(vdev);
1590                return 1;
1591        }
1592        veth_dev[i] = dev;
1593
1594        port = netdev_priv(dev);
1595
1596        /* Start the state machine on each connection on this vlan. If we're
1597         * the first dev to do so this will commence link negotiation */
1598        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
1599                struct veth_lpar_connection *cnx;
1600
1601                if (! (port->lpar_map & (1 << i)))
1602                        continue;
1603
1604                cnx = veth_cnx[i];
1605                if (!cnx)
1606                        continue;
1607
1608                kobject_get(&cnx->kobject);
1609                veth_kick_statemachine(cnx);
1610        }
1611
1612        return 0;
1613}
1614
1615/**
1616 * veth_device_table: Used by vio.c to match devices that we
1617 * support.
1618 */
1619static struct vio_device_id veth_device_table[] __devinitdata = {
1620        { "network", "IBM,iSeries-l-lan" },
1621        { "", "" }
1622};
1623MODULE_DEVICE_TABLE(vio, veth_device_table);
1624
1625static struct vio_driver veth_driver = {
1626        .id_table = veth_device_table,
1627        .probe = veth_probe,
1628        .remove = veth_remove,
1629        .driver = {
1630                .name = DRV_NAME,
1631                .owner = THIS_MODULE,
1632        }
1633};
1634
1635/*
1636 * Module initialization/cleanup
1637 */
1638
1639static void __exit veth_module_cleanup(void)
1640{
1641        int i;
1642        struct veth_lpar_connection *cnx;
1643
1644        /* Disconnect our "irq" to stop events coming from the Hypervisor. */
1645        HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);
1646
1647        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1648                cnx = veth_cnx[i];
1649
1650                if (!cnx)
1651                        continue;
1652
1653                /* Cancel work queued from Hypervisor callbacks */
1654                cancel_delayed_work_sync(&cnx->statemachine_wq);
1655                /* Remove the connection from sysfs */
1656                kobject_del(&cnx->kobject);
1657                /* Drop the driver's reference to the connection */
1658                kobject_put(&cnx->kobject);
1659        }
1660
1661        /* Unregister the driver, which will close all the netdevs and stop
1662         * the connections when they're no longer referenced. */
1663        vio_unregister_driver(&veth_driver);
1664}
1665module_exit(veth_module_cleanup);
1666
1667static int __init veth_module_init(void)
1668{
1669        int i;
1670        int rc;
1671
1672        if (!firmware_has_feature(FW_FEATURE_ISERIES))
1673                return -ENODEV;
1674
1675        this_lp = HvLpConfig_getLpIndex_outline();
1676
1677        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1678                rc = veth_init_connection(i);
1679                if (rc != 0)
1680                        goto error;
1681        }
1682
1683        HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
1684                                  &veth_handle_event);
1685
1686        rc = vio_register_driver(&veth_driver);
1687        if (rc != 0)
1688                goto error;
1689
1690        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1691                struct kobject *kobj;
1692
1693                if (!veth_cnx[i])
1694                        continue;
1695
1696                kobj = &veth_cnx[i]->kobject;
1697                /* If the add failes, complain but otherwise continue */
1698                if (0 != driver_add_kobj(&veth_driver.driver, kobj,
1699                                        "cnx%.2d", veth_cnx[i]->remote_lp))
1700                        veth_error("cnx %d: Failed adding to sysfs.\n", i);
1701        }
1702
1703        return 0;
1704
1705error:
1706        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
1707                veth_destroy_connection(veth_cnx[i]);
1708        }
1709
1710        return rc;
1711}
1712module_init(veth_module_init);
1713