linux/drivers/staging/unisys/visornic/visornic_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
   3 * All rights reserved.
   4 */
   5
/* This driver lives in a spar partition and registers for the ethernet IO
 * channels exposed by the visorbus driver. It creates netdev devices,
 * forwards transmits to the IO channel, and accepts rcvs from the IO
 * Partition via the IO channel.
 */
  11
  12#include <linux/debugfs.h>
  13#include <linux/etherdevice.h>
  14#include <linux/module.h>
  15#include <linux/netdevice.h>
  16#include <linux/kthread.h>
  17#include <linux/skbuff.h>
  18#include <linux/rtnetlink.h>
  19#include <linux/visorbus.h>
  20
  21#include "iochannel.h"
  22
  23#define VISORNIC_INFINITE_RSP_WAIT 0
  24
  25/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
  26 *         = 163840 bytes
  27 */
  28#define MAX_BUF 163840
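/* NAPI poll weight (budget): the maximum number of packets handled in one
 * poll pass.
 */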
  29#define NAPI_WEIGHT 64
  30
/* GUIDs for the channel types supported by this driver. */
  32/* {8cd5994d-c58e-11da-95a9-00e08161165f} */
  33#define VISOR_VNIC_CHANNEL_GUID \
  34        GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
  35                0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
  36#define VISOR_VNIC_CHANNEL_GUID_STR \
  37        "8cd5994d-c58e-11da-95a9-00e08161165f"
  38
  39static struct visor_channeltype_descriptor visornic_channel_types[] = {
  40        /* Note that the only channel type we expect to be reported by the
  41         * bus driver is the VISOR_VNIC channel.
  42         */
  43        { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
  44          VISOR_VNIC_CHANNEL_VERSIONID },
  45        {}
  46};
  47MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
  48/* FIXME XXX: This next line of code must be fixed and removed before
  49 * acceptance into the 'normal' part of the kernel.  It is only here as a place
  50 * holder to get module autoloading functionality working for visorbus.  Code
 * must be added to scripts/mod/file2alias.c, etc., to get this working
  52 * properly.
  53 */
  54MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
  55
  56struct chanstat {
  57        unsigned long got_rcv;
  58        unsigned long got_enbdisack;
  59        unsigned long got_xmit_done;
  60        unsigned long xmit_fail;
  61        unsigned long sent_enbdis;
  62        unsigned long sent_promisc;
  63        unsigned long sent_post;
  64        unsigned long sent_post_failed;
  65        unsigned long sent_xmit;
  66        unsigned long reject_count;
  67        unsigned long extra_rcvbufs_sent;
  68};
  69
  70/* struct visornic_devdata
  71 * @enabled:                        0 disabled 1 enabled to receive.
  72 * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
  73 * @struct *dev:
  74 * @struct *netdev:
  75 * @struct net_stats:
  76 * @interrupt_rcvd:
  77 * @rsp_queue:
  78 * @struct **rcvbuf:
  79 * @incarnation_id:                 incarnation_id lets IOPART know about
  80 *                                  re-birth.
  81 * @old_flags:                      flags as they were prior to
  82 *                                  set_multicast_list.
  83 * @usage:                          count of users.
  84 * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
  85 * @num_rcv_bufs_could_not_alloc:
  86 * @num_rcvbuf_in_iovm:
  87 * @alloc_failed_in_if_needed_cnt:
  88 * @alloc_failed_in_repost_rtn_cnt:
  89 * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
  90 *                                  - should never hit this.
  91 * @upper_threshold_net_xmits:      high water mark for calling
  92 *                                  netif_stop_queue().
 * @lower_threshold_net_xmits:      low water mark for calling
  94 *                                  netif_wake_queue().
  95 * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
  96 *                                  sent to the IOPART end.
  97 * @server_down_complete_func:
  98 * @struct timeout_reset:
  99 * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
 100 *                                  buffers.
 101 * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
 102 *                                  active xmit at a time.
 103 * @server_down:                    IOPART is down.
 104 * @server_change_state:            Processing SERVER_CHANGESTATE msg.
 105 * @going_away:                     device is being torn down.
 106 * @struct *eth_debugfs_dir:
 107 * @interrupts_rcvd:
 108 * @interrupts_notme:
 109 * @interrupts_disabled:
 110 * @busy_cnt:
 111 * @priv_lock:                      spinlock to access devdata structures.
 112 * @flow_control_upper_hits:
 113 * @flow_control_lower_hits:
 114 * @n_rcv0:                         # rcvs of 0 buffers.
 115 * @n_rcv1:                         # rcvs of 1 buffers.
 116 * @n_rcv2:                         # rcvs of 2 buffers.
 117 * @n_rcvx:                         # rcvs of >2 buffers.
 118 * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
 119 * @repost_found_skb_cnt:           # of found the skb.
 120 * @n_repost_deficit:               # of lost rcv buffers.
 121 * @bad_rcv_buf:                    # of unknown rcv skb not freed.
 * @n_rcv_packets_not_accepted:     # bogus rcv packets.
 123 * @queuefullmsg_logged:
 124 * @struct chstat:
 125 * @struct napi:
 126 * @struct cmdrsp:
 127 */
 128struct visornic_devdata {
 129        unsigned short enabled;
 130        unsigned short enab_dis_acked;
 131
 132        struct visor_device *dev;
 133        struct net_device *netdev;
 134        struct net_device_stats net_stats;
 135        atomic_t interrupt_rcvd;
 136        wait_queue_head_t rsp_queue;
 137        struct sk_buff **rcvbuf;
 138        u64 incarnation_id;
 139        unsigned short old_flags;
 140        atomic_t usage;
 141
 142        int num_rcv_bufs;
 143        int num_rcv_bufs_could_not_alloc;
 144        atomic_t num_rcvbuf_in_iovm;
 145        unsigned long alloc_failed_in_if_needed_cnt;
 146        unsigned long alloc_failed_in_repost_rtn_cnt;
 147
 148        unsigned long max_outstanding_net_xmits;
 149        unsigned long upper_threshold_net_xmits;
 150        unsigned long lower_threshold_net_xmits;
 151        struct sk_buff_head xmitbufhead;
 152
 153        visorbus_state_complete_func server_down_complete_func;
 154        struct work_struct timeout_reset;
 155        struct uiscmdrsp *cmdrsp_rcv;
 156        struct uiscmdrsp *xmit_cmdrsp;
 157        bool server_down;
 158        bool server_change_state;
 159        bool going_away;
 160        struct dentry *eth_debugfs_dir;
 161        u64 interrupts_rcvd;
 162        u64 interrupts_notme;
 163        u64 interrupts_disabled;
 164        u64 busy_cnt;
 165        /* spinlock to access devdata structures. */
 166        spinlock_t priv_lock;
 167
 168        /* flow control counter */
 169        u64 flow_control_upper_hits;
 170        u64 flow_control_lower_hits;
 171
 172        /* debug counters */
 173        unsigned long n_rcv0;
 174        unsigned long n_rcv1;
 175        unsigned long n_rcv2;
 176        unsigned long n_rcvx;
 177        unsigned long found_repost_rcvbuf_cnt;
 178        unsigned long repost_found_skb_cnt;
 179        unsigned long n_repost_deficit;
 180        unsigned long bad_rcv_buf;
 181        unsigned long n_rcv_packets_not_accepted;
 182
 183        int queuefullmsg_logged;
 184        struct chanstat chstat;
 185        struct napi_struct napi;
 186        struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
 187};
 188
 189/* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
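/* A worked example, assuming the usual 4 KiB PI_PAGE_SIZE: an input run of
 * pfn = 100, off = 3000, len = 6000 spans pages and is split into three
 * entries - {pfn 100, off 3000, len 1096}, {pfn 101, off 0, len 4096} and
 * {pfn 102, off 0, len 808} - so the function returns index + 3.
 */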
 190static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
 191                                u16 index, u16 max_pi_arr_entries,
 192                                struct phys_info pi_arr[])
 193{
 194        u16 i, len, firstlen;
 195
 196        firstlen = PI_PAGE_SIZE - inp_off;
 197        if (inp_len <= firstlen) {
 198                /* The input entry spans only one page - add as is. */
 199                if (index >= max_pi_arr_entries)
 200                        return 0;
 201                pi_arr[index].pi_pfn = inp_pfn;
 202                pi_arr[index].pi_off = (u16)inp_off;
 203                pi_arr[index].pi_len = (u16)inp_len;
 204                return index + 1;
 205        }
 206
 207        /* This entry spans multiple pages. */
 208        for (len = inp_len, i = 0; len;
 209                len -= pi_arr[index + i].pi_len, i++) {
 210                if (index + i >= max_pi_arr_entries)
 211                        return 0;
 212                pi_arr[index + i].pi_pfn = inp_pfn + i;
 213                if (i == 0) {
 214                        pi_arr[index].pi_off = inp_off;
 215                        pi_arr[index].pi_len = firstlen;
 216                } else {
 217                        pi_arr[index + i].pi_off = 0;
 218                        pi_arr[index + i].pi_len = min_t(u16, len,
 219                                                         PI_PAGE_SIZE);
 220                }
 221        }
 222        return index + i;
 223}
 224
 225/* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
 226 *                                 array that the IOPART understands
 227 * @skb:          Skbuff that we are pulling the frags from.
 228 * @firstfraglen: Length of first fragment in skb.
 229 * @frags_max:    Max len of frags array.
 230 * @frags:        Frags array filled in on output.
 231 *
 232 * Return: Positive integer indicating number of entries filled in frags on
 233 *         success, negative integer on error.
 234 */
 235static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
 236                                         unsigned int firstfraglen,
 237                                         unsigned int frags_max,
 238                                         struct phys_info frags[])
 239{
 240        unsigned int count = 0, frag, size, offset = 0, numfrags;
 241        unsigned int total_count;
 242
 243        numfrags = skb_shinfo(skb)->nr_frags;
 244
        /* Compute the number of fragments this skb has, and if it's more
         * than the frag array can hold, linearize the skb.
         */
 248        total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
 249        if (firstfraglen % PI_PAGE_SIZE)
 250                total_count++;
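        /* Example, assuming the usual 4 KiB PI_PAGE_SIZE: a 9000-byte first
         * frag (2 full pages plus a remainder, so 3) and 2 page frags give
         * total_count = 2 + 3 = 5.
         */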
 251
 252        if (total_count > frags_max) {
 253                if (skb_linearize(skb))
 254                        return -EINVAL;
 255                numfrags = skb_shinfo(skb)->nr_frags;
 256                firstfraglen = 0;
 257        }
 258
 259        while (firstfraglen) {
 260                if (count == frags_max)
 261                        return -EINVAL;
 262
 263                frags[count].pi_pfn =
 264                        page_to_pfn(virt_to_page(skb->data + offset));
 265                frags[count].pi_off =
 266                        (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
 267                size = min_t(unsigned int, firstfraglen,
 268                             PI_PAGE_SIZE - frags[count].pi_off);
 269
                /* take the smaller of firstfraglen (what's left) and the
                 * bytes left in the page
                 */
 273                frags[count].pi_len = size;
 274                firstfraglen -= size;
 275                offset += size;
 276                count++;
 277        }
 278        if (numfrags) {
 279                if ((count + numfrags) > frags_max)
 280                        return -EINVAL;
 281
 282                for (frag = 0; frag < numfrags; frag++) {
 283                        count = add_physinfo_entries(page_to_pfn(
 284                                  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
 285                                  skb_frag_off(&skb_shinfo(skb)->frags[frag]),
 286                                  skb_frag_size(&skb_shinfo(skb)->frags[frag]),
 287                                  count, frags_max, frags);
                        /* add_physinfo_entries only returns zero if the
                         * frags array is out of room. That should never
                         * happen because we fail above if
                         * count + numfrags > frags_max.
                         */
 293                        if (!count)
 294                                return -EINVAL;
 295                }
 296        }
 297        if (skb_shinfo(skb)->frag_list) {
 298                struct sk_buff *skbinlist;
 299                int c;
 300
 301                for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
 302                     skbinlist = skbinlist->next) {
 303                        c = visor_copy_fragsinfo_from_skb(skbinlist,
 304                                                          skbinlist->len -
 305                                                          skbinlist->data_len,
 306                                                          frags_max - count,
 307                                                          &frags[count]);
 308                        if (c < 0)
 309                                return c;
 310                        count += c;
 311                }
 312        }
 313        return count;
 314}
 315
 316static ssize_t enable_ints_write(struct file *file,
 317                                 const char __user *buffer,
 318                                 size_t count, loff_t *ppos)
 319{
        /* We don't want to break the ABI here by removing this debugfs
         * file or making it non-writable, so just keep it as a vestigial
         * no-op.
         */
 324        return count;
 325}
 326
 327static const struct file_operations debugfs_enable_ints_fops = {
 328        .write = enable_ints_write,
 329};
 330
 331/* visornic_serverdown_complete - pause device following IOPART going down
 332 * @devdata: Device managed by IOPART.
 333 *
 334 * The IO partition has gone down, and we need to do some cleanup for when it
 335 * comes back. Treat the IO partition as the link being down.
 336 */
 337static void visornic_serverdown_complete(struct visornic_devdata *devdata)
 338{
 339        struct net_device *netdev = devdata->netdev;
 340
 341        /* Stop polling for interrupts */
 342        visorbus_disable_channel_interrupts(devdata->dev);
 343
 344        rtnl_lock();
 345        dev_close(netdev);
 346        rtnl_unlock();
 347
 348        atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
 349        devdata->chstat.sent_xmit = 0;
 350        devdata->chstat.got_xmit_done = 0;
 351
 352        if (devdata->server_down_complete_func)
 353                (*devdata->server_down_complete_func)(devdata->dev, 0);
 354
 355        devdata->server_down = true;
 356        devdata->server_change_state = false;
 357        devdata->server_down_complete_func = NULL;
 358}
 359
 360/* visornic_serverdown - Command has notified us that IOPART is down
 361 * @devdata:       Device managed by IOPART.
 362 * @complete_func: Function to call when finished.
 363 *
 364 * Schedule the work needed to handle the server down request. Make sure we
 365 * haven't already handled the server change state event.
 366 *
 367 * Return: 0 if we scheduled the work, negative integer on error.
 368 */
 369static int visornic_serverdown(struct visornic_devdata *devdata,
 370                               visorbus_state_complete_func complete_func)
 371{
 372        unsigned long flags;
 373        int err;
 374
 375        spin_lock_irqsave(&devdata->priv_lock, flags);
 376        if (devdata->server_change_state) {
 377                dev_dbg(&devdata->dev->device, "%s changing state\n",
 378                        __func__);
 379                err = -EINVAL;
 380                goto err_unlock;
 381        }
 382        if (devdata->server_down) {
 383                dev_dbg(&devdata->dev->device, "%s already down\n",
 384                        __func__);
 385                err = -EINVAL;
 386                goto err_unlock;
 387        }
 388        if (devdata->going_away) {
 389                dev_dbg(&devdata->dev->device,
 390                        "%s aborting because device removal pending\n",
 391                        __func__);
 392                err = -ENODEV;
 393                goto err_unlock;
 394        }
 395        devdata->server_change_state = true;
 396        devdata->server_down_complete_func = complete_func;
 397        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 398
 399        visornic_serverdown_complete(devdata);
 400        return 0;
 401
 402err_unlock:
 403        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 404        return err;
 405}
 406
 407/* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
 * @netdev: Network adapter the rcv bufs are attached to.
 409 *
 410 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 411 * so that it can write rcv data into our memory space.
 412 *
 413 * Return: Pointer to sk_buff.
 414 */
 415static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
 416{
 417        struct sk_buff *skb;
 418
        /* NOTE: the first fragment in each rcv buffer is pointed to by
         * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
         * in length, so the first frag is large enough to hold a 1514-byte
         * frame.
         */
 423        skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
 424        if (!skb)
 425                return NULL;
 426        skb->dev = netdev;
 427        /* current value of mtu doesn't come into play here; large
 428         * packets will just end up using multiple rcv buffers all of
 429         * same size.
 430         */
 431        skb->len = RCVPOST_BUF_SIZE;
        /* alloc_skb already zeroes data_len; set it explicitly for clarity. */
 433        skb->data_len = 0;
 434        return skb;
 435}
 436
 437/* post_skb - post a skb to the IO Partition
 * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
 439 * @devdata: visornic_devdata to post the skb to.
 440 * @skb:     Skb to give to the IO partition.
 441 *
 442 * Return: 0 on success, negative integer on error.
 443 */
 444static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
 445                    struct sk_buff *skb)
 446{
 447        int err;
 448
 449        cmdrsp->net.buf = skb;
 450        cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
 451        cmdrsp->net.rcvpost.frag.pi_off =
 452                (unsigned long)skb->data & PI_PAGE_MASK;
 453        cmdrsp->net.rcvpost.frag.pi_len = skb->len;
 454        cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
 455
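        /* The posted rcv buffer is described to the IO Partition by a single
         * phys_info frag, so it must not cross a page boundary.
         */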
 456        if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
 457                return -EINVAL;
 458
 459        cmdrsp->net.type = NET_RCV_POST;
 460        cmdrsp->cmdtype = CMD_NET_TYPE;
 461        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 462                                        IOCHAN_TO_IOPART,
 463                                        cmdrsp);
 464        if (err) {
 465                devdata->chstat.sent_post_failed++;
 466                return err;
 467        }
 468
 469        atomic_inc(&devdata->num_rcvbuf_in_iovm);
 470        devdata->chstat.sent_post++;
 471        return 0;
 472}
 473
 474/* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
 475 * @netdev:  Netdevice we are enabling/disabling, used as context return value.
 476 * @state:   Enable = 1/disable = 0.
 477 * @devdata: Visornic device we are enabling/disabling.
 478 *
 479 * Send the enable/disable message to the IO Partition.
 480 *
 481 * Return: 0 on success, negative integer on error.
 482 */
 483static int send_enbdis(struct net_device *netdev, int state,
 484                       struct visornic_devdata *devdata)
 485{
 486        int err;
 487
 488        devdata->cmdrsp_rcv->net.enbdis.enable = state;
 489        devdata->cmdrsp_rcv->net.enbdis.context = netdev;
 490        devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
 491        devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
 492        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 493                                        IOCHAN_TO_IOPART,
 494                                        devdata->cmdrsp_rcv);
 495        if (err)
 496                return err;
 497        devdata->chstat.sent_enbdis++;
 498        return 0;
 499}
 500
 501/* visornic_disable_with_timeout - disable network adapter
 502 * @netdev:  netdevice to disable.
 503 * @timeout: Timeout to wait for disable.
 504 *
 505 * Disable the network adapter and inform the IO Partition that we are disabled.
 506 * Reclaim memory from rcv bufs.
 507 *
 * Return: 0 on success, negative integer if the IO Partition fails to respond.
 509 */
 510static int visornic_disable_with_timeout(struct net_device *netdev,
 511                                         const int timeout)
 512{
 513        struct visornic_devdata *devdata = netdev_priv(netdev);
 514        int i;
 515        unsigned long flags;
 516        int wait = 0;
 517        int err;
 518
 519        /* send a msg telling the other end we are stopping incoming pkts */
 520        spin_lock_irqsave(&devdata->priv_lock, flags);
 521        devdata->enabled = 0;
 522        /* must wait for ack */
 523        devdata->enab_dis_acked = 0;
 524        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 525
 526        /* send disable and wait for ack -- don't hold lock when sending
 527         * disable because if the queue is full, insert might sleep.
 528         * If an error occurs, don't wait for the timeout.
 529         */
 530        err = send_enbdis(netdev, 0, devdata);
 531        if (err)
 532                return err;
 533
 534        /* wait for ack to arrive before we try to free rcv buffers
 535         * NOTE: the other end automatically unposts the rcv buffers
 536         * when it gets a disable.
 537         */
 538        spin_lock_irqsave(&devdata->priv_lock, flags);
 539        while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 540               (wait < timeout)) {
 541                if (devdata->enab_dis_acked)
 542                        break;
 543                if (devdata->server_down || devdata->server_change_state) {
 544                        dev_dbg(&netdev->dev, "%s server went away\n",
 545                                __func__);
 546                        break;
 547                }
 548                set_current_state(TASK_INTERRUPTIBLE);
 549                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 550                wait += schedule_timeout(msecs_to_jiffies(10));
 551                spin_lock_irqsave(&devdata->priv_lock, flags);
 552        }
 553
 554        /* Wait for usage to go to 1 (no other users) before freeing
 555         * rcv buffers
 556         */
 557        if (atomic_read(&devdata->usage) > 1) {
 558                while (1) {
 559                        set_current_state(TASK_INTERRUPTIBLE);
 560                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 561                        schedule_timeout(msecs_to_jiffies(10));
 562                        spin_lock_irqsave(&devdata->priv_lock, flags);
                        if (atomic_read(&devdata->usage) == 1)
 564                                break;
 565                }
 566        }
 567        /* we've set enabled to 0, so we can give up the lock. */
 568        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 569
 570        /* stop the transmit queue so nothing more can be transmitted */
 571        netif_stop_queue(netdev);
 572
 573        napi_disable(&devdata->napi);
 574
 575        skb_queue_purge(&devdata->xmitbufhead);
 576
        /* Free rcv buffers - the other end has automatically unposted them
         * on disable.
         */
 580        for (i = 0; i < devdata->num_rcv_bufs; i++) {
 581                if (devdata->rcvbuf[i]) {
 582                        kfree_skb(devdata->rcvbuf[i]);
 583                        devdata->rcvbuf[i] = NULL;
 584                }
 585        }
 586
 587        return 0;
 588}
 589
 590/* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
 591 * @netdev:  struct netdevice.
 592 * @devdata: visornic_devdata.
 593 *
 594 * Allocate rcv buffers and post them to the IO Partition.
 595 *
 596 * Return: 0 on success, negative integer on failure.
 597 */
 598static int init_rcv_bufs(struct net_device *netdev,
 599                         struct visornic_devdata *devdata)
 600{
 601        int i, j, count, err;
 602
        /* Allocate a fixed number of receive buffers to post to uisnic;
         * post them only after we've allocated the required amount.
         */
 606        for (i = 0; i < devdata->num_rcv_bufs; i++) {
 607                devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
 608                /* if we failed to allocate one let us stop */
 609                if (!devdata->rcvbuf[i])
 610                        break;
 611        }
 612        /* couldn't even allocate one -- bail out */
 613        if (i == 0)
 614                return -ENOMEM;
 615        count = i;
 616
 617        /* Ensure we can alloc 2/3rd of the requested number of buffers.
 618         * 2/3 is an arbitrary choice; used also in ndis init.c
 619         */
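        /* For example, if num_rcv_bufs were 64, at least (2 * 64) / 3 = 42
         * buffers must have been allocated to continue.
         */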
 620        if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
 621                /* free receive buffers we did alloc and then bail out */
 622                for (i = 0; i < count; i++) {
 623                        kfree_skb(devdata->rcvbuf[i]);
 624                        devdata->rcvbuf[i] = NULL;
 625                }
 626                return -ENOMEM;
 627        }
 628
 629        /* post receive buffers to receive incoming input - without holding
 630         * lock - we've not enabled nor started the queue so there shouldn't
 631         * be any rcv or xmit activity
 632         */
 633        for (i = 0; i < count; i++) {
 634                err = post_skb(devdata->cmdrsp_rcv, devdata,
 635                               devdata->rcvbuf[i]);
 636                if (!err)
 637                        continue;
 638
 639                /* Error handling -
 640                 * If we posted at least one skb, we should return success,
 641                 * but need to free the resources that we have not successfully
 642                 * posted.
 643                 */
 644                for (j = i; j < count; j++) {
 645                        kfree_skb(devdata->rcvbuf[j]);
 646                        devdata->rcvbuf[j] = NULL;
 647                }
 648                if (i == 0)
 649                        return err;
 650                break;
 651        }
 652
 653        return 0;
 654}
 655
 656/* visornic_enable_with_timeout - send enable to IO Partition
 657 * @netdev:  struct net_device.
 658 * @timeout: Time to wait for the ACK from the enable.
 659 *
 * Send the enable to the IOVM, and allocate and post receive buffers to the
 * IOVM. The timeout is defined in msecs (a timeout of 0 specifies an
 * infinite wait).
 662 *
 663 * Return: 0 on success, negative integer on failure.
 664 */
 665static int visornic_enable_with_timeout(struct net_device *netdev,
 666                                        const int timeout)
 667{
 668        int err = 0;
 669        struct visornic_devdata *devdata = netdev_priv(netdev);
 670        unsigned long flags;
 671        int wait = 0;
 672
 673        napi_enable(&devdata->napi);
 674
 675        /* NOTE: the other end automatically unposts the rcv buffers when it
 676         * gets a disable.
 677         */
 678        err = init_rcv_bufs(netdev, devdata);
 679        if (err < 0) {
 680                dev_err(&netdev->dev,
 681                        "%s failed to init rcv bufs\n", __func__);
 682                return err;
 683        }
 684
 685        spin_lock_irqsave(&devdata->priv_lock, flags);
 686        devdata->enabled = 1;
 687        devdata->enab_dis_acked = 0;
 688
 689        /* now we're ready, let's send an ENB to uisnic but until we get
 690         * an ACK back from uisnic, we'll drop the packets
 691         */
 692        devdata->n_rcv_packets_not_accepted = 0;
 693        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 694
 695        /* send enable and wait for ack -- don't hold lock when sending enable
 696         * because if the queue is full, insert might sleep. If an error
 697         * occurs error out.
 698         */
 699        err = send_enbdis(netdev, 1, devdata);
 700        if (err)
 701                return err;
 702
 703        spin_lock_irqsave(&devdata->priv_lock, flags);
 704        while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 705               (wait < timeout)) {
 706                if (devdata->enab_dis_acked)
 707                        break;
 708                if (devdata->server_down || devdata->server_change_state) {
 709                        dev_dbg(&netdev->dev, "%s server went away\n",
 710                                __func__);
 711                        break;
 712                }
 713                set_current_state(TASK_INTERRUPTIBLE);
 714                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 715                wait += schedule_timeout(msecs_to_jiffies(10));
 716                spin_lock_irqsave(&devdata->priv_lock, flags);
 717        }
 718
 719        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 720
 721        if (!devdata->enab_dis_acked) {
 722                dev_err(&netdev->dev, "%s missing ACK\n", __func__);
 723                return -EIO;
 724        }
 725
 726        netif_start_queue(netdev);
 727        return 0;
 728}
 729
 730/* visornic_timeout_reset - handle xmit timeout resets
 731 * @work: Work item that scheduled the work.
 732 *
 733 * Transmit timeouts are typically handled by resetting the device for our
 734 * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
 735 * respond, we will trigger a serverdown.
 736 */
 737static void visornic_timeout_reset(struct work_struct *work)
 738{
 739        struct visornic_devdata *devdata;
 740        struct net_device *netdev;
 741        int response = 0;
 742
 743        devdata = container_of(work, struct visornic_devdata, timeout_reset);
 744        netdev = devdata->netdev;
 745
 746        rtnl_lock();
 747        if (!netif_running(netdev)) {
 748                rtnl_unlock();
 749                return;
 750        }
 751
 752        response = visornic_disable_with_timeout(netdev,
 753                                                 VISORNIC_INFINITE_RSP_WAIT);
 754        if (response)
 755                goto call_serverdown;
 756
 757        response = visornic_enable_with_timeout(netdev,
 758                                                VISORNIC_INFINITE_RSP_WAIT);
 759        if (response)
 760                goto call_serverdown;
 761
 762        rtnl_unlock();
 763
 764        return;
 765
 766call_serverdown:
 767        visornic_serverdown(devdata, NULL);
 768        rtnl_unlock();
 769}
 770
 771/* visornic_open - enable the visornic device and mark the queue started
 772 * @netdev: netdevice to start.
 773 *
 774 * Enable the device and start the transmit queue.
 775 *
 776 * Return: 0 on success.
 777 */
 778static int visornic_open(struct net_device *netdev)
 779{
 780        visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 781        return 0;
 782}
 783
 784/* visornic_close - disables the visornic device and stops the queues
 785 * @netdev: netdevice to stop.
 786 *
 787 * Disable the device and stop the transmit queue.
 788 *
 * Return: 0 on success.
 790 */
 791static int visornic_close(struct net_device *netdev)
 792{
 793        visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 794        return 0;
 795}
 796
 797/* devdata_xmits_outstanding - compute outstanding xmits
 798 * @devdata: visornic_devdata for device
 799 *
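 * The counters are free-running and may wrap. When sent_xmit has wrapped
 * around while got_xmit_done has not yet caught up, the second branch below
 * reconstructs the true difference modulo ULONG_MAX + 1; e.g. (illustrative
 * values) sent_xmit = 2 after wrapping and got_xmit_done = ULONG_MAX - 1
 * give 4 outstanding xmits.
 *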
 800 * Return: Long integer representing the number of outstanding xmits.
 801 */
 802static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
 803{
 804        if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
 805                return devdata->chstat.sent_xmit -
 806                        devdata->chstat.got_xmit_done;
 807        return (ULONG_MAX - devdata->chstat.got_xmit_done
 808                + devdata->chstat.sent_xmit + 1);
 809}
 810
/* vnic_hit_high_watermark - check if outstanding xmits have reached the high
 *                           watermark
 812 * @devdata:        Indicates visornic device we are checking.
 813 * @high_watermark: Max num of unacked xmits we will tolerate before we will
 814 *                  start throttling.
 815 *
 816 * Return: True iff the number of unacked xmits sent to the IO Partition is >=
 817 *         high_watermark. False otherwise.
 818 */
 819static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
 820                                    ulong high_watermark)
 821{
 822        return (devdata_xmits_outstanding(devdata) >= high_watermark);
 823}
 824
/* vnic_hit_low_watermark - check if outstanding xmits have fallen to the low
 *                          watermark
 826 * @devdata:       Indicates visornic device we are checking.
 827 * @low_watermark: We will wait until the num of unacked xmits drops to this
 828 *                 value or lower before we start transmitting again.
 829 *
 830 * Return: True iff the number of unacked xmits sent to the IO Partition is <=
 831 *         low_watermark.
 832 */
 833static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
 834                                   ulong low_watermark)
 835{
 836        return (devdata_xmits_outstanding(devdata) <= low_watermark);
 837}
 838
 839/* visornic_xmit - send a packet to the IO Partition
 840 * @skb:    Packet to be sent.
 841 * @netdev: Net device the packet is being sent from.
 842 *
 843 * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
 844 * the XMIT command to the IO Partition for processing. This function is
 845 * protected from concurrent calls by a spinlock xmit_lock in the net_device
 846 * struct. As soon as the function returns, it can be called again.
 847 *
 848 * Return: NETDEV_TX_OK.
 849 */
 850static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
 851{
 852        struct visornic_devdata *devdata;
 853        int len, firstfraglen, padlen;
 854        struct uiscmdrsp *cmdrsp = NULL;
 855        unsigned long flags;
 856        int err;
 857
 858        devdata = netdev_priv(netdev);
 859        spin_lock_irqsave(&devdata->priv_lock, flags);
 860
 861        if (netif_queue_stopped(netdev) || devdata->server_down ||
 862            devdata->server_change_state) {
 863                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 864                devdata->busy_cnt++;
 865                dev_dbg(&netdev->dev,
 866                        "%s busy - queue stopped\n", __func__);
 867                kfree_skb(skb);
 868                return NETDEV_TX_OK;
 869        }
 870
        /* the sk_buff struct is used to host network data throughout all of
         * the Linux network subsystems
         */
 874        len = skb->len;
 875
 876        /* skb->len is the FULL length of data (including fragmentary portion)
 877         * skb->data_len is the length of the fragment portion in frags
 878         * skb->len - skb->data_len is size of the 1st fragment in skb->data
 879         * calculate the length of the first fragment that skb->data is
 880         * pointing to
 881         */
 882        firstfraglen = skb->len - skb->data_len;
 883        if (firstfraglen < ETH_HLEN) {
 884                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 885                devdata->busy_cnt++;
 886                dev_err(&netdev->dev,
 887                        "%s busy - first frag too small (%d)\n",
 888                        __func__, firstfraglen);
 889                kfree_skb(skb);
 890                return NETDEV_TX_OK;
 891        }
 892
 893        if (len < ETH_MIN_PACKET_SIZE &&
 894            ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
 895                /* pad the packet out to minimum size */
 896                padlen = ETH_MIN_PACKET_SIZE - len;
 897                skb_put_zero(skb, padlen);
 898                len += padlen;
 899                firstfraglen += padlen;
 900        }
 901
 902        cmdrsp = devdata->xmit_cmdrsp;
 903        /* clear cmdrsp */
 904        memset(cmdrsp, 0, SIZEOF_CMDRSP);
 905        cmdrsp->net.type = NET_XMIT;
 906        cmdrsp->cmdtype = CMD_NET_TYPE;
 907
 908        /* save the pointer to skb -- we'll need it for completion */
 909        cmdrsp->net.buf = skb;
 910
 911        if (vnic_hit_high_watermark(devdata,
 912                                    devdata->max_outstanding_net_xmits)) {
 913                /* extra NET_XMITs queued over to IOVM - need to wait */
 914                devdata->chstat.reject_count++;
 915                if (!devdata->queuefullmsg_logged &&
 916                    ((devdata->chstat.reject_count & 0x3ff) == 1))
 917                        devdata->queuefullmsg_logged = 1;
 918                netif_stop_queue(netdev);
 919                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 920                devdata->busy_cnt++;
 921                dev_dbg(&netdev->dev,
 922                        "%s busy - waiting for iovm to catch up\n",
 923                        __func__);
 924                kfree_skb(skb);
 925                return NETDEV_TX_OK;
 926        }
 927        if (devdata->queuefullmsg_logged)
 928                devdata->queuefullmsg_logged = 0;
 929
 930        if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 931                cmdrsp->net.xmt.lincsum.valid = 1;
 932                cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
 933                if (skb_transport_header(skb) > skb->data) {
 934                        cmdrsp->net.xmt.lincsum.hrawoff =
 935                                skb_transport_header(skb) - skb->data;
                        cmdrsp->net.xmt.lincsum.hrawoffv = 1;
 937                }
 938                if (skb_network_header(skb) > skb->data) {
 939                        cmdrsp->net.xmt.lincsum.nhrawoff =
 940                                skb_network_header(skb) - skb->data;
 941                        cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
 942                }
 943                cmdrsp->net.xmt.lincsum.csum = skb->csum;
 944        } else {
 945                cmdrsp->net.xmt.lincsum.valid = 0;
 946        }
 947
 948        /* save off the length of the entire data packet */
 949        cmdrsp->net.xmt.len = len;
 950
        /* copy the ethernet header from the first frag into the cmdrsp
         * - everything else will be passed in frags and DMA'ed
         */
 954        memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
 955
 956        /* copy frags info - from skb->data we need to only provide access
 957         * beyond eth header
 958         */
 959        cmdrsp->net.xmt.num_frags =
 960                visor_copy_fragsinfo_from_skb(skb, firstfraglen,
 961                                              MAX_PHYS_INFO,
 962                                              cmdrsp->net.xmt.frags);
 963        if (cmdrsp->net.xmt.num_frags < 0) {
 964                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 965                devdata->busy_cnt++;
 966                dev_err(&netdev->dev,
 967                        "%s busy - copy frags failed\n", __func__);
 968                kfree_skb(skb);
 969                return NETDEV_TX_OK;
 970        }
 971
 972        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 973                                        IOCHAN_TO_IOPART, cmdrsp);
 974        if (err) {
 975                netif_stop_queue(netdev);
 976                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 977                devdata->busy_cnt++;
 978                dev_dbg(&netdev->dev,
 979                        "%s busy - signalinsert failed\n", __func__);
 980                kfree_skb(skb);
 981                return NETDEV_TX_OK;
 982        }
 983
 984        /* Track the skbs that have been sent to the IOVM for XMIT */
 985        skb_queue_head(&devdata->xmitbufhead, skb);
 986
 987        /* update xmt stats */
 988        devdata->net_stats.tx_packets++;
 989        devdata->net_stats.tx_bytes += skb->len;
 990        devdata->chstat.sent_xmit++;
 991
 992        /* check if we have hit the high watermark for netif_stop_queue() */
 993        if (vnic_hit_high_watermark(devdata,
 994                                    devdata->upper_threshold_net_xmits)) {
 995                /* extra NET_XMITs queued over to IOVM - need to wait */
 996                /* stop queue - call netif_wake_queue() after lower threshold */
 997                netif_stop_queue(netdev);
 998                dev_dbg(&netdev->dev,
 999                        "%s busy - invoking iovm flow control\n",
1000                        __func__);
1001                devdata->flow_control_upper_hits++;
1002        }
1003        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1004
1005        /* skb will be freed when we get back NET_XMIT_DONE */
1006        return NETDEV_TX_OK;
1007}
1008
1009/* visornic_get_stats - returns net_stats of the visornic device
1010 * @netdev: netdevice.
1011 *
1012 * Return: Pointer to the net_device_stats struct for the device.
1013 */
1014static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1015{
1016        struct visornic_devdata *devdata = netdev_priv(netdev);
1017
1018        return &devdata->net_stats;
1019}
1020
1021/* visornic_change_mtu - changes mtu of device
1022 * @netdev: netdevice.
1023 * @new_mtu: Value of new mtu.
1024 *
 * The device's MTU cannot be changed by the system; it must be changed via a
1026 * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1027 * for everything to work. Currently not supported.
1028 *
1029 * Return: -EINVAL.
1030 */
1031static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1032{
1033        return -EINVAL;
1034}
1035
1036/* visornic_set_multi - set visornic device flags
1037 * @netdev: netdevice.
1038 *
1039 * The only flag we currently support is IFF_PROMISC.
1040 */
1041static void visornic_set_multi(struct net_device *netdev)
1042{
1043        struct uiscmdrsp *cmdrsp;
1044        struct visornic_devdata *devdata = netdev_priv(netdev);
1045        int err = 0;
1046
1047        if (devdata->old_flags == netdev->flags)
1048                return;
1049
1050        if ((netdev->flags & IFF_PROMISC) ==
1051            (devdata->old_flags & IFF_PROMISC))
1052                goto out_save_flags;
1053
1054        cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1055        if (!cmdrsp)
1056                return;
1057        cmdrsp->cmdtype = CMD_NET_TYPE;
1058        cmdrsp->net.type = NET_RCV_PROMISC;
1059        cmdrsp->net.enbdis.context = netdev;
1060        cmdrsp->net.enbdis.enable =
1061                netdev->flags & IFF_PROMISC;
1062        err = visorchannel_signalinsert(devdata->dev->visorchannel,
1063                                        IOCHAN_TO_IOPART,
1064                                        cmdrsp);
1065        kfree(cmdrsp);
1066        if (err)
1067                return;
1068
1069out_save_flags:
1070        devdata->old_flags = netdev->flags;
1071}
1072
1073/* visornic_xmit_timeout - request to timeout the xmit
 * @netdev:  netdevice.
 * @txqueue: Index of the transmit queue that timed out (unused here).
1075 *
1076 * Queue the work and return. Make sure we have not already been informed that
1077 * the IO Partition is gone; if so, we will have already timed-out the xmits.
1078 */
1079static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue)
1080{
1081        struct visornic_devdata *devdata = netdev_priv(netdev);
1082        unsigned long flags;
1083
1084        spin_lock_irqsave(&devdata->priv_lock, flags);
1085        if (devdata->going_away) {
1086                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1087                dev_dbg(&devdata->dev->device,
1088                        "%s aborting because device removal pending\n",
1089                        __func__);
1090                return;
1091        }
1092
1093        /* Ensure that a ServerDown message hasn't been received */
1094        if (!devdata->enabled ||
1095            (devdata->server_down && !devdata->server_change_state)) {
1096                dev_dbg(&netdev->dev, "%s no processing\n",
1097                        __func__);
1098                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1099                return;
1100        }
1101        schedule_work(&devdata->timeout_reset);
1102        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1103}
1104
1105/* repost_return - repost rcv bufs that have come back
1106 * @cmdrsp: IO channel command struct to post.
1107 * @devdata: Visornic devdata for the device.
1108 * @skb: Socket buffer.
1109 * @netdev: netdevice.
1110 *
1111 * Repost rcv buffers that have been returned to us when we are finished
1112 * with them.
1113 *
1114 * Return: 0 for success, negative integer on error.
1115 */
1116static int repost_return(struct uiscmdrsp *cmdrsp,
1117                         struct visornic_devdata *devdata,
1118                         struct sk_buff *skb, struct net_device *netdev)
1119{
1120        struct net_pkt_rcv copy;
1121        int i = 0, cc, numreposted;
1122        int found_skb = 0;
1123        int status = 0;
1124
1125        copy = cmdrsp->net.rcv;
1126        switch (copy.numrcvbufs) {
1127        case 0:
1128                devdata->n_rcv0++;
1129                break;
1130        case 1:
1131                devdata->n_rcv1++;
1132                break;
1133        case 2:
1134                devdata->n_rcv2++;
1135                break;
1136        default:
1137                devdata->n_rcvx++;
1138                break;
1139        }
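        /* For each rcv buffer the IO Partition handed back, find it in our
         * rcvbuf table, replace that slot with a freshly allocated skb and
         * post the replacement back to the IO Partition.
         */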
1140        for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1141                for (i = 0; i < devdata->num_rcv_bufs; i++) {
1142                        if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1143                                continue;
1144
1145                        if ((skb) && devdata->rcvbuf[i] == skb) {
1146                                devdata->found_repost_rcvbuf_cnt++;
1147                                found_skb = 1;
1148                                devdata->repost_found_skb_cnt++;
1149                        }
1150                        devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1151                        if (!devdata->rcvbuf[i]) {
1152                                devdata->num_rcv_bufs_could_not_alloc++;
1153                                devdata->alloc_failed_in_repost_rtn_cnt++;
1154                                status = -ENOMEM;
1155                                break;
1156                        }
1157                        status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1158                        if (status) {
1159                                kfree_skb(devdata->rcvbuf[i]);
1160                                devdata->rcvbuf[i] = NULL;
1161                                break;
1162                        }
1163                        numreposted++;
1164                        break;
1165                }
1166        }
1167        if (numreposted != copy.numrcvbufs) {
1168                devdata->n_repost_deficit++;
1169                status = -EINVAL;
1170        }
1171        if (skb) {
1172                if (found_skb) {
1173                        kfree_skb(skb);
1174                } else {
1175                        status = -EINVAL;
1176                        devdata->bad_rcv_buf++;
1177                }
1178        }
1179        return status;
1180}
1181
1182/* visornic_rx - handle receive packets coming back from IO Partition
1183 * @cmdrsp: Receive packet returned from IO Partition.
1184 *
1185 * Got a receive packet back from the IO Partition; handle it and send it up
1186 * the stack.
 *
1188 * Return: 1 iff an skb was received, otherwise 0.
1189 */
1190static int visornic_rx(struct uiscmdrsp *cmdrsp)
1191{
1192        struct visornic_devdata *devdata;
1193        struct sk_buff *skb, *prev, *curr;
1194        struct net_device *netdev;
1195        int cc, currsize, off;
1196        struct ethhdr *eth;
1197        unsigned long flags;
1198
        /* Post a new rcv buf to the other end using the cmdrsp we have at
         * hand; post it without holding the lock - we'll use the signal lock
         * to synchronize the queue insert. The cmdrsp that contains the
         * net.rcv is the one we are using to repost, so copy the info we
         * need from it.
         */
1204        skb = cmdrsp->net.buf;
1205        netdev = skb->dev;
1206
1207        devdata = netdev_priv(netdev);
1208
1209        spin_lock_irqsave(&devdata->priv_lock, flags);
1210        atomic_dec(&devdata->num_rcvbuf_in_iovm);
1211
1212        /* set length to how much was ACTUALLY received -
1213         * NOTE: rcv_done_len includes actual length of data rcvd
1214         * including ethhdr
1215         */
1216        skb->len = cmdrsp->net.rcv.rcv_done_len;
1217
1218        /* update rcv stats - call it with priv_lock held */
1219        devdata->net_stats.rx_packets++;
1220        devdata->net_stats.rx_bytes += skb->len;
1221
1222        /* test enabled while holding lock */
1223        if (!(devdata->enabled && devdata->enab_dis_acked)) {
1224                /* don't process it unless we're in enable mode and until
1225                 * we've gotten an ACK saying the other end got our RCV enable
1226                 */
1227                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1228                repost_return(cmdrsp, devdata, skb, netdev);
1229                return 0;
1230        }
1231
1232        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1233
        /* When the skb was allocated, skb->dev, skb->data, skb->len and
         * skb->data_len were set up, and the data has already been put into
         * the skb (both the first frag and the frags pages).
         * NOTE: firstfragslen is the amount of data in skb->data, i.e. that
         * which is not in nr_frags or frag_list. This is now simply
         * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the first
         * frag, set data_len to show the rest, and see if we have to chain
         * the frag_list.
         */
1243        /* do PRECAUTIONARY check */
1244        if (skb->len > RCVPOST_BUF_SIZE) {
1245                if (cmdrsp->net.rcv.numrcvbufs < 2) {
1246                        if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1247                                dev_err(&devdata->netdev->dev,
1248                                        "repost_return failed");
1249                        return 0;
1250                }
1251                /* length rcvd is greater than firstfrag in this skb rcv buf  */
1252                /* amount in skb->data */
1253                skb->tail += RCVPOST_BUF_SIZE;
1254                /* amount that will be in frag_list */
1255                skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1256        } else {
1257                /* data fits in this skb - no chaining - do
1258                 * PRECAUTIONARY check
1259                 */
1260                /* should be 1 */
1261                if (cmdrsp->net.rcv.numrcvbufs != 1) {
1262                        if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1263                                dev_err(&devdata->netdev->dev,
1264                                        "repost_return failed");
1265                        return 0;
1266                }
1267                skb->tail += skb->len;
1268                /* nothing rcvd in frag_list */
1269                skb->data_len = 0;
1270        }
1271        off = skb_tail_pointer(skb) - skb->data;
1272
        /* off is the amount we bumped tail by in the head skb. It is used to
         * calculate the size of each chained skb below, and also to index
         * into bufline to continue the copy (for chansocktwopc).
         * If necessary, chain the rcv skbs together.
         * NOTE: index 0 is the same as cmdrsp->net.rcv.skb; we need to
         * chain the rest to that one.
         * - do PRECAUTIONARY check
         */
1282        if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1283                if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1284                        dev_err(&devdata->netdev->dev, "repost_return failed");
1285                return 0;
1286        }
1287
1288        if (cmdrsp->net.rcv.numrcvbufs > 1) {
1289                /* chain the various rcv buffers into the skb's frag_list. */
1290                /* Note: off was initialized above  */
1291                for (cc = 1, prev = NULL;
1292                     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1293                        curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1294                        curr->next = NULL;
1295                        /* start of list- set head */
1296                        if (!prev)
1297                                skb_shinfo(skb)->frag_list = curr;
1298                        else
1299                                prev->next = curr;
1300                        prev = curr;
1301
1302                        /* should we set skb->len and skb->data_len for each
1303                         * buffer being chained??? can't hurt!
1304                         */
1305                        currsize = min(skb->len - off,
1306                                       (unsigned int)RCVPOST_BUF_SIZE);
1307                        curr->len = currsize;
1308                        curr->tail += currsize;
1309                        curr->data_len = 0;
1310                        off += currsize;
1311                }
1312                /* assert skb->len == off */
1313                if (skb->len != off) {
1314                        netdev_err(devdata->netdev,
1315                                   "something wrong; skb->len:%d != off:%d\n",
1316                                   skb->len, off);
1317                }
1318        }
1319
1320        /* set up packet's protocol type using ethernet header - this
1321         * sets up skb->pkt_type & it also PULLS out the eth header
1322         */
1323        skb->protocol = eth_type_trans(skb, netdev);
1324        eth = eth_hdr(skb);
1325        skb->csum = 0;
1326        skb->ip_summed = CHECKSUM_NONE;
1327
1328        do {
1329                /* accept all packets */
1330                if (netdev->flags & IFF_PROMISC)
1331                        break;
1332                if (skb->pkt_type == PACKET_BROADCAST) {
1333                        /* accept all broadcast packets */
1334                        if (netdev->flags & IFF_BROADCAST)
1335                                break;
1336                } else if (skb->pkt_type == PACKET_MULTICAST) {
1337                        if ((netdev->flags & IFF_MULTICAST) &&
1338                            (netdev_mc_count(netdev))) {
1339                                struct netdev_hw_addr *ha;
1340                                int found_mc = 0;
1341
1342                                /* only accept multicast packets that we can
1343                                 * find in our multicast address list
1344                                 */
1345                                netdev_for_each_mc_addr(ha, netdev) {
1346                                        if (ether_addr_equal(eth->h_dest,
1347                                                             ha->addr)) {
1348                                                found_mc = 1;
1349                                                break;
1350                                        }
1351                                }
1352                                /* accept pkt, dest matches a multicast addr */
1353                                if (found_mc)
1354                                        break;
1355                        }
1356                /* accept packet, h_dest must match vnic  mac address */
1357                } else if (skb->pkt_type == PACKET_HOST) {
1358                        break;
1359                } else if (skb->pkt_type == PACKET_OTHERHOST) {
1360                        /* something is not right */
1361                        dev_err(&devdata->netdev->dev,
1362                                "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1363                                netdev->name, eth->h_dest, netdev->dev_addr);
1364                }
1365                /* drop packet - don't forward it up to OS */
1366                devdata->n_rcv_packets_not_accepted++;
1367                repost_return(cmdrsp, devdata, skb, netdev);
1368                return 0;
1369        } while (0);
1370
1371        netif_receive_skb(skb);
1372        /* netif_receive_skb() returns a value, but in practice most
1373         * drivers ignore it
1374         */
1375
1376        skb = NULL;
1377        /* Whether the packet got dropped or handled, the skb is freed by
1378         * kernel code, so we shouldn't free it; but we should repost a
1379         * new rcv buffer.
1380         */
1381        repost_return(cmdrsp, devdata, skb, netdev);
1382        return 1;
1383}
1384
1385/* devdata_initialize - initialize devdata structure
1386 * @devdata: visornic_devdata structure to initialize.
1387 * @dev:     visorbus_device it belongs to.
1388 *
1389 * Setup initial values for the visornic, based on channel and default values.
1390 *
1391 * Return: A pointer to the devdata structure.
1392 */
1393static struct visornic_devdata *devdata_initialize(
1394                                        struct visornic_devdata *devdata,
1395                                        struct visor_device *dev)
1396{
1397        devdata->dev = dev;
1398        devdata->incarnation_id = get_jiffies_64();
1399        return devdata;
1400}
1401
1402/* devdata_release - free up references in devdata
1403 * @devdata: Struct to clean up.
1404 */
1405static void devdata_release(struct visornic_devdata *devdata)
1406{
1407        kfree(devdata->rcvbuf);
1408        kfree(devdata->cmdrsp_rcv);
1409        kfree(devdata->xmit_cmdrsp);
1410}
1411
1412static const struct net_device_ops visornic_dev_ops = {
1413        .ndo_open = visornic_open,
1414        .ndo_stop = visornic_close,
1415        .ndo_start_xmit = visornic_xmit,
1416        .ndo_get_stats = visornic_get_stats,
1417        .ndo_change_mtu = visornic_change_mtu,
1418        .ndo_tx_timeout = visornic_xmit_timeout,
1419        .ndo_set_rx_mode = visornic_set_multi,
1420};
1421
1422/* DebugFS code */
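/* info_debugfs_read - read handler for the "info" debugfs file
 * @file:   File being read.
 * @buf:    Userspace buffer to fill.
 * @len:    Maximum number of bytes to return.
 * @offset: Offset into the formatted output.
 *
 * Dumps channel statistics for every visornic netdev whose xmit queue is
 * currently stopped.
 *
 * Return: Number of bytes copied to @buf, or negative errno on failure.
 */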
1423static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1424                                 size_t len, loff_t *offset)
1425{
1426        ssize_t bytes_read = 0;
1427        int str_pos = 0;
1428        struct visornic_devdata *devdata;
1429        struct net_device *dev;
1430        char *vbuf;
1431
1432        if (len > MAX_BUF)
1433                len = MAX_BUF;
1434        vbuf = kzalloc(len, GFP_KERNEL);
1435        if (!vbuf)
1436                return -ENOMEM;
1437
1438        /* for each vnic channel dump out channel specific data */
1439        rcu_read_lock();
1440        for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1441                /* only consider visornic netdevs whose xmit queue is stopped */
1442                if (dev->netdev_ops != &visornic_dev_ops ||
1443                    (!netif_queue_stopped(dev)))
1444                        continue;
1445
1446                devdata = netdev_priv(dev);
1447                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1448                                     "netdev = %s (0x%p), MAC Addr %pM\n",
1449                                     dev->name,
1450                                     dev,
1451                                     dev->dev_addr);
1452                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1453                                     "VisorNic Dev Info = 0x%p\n", devdata);
1454                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455                                     " num_rcv_bufs = %d\n",
1456                                     devdata->num_rcv_bufs);
1457                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1458                                     " max_outstanding_net_xmits = %lu\n",
1459                                     devdata->max_outstanding_net_xmits);
1460                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1461                                     " upper_threshold_net_xmits = %lu\n",
1462                                     devdata->upper_threshold_net_xmits);
1463                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464                                     " lower_threshold_net_xmits = %lu\n",
1465                                     devdata->lower_threshold_net_xmits);
1466                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1467                                     " queuefullmsg_logged = %d\n",
1468                                     devdata->queuefullmsg_logged);
1469                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1470                                     " chstat.got_rcv = %lu\n",
1471                                     devdata->chstat.got_rcv);
1472                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1473                                     " chstat.got_enbdisack = %lu\n",
1474                                     devdata->chstat.got_enbdisack);
1475                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1476                                     " chstat.got_xmit_done = %lu\n",
1477                                     devdata->chstat.got_xmit_done);
1478                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1479                                     " chstat.xmit_fail = %lu\n",
1480                                     devdata->chstat.xmit_fail);
1481                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1482                                     " chstat.sent_enbdis = %lu\n",
1483                                     devdata->chstat.sent_enbdis);
1484                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485                                     " chstat.sent_promisc = %lu\n",
1486                                     devdata->chstat.sent_promisc);
1487                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1488                                     " chstat.sent_post = %lu\n",
1489                                     devdata->chstat.sent_post);
1490                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1491                                     " chstat.sent_post_failed = %lu\n",
1492                                     devdata->chstat.sent_post_failed);
1493                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1494                                     " chstat.sent_xmit = %lu\n",
1495                                     devdata->chstat.sent_xmit);
1496                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1497                                     " chstat.reject_count = %lu\n",
1498                                     devdata->chstat.reject_count);
1499                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1500                                     " chstat.extra_rcvbufs_sent = %lu\n",
1501                                     devdata->chstat.extra_rcvbufs_sent);
1502                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1503                                     " n_rcv0 = %lu\n", devdata->n_rcv0);
1504                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505                                     " n_rcv1 = %lu\n", devdata->n_rcv1);
1506                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507                                     " n_rcv2 = %lu\n", devdata->n_rcv2);
1508                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509                                     " n_rcvx = %lu\n", devdata->n_rcvx);
1510                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                     " num_rcvbuf_in_iovm = %d\n",
1512                                     atomic_read(&devdata->num_rcvbuf_in_iovm));
1513                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514                                     " alloc_failed_in_if_needed_cnt = %lu\n",
1515                                     devdata->alloc_failed_in_if_needed_cnt);
1516                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1517                                     " alloc_failed_in_repost_rtn_cnt = %lu\n",
1518                                     devdata->alloc_failed_in_repost_rtn_cnt);
1523                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524                                     " found_repost_rcvbuf_cnt = %lu\n",
1525                                     devdata->found_repost_rcvbuf_cnt);
1526                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1527                                     " repost_found_skb_cnt = %lu\n",
1528                                     devdata->repost_found_skb_cnt);
1529                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1530                                     " n_repost_deficit = %lu\n",
1531                                     devdata->n_repost_deficit);
1532                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1533                                     " bad_rcv_buf = %lu\n",
1534                                     devdata->bad_rcv_buf);
1535                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1536                                     " n_rcv_packets_not_accepted = %lu\n",
1537                                     devdata->n_rcv_packets_not_accepted);
1538                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1539                                     " interrupts_rcvd = %llu\n",
1540                                     devdata->interrupts_rcvd);
1541                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1542                                     " interrupts_notme = %llu\n",
1543                                     devdata->interrupts_notme);
1544                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1545                                     " interrupts_disabled = %llu\n",
1546                                     devdata->interrupts_disabled);
1547                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1548                                     " busy_cnt = %llu\n",
1549                                     devdata->busy_cnt);
1550                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1551                                     " flow_control_upper_hits = %llu\n",
1552                                     devdata->flow_control_upper_hits);
1553                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1554                                     " flow_control_lower_hits = %llu\n",
1555                                     devdata->flow_control_lower_hits);
1556                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1557                                     " netif_queue = %s\n",
1558                                     netif_queue_stopped(devdata->netdev) ?
1559                                     "stopped" : "running");
1560                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561                                     " xmits_outstanding = %lu\n",
1562                                     devdata_xmits_outstanding(devdata));
1563        }
1564        rcu_read_unlock();
1565        bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1566        kfree(vbuf);
1567        return bytes_read;
1568}
1569
1570static struct dentry *visornic_debugfs_dir;
1571static const struct file_operations debugfs_info_fops = {
1572        .read = info_debugfs_read,
1573};
1574
1575/* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1576 * @devdata: Visornic device.
1577 */
1578static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1579{
1580        int i;
1581        struct net_device *netdev;
1582        struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1583        int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1584        int err;
1585
1586        /* don't do this until vnic is marked ready */
1587        if (!(devdata->enabled && devdata->enab_dis_acked))
1588                return;
1589
1590        netdev = devdata->netdev;
1591        rcv_bufs_allocated = 0;
1592        /* This code tries to avoid getting stuck here forever, but still
1593         * retries on the next poll if it can't allocate them all this time.
1594         */
1595        cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1596        while (cur_num_rcv_bufs_to_alloc > 0) {
1597                cur_num_rcv_bufs_to_alloc--;
1598                for (i = 0; i < devdata->num_rcv_bufs; i++) {
1599                        if (devdata->rcvbuf[i])
1600                                continue;
1601                        devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1602                        if (!devdata->rcvbuf[i]) {
1603                                devdata->alloc_failed_in_if_needed_cnt++;
1604                                break;
1605                        }
1606                        rcv_bufs_allocated++;
1607                        err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1608                        if (err) {
1609                                kfree_skb(devdata->rcvbuf[i]);
1610                                devdata->rcvbuf[i] = NULL;
1611                                break;
1612                        }
1613                        devdata->chstat.extra_rcvbufs_sent++;
1614                }
1615        }
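        /* reduce the recorded shortfall by the buffers allocated this pass */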
1616        devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1617}
1618
1619/* drain_resp_queue - drains and ignores all messages from the resp queue
1620 * @cmdrsp:  IO channel command response message.
1621 * @devdata: Visornic device to drain.
1622 */
1623static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1624                             struct visornic_devdata *devdata)
1625{
1626        while (!visorchannel_signalremove(devdata->dev->visorchannel,
1627                                          IOCHAN_FROM_IOPART,
1628                                          cmdrsp))
1629                ;
1630}
1631
1632/* service_resp_queue - drain the response queue
1633 * @cmdrsp:  IO channel command response message.
1634 * @devdata: Visornic device to drain.
1635 * @rx_work_done:
1636 * @budget:
1637 *
1638 * Drain the response queue of any responses from the IO Partition. Process the
1639 * responses as we get them.
1640 */
1641static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1642                               struct visornic_devdata *devdata,
1643                               int *rx_work_done, int budget)
1644{
1645        unsigned long flags;
1646        struct net_device *netdev;
1647
1648        while (*rx_work_done < budget) {
1649                /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1650                 * moment
1651                 */
1652                /* queue empty */
1653                if (visorchannel_signalremove(devdata->dev->visorchannel,
1654                                              IOCHAN_FROM_IOPART,
1655                                              cmdrsp))
1656                        break;
1657
1658                switch (cmdrsp->net.type) {
1659                case NET_RCV:
1660                        devdata->chstat.got_rcv++;
1661                        /* process incoming packet */
1662                        *rx_work_done += visornic_rx(cmdrsp);
1663                        break;
1664                case NET_XMIT_DONE:
1665                        spin_lock_irqsave(&devdata->priv_lock, flags);
1666                        devdata->chstat.got_xmit_done++;
1667                        if (cmdrsp->net.xmtdone.xmt_done_result)
1668                                devdata->chstat.xmit_fail++;
1669                        /* only call queue wake if we stopped it */
1670                        netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1671                        /* netdev should always match devdata->netdev here */
1672                        if (netdev == devdata->netdev &&
1673                            netif_queue_stopped(netdev)) {
1674                                /* check if we have crossed the lower watermark
1675                                 * for netif_wake_queue()
1676                                 */
1677                                if (vnic_hit_low_watermark
1678                                    (devdata,
1679                                     devdata->lower_threshold_net_xmits)) {
1680                                        /* enough NET_XMITs completed
1681                                         * so can restart netif queue
1682                                         */
1683                                        netif_wake_queue(netdev);
1684                                        devdata->flow_control_lower_hits++;
1685                                }
1686                        }
1687                        skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1688                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1689                        kfree_skb(cmdrsp->net.buf);
1690                        break;
1691                case NET_RCV_ENBDIS_ACK:
1692                        devdata->chstat.got_enbdisack++;
1693                        netdev = (struct net_device *)
1694                                 cmdrsp->net.enbdis.context;
1695                        spin_lock_irqsave(&devdata->priv_lock, flags);
1696                        devdata->enab_dis_acked = 1;
1697                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1698
1699                        if (devdata->server_down &&
1700                            devdata->server_change_state) {
1701                                /* Inform Linux that the link is up */
1702                                devdata->server_down = false;
1703                                devdata->server_change_state = false;
1704                                netif_wake_queue(netdev);
1705                                netif_carrier_on(netdev);
1706                        }
1707                        break;
1708                case NET_CONNECT_STATUS:
1709                        netdev = devdata->netdev;
1710                        if (cmdrsp->net.enbdis.enable == 1) {
1711                                spin_lock_irqsave(&devdata->priv_lock, flags);
1712                                devdata->enabled = cmdrsp->net.enbdis.enable;
1713                                spin_unlock_irqrestore(&devdata->priv_lock,
1714                                                       flags);
1715                                netif_wake_queue(netdev);
1716                                netif_carrier_on(netdev);
1717                        } else {
1718                                netif_stop_queue(netdev);
1719                                netif_carrier_off(netdev);
1720                                spin_lock_irqsave(&devdata->priv_lock, flags);
1721                                devdata->enabled = cmdrsp->net.enbdis.enable;
1722                                spin_unlock_irqrestore(&devdata->priv_lock,
1723                                                       flags);
1724                        }
1725                        break;
1726                default:
1727                        break;
1728                }
1729                /* cmdrsp is now available for reuse  */
1730        }
1731}
1732
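/* visornic_poll - NAPI poll routine
 * @napi:   Our napi instance.
 * @budget: Maximum number of receive packets to process.
 *
 * Post any needed receive buffers, then drain the response queue, completing
 * NAPI when fewer than @budget packets were processed.
 *
 * Return: Number of receive packets processed.
 */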
1733static int visornic_poll(struct napi_struct *napi, int budget)
1734{
1735        struct visornic_devdata *devdata = container_of(napi,
1736                                                        struct visornic_devdata,
1737                                                        napi);
1738        int rx_count = 0;
1739
1740        send_rcv_posts_if_needed(devdata);
1741        service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1742
1743        /* If there aren't any more packets to receive, stop the poll */
1744        if (rx_count < budget)
1745                napi_complete_done(napi, rx_count);
1746
1747        return rx_count;
1748}
1749
1750/* visornic_channel_interrupt - checks the status of the response queue
1751 * @dev: Device whose channel signalled us.
1752 *
1753 * Schedules NAPI to drain the response queue when it is not empty.
1754 */
1755static void visornic_channel_interrupt(struct visor_device *dev)
1756{
1757        struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1758
1759        if (!devdata)
1760                return;
1761
1762        if (!visorchannel_signalempty(devdata->dev->visorchannel,
1763                                      IOCHAN_FROM_IOPART))
1765                napi_schedule(&devdata->napi);
1766
1767        atomic_set(&devdata->interrupt_rcvd, 0);
1768}
1770
1771/* visornic_probe - probe function for visornic devices
1772 * @dev: The visor device discovered.
1773 *
1774 * Called when visorbus discovers a visornic device on its bus. It creates a new
1775 * visornic ethernet adapter.
1776 *
1777 * Return: 0 on success, or negative integer on error.
1778 */
1779static int visornic_probe(struct visor_device *dev)
1780{
1781        struct visornic_devdata *devdata = NULL;
1782        struct net_device *netdev = NULL;
1783        int err;
1784        int channel_offset = 0;
1785        u64 features;
1786
1787        netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1788        if (!netdev) {
1789                dev_err(&dev->device,
1790                        "%s alloc_etherdev failed\n", __func__);
1791                return -ENOMEM;
1792        }
1793
1794        netdev->netdev_ops = &visornic_dev_ops;
1795        netdev->watchdog_timeo = 5 * HZ;
1796        SET_NETDEV_DEV(netdev, &dev->device);
1797
1798        /* Get MAC address from channel and read it into the device. */
1799        netdev->addr_len = ETH_ALEN;
1800        channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1801        err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1802                                    ETH_ALEN);
1803        if (err < 0) {
1804                dev_err(&dev->device,
1805                        "%s failed to get mac addr from chan (%d)\n",
1806                        __func__, err);
1807                goto cleanup_netdev;
1808        }
1809
1810        devdata = devdata_initialize(netdev_priv(netdev), dev);
1811        if (!devdata) {
1812                dev_err(&dev->device,
1813                        "%s devdata_initialize failed\n", __func__);
1814                err = -ENOMEM;
1815                goto cleanup_netdev;
1816        }
1817        /* don't trust messages lying around in the channel */
1818        drain_resp_queue(devdata->cmdrsp, devdata);
1819
1820        devdata->netdev = netdev;
1821        dev_set_drvdata(&dev->device, devdata);
1822        init_waitqueue_head(&devdata->rsp_queue);
1823        spin_lock_init(&devdata->priv_lock);
1824        /* not enabled to receive yet */
1825        devdata->enabled = 0;
1826        atomic_set(&devdata->usage, 1);
1827
1828        /* Setup rcv bufs */
1829        channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1830        err = visorbus_read_channel(dev, channel_offset,
1831                                    &devdata->num_rcv_bufs, 4);
1832        if (err) {
1833                dev_err(&dev->device,
1834                        "%s failed to get #rcv bufs from chan (%d)\n",
1835                        __func__, err);
1836                goto cleanup_netdev;
1837        }
1838
1839        devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1840                                  sizeof(struct sk_buff *), GFP_KERNEL);
1841        if (!devdata->rcvbuf) {
1842                err = -ENOMEM;
1843                goto cleanup_netdev;
1844        }
1845
1846        /* Set the net_xmit outstanding threshold.
1847         * Always leave two slots open, but never allow fewer than three.
1848         * Note that max_outstanding_net_xmits must be > 0.
1849         */
1850        devdata->max_outstanding_net_xmits =
1851                max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1852        devdata->upper_threshold_net_xmits =
1853                max_t(unsigned long,
1854                      2, (devdata->max_outstanding_net_xmits - 1));
1855        devdata->lower_threshold_net_xmits =
1856                max_t(unsigned long,
1857                      1, (devdata->max_outstanding_net_xmits / 2));
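        /* e.g. if the channel reports 64 rcv buffers, this works out to
         * max_outstanding_net_xmits = 19, upper threshold = 18 and lower
         * threshold = 9
         */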
1858
1859        skb_queue_head_init(&devdata->xmitbufhead);
1860
1861        /* create a cmdrsp we can use to post and unpost rcv buffers */
1862        devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1863        if (!devdata->cmdrsp_rcv) {
1864                err = -ENOMEM;
1865                goto cleanup_rcvbuf;
1866        }
1867        devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1868        if (!devdata->xmit_cmdrsp) {
1869                err = -ENOMEM;
1870                goto cleanup_cmdrsp_rcv;
1871        }
1872        INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1873        devdata->server_down = false;
1874        devdata->server_change_state = false;
1875
1876        /* set the default mtu */
1877        channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1878        err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1879        if (err) {
1880                dev_err(&dev->device,
1881                        "%s failed to get mtu from chan (%d)\n",
1882                        __func__, err);
1883                goto cleanup_xmit_cmdrsp;
1884        }
1885
1886        /* TODO: Setup Interrupt information */
1887        /* register NAPI so we can poll the channel for responses */
1888        netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1889
1890        channel_offset = offsetof(struct visor_io_channel,
1891                                  channel_header.features);
1892        err = visorbus_read_channel(dev, channel_offset, &features, 8);
1893        if (err) {
1894                dev_err(&dev->device,
1895                        "%s failed to get features from chan (%d)\n",
1896                        __func__, err);
1897                goto cleanup_napi_add;
1898        }
1899
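        /* advertise that we poll the channel and do enhanced rcv buffer
         * checking
         */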
1900        features |= VISOR_CHANNEL_IS_POLLING;
1901        features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1902        err = visorbus_write_channel(dev, channel_offset, &features, 8);
1903        if (err) {
1904                dev_err(&dev->device,
1905                        "%s failed to set features in chan (%d)\n",
1906                        __func__, err);
1907                goto cleanup_napi_add;
1908        }
1909
1910        /* Note: Interrupts have to be enabled before the netdev is
1911         * registered below, because the napi routine is responsible for
1912         * setting enab_dis_acked.
1913         */
1914        visorbus_enable_channel_interrupts(dev);
1915
1916        err = register_netdev(netdev);
1917        if (err) {
1918                dev_err(&dev->device,
1919                        "%s register_netdev failed (%d)\n", __func__, err);
1920                goto cleanup_napi_add;
1921        }
1922
1923        /* create debug/sysfs directories */
1924        devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1925                                                      visornic_debugfs_dir);
1926        if (!devdata->eth_debugfs_dir) {
1927                dev_err(&dev->device,
1928                        "%s debugfs_create_dir %s failed\n",
1929                        __func__, netdev->name);
1930                err = -ENOMEM;
1931                goto cleanup_register_netdev;
1932        }
1933
1934        dev_info(&dev->device, "%s success netdev=%s\n",
1935                 __func__, netdev->name);
1936        return 0;
1937
1938cleanup_register_netdev:
1939        unregister_netdev(netdev);
1940
1941cleanup_napi_add:
1942        visorbus_disable_channel_interrupts(dev);
1943        netif_napi_del(&devdata->napi);
1944
1945cleanup_xmit_cmdrsp:
1946        kfree(devdata->xmit_cmdrsp);
1947
1948cleanup_cmdrsp_rcv:
1949        kfree(devdata->cmdrsp_rcv);
1950
1951cleanup_rcvbuf:
1952        kfree(devdata->rcvbuf);
1953
1954cleanup_netdev:
1955        free_netdev(netdev);
1956        return err;
1957}
1958
1959/* host_side_disappeared - IO Partition is gone
1960 * @devdata: Device object.
1961 *
1962 * IO partition servicing this device is gone; do cleanup.
1963 */
1964static void host_side_disappeared(struct visornic_devdata *devdata)
1965{
1966        unsigned long flags;
1967
1968        spin_lock_irqsave(&devdata->priv_lock, flags);
1969        /* indicate device destroyed */
1970        devdata->dev = NULL;
1971        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1972}
1973
1974/* visornic_remove - called when visornic dev goes away
1975 * @dev: Visornic device that is being removed.
1976 *
1977 * Called when DEVICE_DESTROY gets called to remove device.
1978 */
1979static void visornic_remove(struct visor_device *dev)
1980{
1981        struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1982        struct net_device *netdev;
1983        unsigned long flags;
1984
1985        if (!devdata) {
1986                dev_err(&dev->device, "%s no devdata\n", __func__);
1987                return;
1988        }
1989        spin_lock_irqsave(&devdata->priv_lock, flags);
1990        if (devdata->going_away) {
1991                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1992                dev_err(&dev->device, "%s already being removed\n", __func__);
1993                return;
1994        }
1995        devdata->going_away = true;
1996        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1997        netdev = devdata->netdev;
1998        if (!netdev) {
1999                dev_err(&dev->device, "%s no net device\n", __func__);
2000                return;
2001        }
2002
2003        /* going_away prevents new items from being added to the workqueues */
2004        cancel_work_sync(&devdata->timeout_reset);
2005
2006        debugfs_remove_recursive(devdata->eth_debugfs_dir);
2007        /* this will call visornic_close() */
2008        unregister_netdev(netdev);
2009
2010        visorbus_disable_channel_interrupts(devdata->dev);
2011        netif_napi_del(&devdata->napi);
2012
2013        dev_set_drvdata(&dev->device, NULL);
2014        host_side_disappeared(devdata);
2015        devdata_release(devdata);
2016        free_netdev(netdev);
2017}
2018
2019/* visornic_pause - called when IO Part disappears
2020 * @dev:           Visornic device that is being serviced.
2021 * @complete_func: Call when finished.
2022 *
2023 * Called when the IO Partition has gone down. Need to free up resources and
2024 * wait for IO partition to come back. Mark link as down and don't attempt any
2025 * DMA. When we have freed memory, call the complete_func so that Command knows
2026 * we are done. If we don't call complete_func, the IO Partition will never
2027 * come back.
2028 *
2029 * Return: 0 on success.
2030 */
2031static int visornic_pause(struct visor_device *dev,
2032                          visorbus_state_complete_func complete_func)
2033{
2034        struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2035
2036        visornic_serverdown(devdata, complete_func);
2037        return 0;
2038}
2039
2040/* visornic_resume - called when IO Partition has recovered
2041 * @dev:           Visornic device that is being serviced.
2042 * @complete_func: Call when finished.
2043 *
2044 * Called when the IO partition has recovered. Re-establish connection to the IO
2045 * Partition and set the link up. Okay to do DMA again.
2046 *
2047 * Return: 0 on success, or negative integer on error.
2048 */
2049static int visornic_resume(struct visor_device *dev,
2050                           visorbus_state_complete_func complete_func)
2051{
2052        struct visornic_devdata *devdata;
2053        struct net_device *netdev;
2054        unsigned long flags;
2055
2056        devdata = dev_get_drvdata(&dev->device);
2057        if (!devdata) {
2058                dev_err(&dev->device, "%s no devdata\n", __func__);
2059                return -EINVAL;
2060        }
2061
2062        netdev = devdata->netdev;
2063
2064        spin_lock_irqsave(&devdata->priv_lock, flags);
2065        if (devdata->server_change_state) {
2066                spin_unlock_irqrestore(&devdata->priv_lock, flags);
2067                dev_err(&dev->device, "%s server already changing state\n",
2068                        __func__);
2069                return -EINVAL;
2070        }
2071        if (!devdata->server_down) {
2072                spin_unlock_irqrestore(&devdata->priv_lock, flags);
2073                dev_err(&dev->device, "%s server not down\n", __func__);
2074                complete_func(dev, 0);
2075                return 0;
2076        }
2077        devdata->server_change_state = true;
2078        spin_unlock_irqrestore(&devdata->priv_lock, flags);
2079
2080        /* Must transition channel to ATTACHED state BEFORE
2081         * we can start using the device again.
2082         * TODO: State transitions
2083         */
2084        visorbus_enable_channel_interrupts(dev);
2085
2086        rtnl_lock();
2087        dev_open(netdev, NULL);
2088        rtnl_unlock();
2089
2090        complete_func(dev, 0);
2091        return 0;
2092}
2093
2094/* This is used to tell the visorbus driver which types of visor devices
2095 * we support, and what functions to call when a visor device that we support
2096 * is attached or removed.
2097 */
2098static struct visor_driver visornic_driver = {
2099        .name = "visornic",
2100        .owner = THIS_MODULE,
2101        .channel_types = visornic_channel_types,
2102        .probe = visornic_probe,
2103        .remove = visornic_remove,
2104        .pause = visornic_pause,
2105        .resume = visornic_resume,
2106        .channel_interrupt = visornic_channel_interrupt,
2107};
2108
2109/* visornic_init - init function
2110 *
2111 * Init function for the visornic driver. Do initial driver setup and wait
2112 * for devices.
2113 *
2114 * Return: 0 on success, negative integer on error.
2115 */
2116static int visornic_init(void)
2117{
2118        int err;
2119
2120        visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2121
2122        debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2123                            &debugfs_info_fops);
2124        debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
2125                            &debugfs_enable_ints_fops);
2126
2127        err = visorbus_register_visor_driver(&visornic_driver);
2128        if (err)
2129                debugfs_remove_recursive(visornic_debugfs_dir);
2130
2131        return err;
2132}
2133
2134/* visornic_cleanup - driver exit routine
2135 *
2136 * Unregister driver from the bus and free up memory.
2137 */
2138static void visornic_cleanup(void)
2139{
2140        visorbus_unregister_visor_driver(&visornic_driver);
2141        debugfs_remove_recursive(visornic_debugfs_dir);
2142}
2143
2144module_init(visornic_init);
2145module_exit(visornic_cleanup);
2146
2147MODULE_AUTHOR("Unisys");
2148MODULE_LICENSE("GPL");
2149MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");
2150