linux/drivers/staging/unisys/visornic/visornic_main.c
   1// SPDX-License-Identifier: GPL-2.0
   2/* Copyright (c) 2012 - 2015 UNISYS CORPORATION
   3 * All rights reserved.
   4 */
   5
   6/* This driver lives in a spar partition, and registers to ethernet io
   7 * channels from the visorbus driver. It creates netdev devices and
   8 * forwards transmit to the IO channel and accepts rcvs from the IO
   9 * Partition via the IO channel.
  10 */
  11
  12#include <linux/debugfs.h>
  13#include <linux/etherdevice.h>
  14#include <linux/module.h>
  15#include <linux/netdevice.h>
  16#include <linux/kthread.h>
  17#include <linux/skbuff.h>
  18#include <linux/rtnetlink.h>
  19#include <linux/visorbus.h>
  20
  21#include "iochannel.h"
  22
  23#define VISORNIC_INFINITE_RSP_WAIT 0
  24
  25/* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
  26 *         = 163840 bytes
  27 */
  28#define MAX_BUF 163840
  29#define NAPI_WEIGHT 64
  30
  31/* GUIDs for the channel type supported by this driver. */
  32/* {8cd5994d-c58e-11da-95a9-00e08161165f} */
  33#define VISOR_VNIC_CHANNEL_GUID \
  34        GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
  35                0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
  36#define VISOR_VNIC_CHANNEL_GUID_STR \
  37        "8cd5994d-c58e-11da-95a9-00e08161165f"
  38
  39static struct visor_channeltype_descriptor visornic_channel_types[] = {
  40        /* Note that the only channel type we expect to be reported by the
  41         * bus driver is the VISOR_VNIC channel.
  42         */
  43        { VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
  44          VISOR_VNIC_CHANNEL_VERSIONID },
  45        {}
  46};
  47MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
  48/* FIXME XXX: This next line of code must be fixed and removed before
  49 * acceptance into the 'normal' part of the kernel.  It is only here as a place
  50 * holder to get module autoloading functionality working for visorbus.  Code
  51 * must be added to scripts/mod/file2alias.c, etc., to get this working
  52 * properly.
  53 */
  54MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
  55
  56struct chanstat {
  57        unsigned long got_rcv;
  58        unsigned long got_enbdisack;
  59        unsigned long got_xmit_done;
  60        unsigned long xmit_fail;
  61        unsigned long sent_enbdis;
  62        unsigned long sent_promisc;
  63        unsigned long sent_post;
  64        unsigned long sent_post_failed;
  65        unsigned long sent_xmit;
  66        unsigned long reject_count;
  67        unsigned long extra_rcvbufs_sent;
  68};
  69
  70/* struct visornic_devdata
  71 * @enabled:                        0 disabled 1 enabled to receive.
  72 * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
  73 * @struct *dev:
  74 * @struct *netdev:
  75 * @struct net_stats:
  76 * @interrupt_rcvd:
  77 * @rsp_queue:
  78 * @struct **rcvbuf:
  79 * @incarnation_id:                 incarnation_id lets IOPART know about
  80 *                                  re-birth.
  81 * @old_flags:                      flags as they were prior to
  82 *                                  set_multicast_list.
  83 * @usage:                          count of users.
  84 * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
  85 * @num_rcv_bufs_could_not_alloc:
  86 * @num_rcvbuf_in_iovm:
  87 * @alloc_failed_in_if_needed_cnt:
  88 * @alloc_failed_in_repost_rtn_cnt:
  89 * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
  90 *                                  - should never hit this.
  91 * @upper_threshold_net_xmits:      high water mark for calling
  92 *                                  netif_stop_queue().
  93 * @lower_threshold_net_xmits:      low water mark for calling
  94 *                                  netif_wake_queue().
  95 * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
  96 *                                  sent to the IOPART end.
  97 * @server_down_complete_func:
  98 * @struct timeout_reset:
  99 * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
 100 *                                  buffers.
 101 * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
 102 *                                  active xmit at a time.
 103 * @server_down:                    IOPART is down.
 104 * @server_change_state:            Processing SERVER_CHANGESTATE msg.
 105 * @going_away:                     device is being torn down.
 106 * @struct *eth_debugfs_dir:
 107 * @interrupts_rcvd:
 108 * @interrupts_notme:
 109 * @interrupts_disabled:
 110 * @busy_cnt:
 111 * @priv_lock:                      spinlock to access devdata structures.
 112 * @flow_control_upper_hits:
 113 * @flow_control_lower_hits:
 114 * @n_rcv0:                         # rcvs of 0 buffers.
 115 * @n_rcv1:                         # rcvs of 1 buffers.
 116 * @n_rcv2:                         # rcvs of 2 buffers.
 117 * @n_rcvx:                         # rcvs of >2 buffers.
 118 * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
 119 * @repost_found_skb_cnt:           # of found the skb.
 120 * @n_repost_deficit:               # of lost rcv buffers.
 121 * @bad_rcv_buf:                    # of unknown rcv skb not freed.
 122 * @n_rcv_packets_not_accepted:     # bogus rcv packets.
 123 * @queuefullmsg_logged:
 124 * @struct chstat:
 125 * @struct irq_poll_timer:
 126 * @struct napi:
 127 * @struct cmdrsp:
 128 */
 129struct visornic_devdata {
 130        unsigned short enabled;
 131        unsigned short enab_dis_acked;
 132
 133        struct visor_device *dev;
 134        struct net_device *netdev;
 135        struct net_device_stats net_stats;
 136        atomic_t interrupt_rcvd;
 137        wait_queue_head_t rsp_queue;
 138        struct sk_buff **rcvbuf;
 139        u64 incarnation_id;
 140        unsigned short old_flags;
 141        atomic_t usage;
 142
 143        int num_rcv_bufs;
 144        int num_rcv_bufs_could_not_alloc;
 145        atomic_t num_rcvbuf_in_iovm;
 146        unsigned long alloc_failed_in_if_needed_cnt;
 147        unsigned long alloc_failed_in_repost_rtn_cnt;
 148
 149        unsigned long max_outstanding_net_xmits;
 150        unsigned long upper_threshold_net_xmits;
 151        unsigned long lower_threshold_net_xmits;
 152        struct sk_buff_head xmitbufhead;
 153
 154        visorbus_state_complete_func server_down_complete_func;
 155        struct work_struct timeout_reset;
 156        struct uiscmdrsp *cmdrsp_rcv;
 157        struct uiscmdrsp *xmit_cmdrsp;
 158        bool server_down;
 159        bool server_change_state;
 160        bool going_away;
 161        struct dentry *eth_debugfs_dir;
 162        u64 interrupts_rcvd;
 163        u64 interrupts_notme;
 164        u64 interrupts_disabled;
 165        u64 busy_cnt;
 166        /* spinlock to access devdata structures. */
 167        spinlock_t priv_lock;
 168
 169        /* flow control counter */
 170        u64 flow_control_upper_hits;
 171        u64 flow_control_lower_hits;
 172
 173        /* debug counters */
 174        unsigned long n_rcv0;
 175        unsigned long n_rcv1;
 176        unsigned long n_rcv2;
 177        unsigned long n_rcvx;
 178        unsigned long found_repost_rcvbuf_cnt;
 179        unsigned long repost_found_skb_cnt;
 180        unsigned long n_repost_deficit;
 181        unsigned long bad_rcv_buf;
 182        unsigned long n_rcv_packets_not_accepted;
 183
 184        int queuefullmsg_logged;
 185        struct chanstat chstat;
 186        struct timer_list irq_poll_timer;
 187        struct napi_struct napi;
 188        struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
 189};
 190
 191/* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
 192static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
 193                                u16 index, u16 max_pi_arr_entries,
 194                                struct phys_info pi_arr[])
 195{
 196        u16 i, len, firstlen;
 197
 198        firstlen = PI_PAGE_SIZE - inp_off;
 199        if (inp_len <= firstlen) {
 200                /* The input entry spans only one page - add as is. */
 201                if (index >= max_pi_arr_entries)
 202                        return 0;
 203                pi_arr[index].pi_pfn = inp_pfn;
 204                pi_arr[index].pi_off = (u16)inp_off;
 205                pi_arr[index].pi_len = (u16)inp_len;
 206                return index + 1;
 207        }
 208
 209        /* This entry spans multiple pages. */
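        /* Walk it one page at a time: iteration 0 covers the firstlen bytes
         * from inp_off to the end of the first page; each later iteration
         * starts at offset 0 and covers a full page or whatever is left.
         */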
 210        for (len = inp_len, i = 0; len;
 211                len -= pi_arr[index + i].pi_len, i++) {
 212                if (index + i >= max_pi_arr_entries)
 213                        return 0;
 214                pi_arr[index + i].pi_pfn = inp_pfn + i;
 215                if (i == 0) {
 216                        pi_arr[index].pi_off = inp_off;
 217                        pi_arr[index].pi_len = firstlen;
 218                } else {
 219                        pi_arr[index + i].pi_off = 0;
 220                        pi_arr[index + i].pi_len = min_t(u16, len,
 221                                                         PI_PAGE_SIZE);
 222                }
 223        }
 224        return index + i;
 225}
 226
 227/* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
 228 *                                 array that the IOPART understands
 229 * @skb:          Skbuff that we are pulling the frags from.
 230 * @firstfraglen: Length of first fragment in skb.
 231 * @frags_max:    Max len of frags array.
 232 * @frags:        Frags array filled in on output.
 233 *
 234 * Return: Positive integer indicating number of entries filled in frags on
 235 *         success, negative integer on error.
 236 */
 237static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
 238                                         unsigned int firstfraglen,
 239                                         unsigned int frags_max,
 240                                         struct phys_info frags[])
 241{
 242        unsigned int count = 0, frag, size, offset = 0, numfrags;
 243        unsigned int total_count;
 244
 245        numfrags = skb_shinfo(skb)->nr_frags;
 246
 247        /* Compute the number of fragments this skb has, and if it's
 248         * more than the frag array can hold, linearize the skb.
 249         */
 250        total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
 251        if (firstfraglen % PI_PAGE_SIZE)
 252                total_count++;
 253
 254        if (total_count > frags_max) {
 255                if (skb_linearize(skb))
 256                        return -EINVAL;
 257                numfrags = skb_shinfo(skb)->nr_frags;
 258                firstfraglen = 0;
 259        }
 260
 261        while (firstfraglen) {
 262                if (count == frags_max)
 263                        return -EINVAL;
 264
 265                frags[count].pi_pfn =
 266                        page_to_pfn(virt_to_page(skb->data + offset));
 267                frags[count].pi_off =
 268                        (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
 269                size = min_t(unsigned int, firstfraglen,
 270                             PI_PAGE_SIZE - frags[count].pi_off);
 271
 272                /* can take smallest of firstfraglen (what's left) OR
 273                 * bytes left in the page
 274                 */
 275                frags[count].pi_len = size;
 276                firstfraglen -= size;
 277                offset += size;
 278                count++;
 279        }
 280        if (numfrags) {
 281                if ((count + numfrags) > frags_max)
 282                        return -EINVAL;
 283
 284                for (frag = 0; frag < numfrags; frag++) {
 285                        count = add_physinfo_entries(page_to_pfn(
 286                                  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
 287                                  skb_shinfo(skb)->frags[frag].page_offset,
 288                                  skb_shinfo(skb)->frags[frag].size, count,
 289                                  frags_max, frags);
 290                        /* add_physinfo_entries only returns
 291                         * zero if the frags array is out of room
 292                         * That should never happen because we
 293                         * fail above, if count+numfrags > frags_max.
 294                         */
 295                        if (!count)
 296                                return -EINVAL;
 297                }
 298        }
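        /* If the skb carries a frag_list of chained skbs, recurse into each
         * one and append its fragment entries after the ones recorded above.
         */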
 299        if (skb_shinfo(skb)->frag_list) {
 300                struct sk_buff *skbinlist;
 301                int c;
 302
 303                for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
 304                     skbinlist = skbinlist->next) {
 305                        c = visor_copy_fragsinfo_from_skb(skbinlist,
 306                                                          skbinlist->len -
 307                                                          skbinlist->data_len,
 308                                                          frags_max - count,
 309                                                          &frags[count]);
 310                        if (c < 0)
 311                                return c;
 312                        count += c;
 313                }
 314        }
 315        return count;
 316}
 317
 318static ssize_t enable_ints_write(struct file *file,
 319                                 const char __user *buffer,
 320                                 size_t count, loff_t *ppos)
 321{
 322        /* Don't want to break ABI here by having a debugfs
 323         * file that no longer exists or is no longer writable, so
 324         * let's just make this a vestigial function
 325         */
 326        return count;
 327}
 328
 329static const struct file_operations debugfs_enable_ints_fops = {
 330        .write = enable_ints_write,
 331};
 332
 333/* visornic_serverdown_complete - pause device following IOPART going down
 334 * @devdata: Device managed by IOPART.
 335 *
 336 * The IO partition has gone down, and we need to do some cleanup for when it
 337 * comes back. Treat the IO partition as the link being down.
 338 */
 339static void visornic_serverdown_complete(struct visornic_devdata *devdata)
 340{
 341        struct net_device *netdev = devdata->netdev;
 342
 343        /* Stop polling for interrupts */
 344        del_timer_sync(&devdata->irq_poll_timer);
 345
 346        rtnl_lock();
 347        dev_close(netdev);
 348        rtnl_unlock();
 349
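        /* The IO Partition no longer holds our rcv buffers or unacked xmits,
         * so reset the related accounting.
         */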
 350        atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
 351        devdata->chstat.sent_xmit = 0;
 352        devdata->chstat.got_xmit_done = 0;
 353
 354        if (devdata->server_down_complete_func)
 355                (*devdata->server_down_complete_func)(devdata->dev, 0);
 356
 357        devdata->server_down = true;
 358        devdata->server_change_state = false;
 359        devdata->server_down_complete_func = NULL;
 360}
 361
 362/* visornic_serverdown - Command has notified us that IOPART is down
 363 * @devdata:       Device managed by IOPART.
 364 * @complete_func: Function to call when finished.
 365 *
 366 * Schedule the work needed to handle the server down request. Make sure we
 367 * haven't already handled the server change state event.
 368 *
 369 * Return: 0 if we scheduled the work, negative integer on error.
 370 */
 371static int visornic_serverdown(struct visornic_devdata *devdata,
 372                               visorbus_state_complete_func complete_func)
 373{
 374        unsigned long flags;
 375        int err;
 376
 377        spin_lock_irqsave(&devdata->priv_lock, flags);
 378        if (devdata->server_change_state) {
 379                dev_dbg(&devdata->dev->device, "%s changing state\n",
 380                        __func__);
 381                err = -EINVAL;
 382                goto err_unlock;
 383        }
 384        if (devdata->server_down) {
 385                dev_dbg(&devdata->dev->device, "%s already down\n",
 386                        __func__);
 387                err = -EINVAL;
 388                goto err_unlock;
 389        }
 390        if (devdata->going_away) {
 391                dev_dbg(&devdata->dev->device,
 392                        "%s aborting because device removal pending\n",
 393                        __func__);
 394                err = -ENODEV;
 395                goto err_unlock;
 396        }
 397        devdata->server_change_state = true;
 398        devdata->server_down_complete_func = complete_func;
 399        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 400
 401        visornic_serverdown_complete(devdata);
 402        return 0;
 403
 404err_unlock:
 405        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 406        return err;
 407}
 408
 409/* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
 410 * @netdev: Network adapter the rcv bufs are attached to.
 411 *
 412 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
 413 * so that it can write rcv data into our memory space.
 414 *
 415 * Return: Pointer to sk_buff.
 416 */
 417static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
 418{
 419        struct sk_buff *skb;
 420
 421        /* NOTE: the first fragment in each rcv buffer is pointed to by
 422         * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
 423         * in length, so the first frag is large enough to hold 1514.
 424         */
 425        skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
 426        if (!skb)
 427                return NULL;
 428        skb->dev = netdev;
 429        /* current value of mtu doesn't come into play here; large
 430         * packets will just end up using multiple rcv buffers all of
 431         * same size.
 432         */
 433        skb->len = RCVPOST_BUF_SIZE;
 434        /* alloc_skb already zeroes data_len, but set it explicitly for clarity. */
 435        skb->data_len = 0;
 436        return skb;
 437}
 438
 439/* post_skb - post a skb to the IO Partition
 440 * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
 441 * @devdata: visornic_devdata to post the skb to.
 442 * @skb:     Skb to give to the IO partition.
 443 *
 444 * Return: 0 on success, negative integer on error.
 445 */
 446static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
 447                    struct sk_buff *skb)
 448{
 449        int err;
 450
 451        cmdrsp->net.buf = skb;
 452        cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
 453        cmdrsp->net.rcvpost.frag.pi_off =
 454                (unsigned long)skb->data & PI_PAGE_MASK;
 455        cmdrsp->net.rcvpost.frag.pi_len = skb->len;
 456        cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
 457
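        /* The rcv buffer is described by a single phys_info fragment, so it
         * must not cross a page boundary.
         */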
 458        if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
 459                return -EINVAL;
 460
 461        cmdrsp->net.type = NET_RCV_POST;
 462        cmdrsp->cmdtype = CMD_NET_TYPE;
 463        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 464                                        IOCHAN_TO_IOPART,
 465                                        cmdrsp);
 466        if (err) {
 467                devdata->chstat.sent_post_failed++;
 468                return err;
 469        }
 470
 471        atomic_inc(&devdata->num_rcvbuf_in_iovm);
 472        devdata->chstat.sent_post++;
 473        return 0;
 474}
 475
 476/* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
 477 * @netdev:  Netdevice we are enabling/disabling, used as context return value.
 478 * @state:   Enable = 1/disable = 0.
 479 * @devdata: Visornic device we are enabling/disabling.
 480 *
 481 * Send the enable/disable message to the IO Partition.
 482 *
 483 * Return: 0 on success, negative integer on error.
 484 */
 485static int send_enbdis(struct net_device *netdev, int state,
 486                       struct visornic_devdata *devdata)
 487{
 488        int err;
 489
 490        devdata->cmdrsp_rcv->net.enbdis.enable = state;
 491        devdata->cmdrsp_rcv->net.enbdis.context = netdev;
 492        devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
 493        devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
 494        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 495                                        IOCHAN_TO_IOPART,
 496                                        devdata->cmdrsp_rcv);
 497        if (err)
 498                return err;
 499        devdata->chstat.sent_enbdis++;
 500        return 0;
 501}
 502
 503/* visornic_disable_with_timeout - disable network adapter
 504 * @netdev:  netdevice to disable.
 505 * @timeout: Timeout to wait for disable.
 506 *
 507 * Disable the network adapter and inform the IO Partition that we are disabled.
 508 * Reclaim memory from rcv bufs.
 509 *
 510 * Return: 0 on success, negative integer if the IO Partition fails to respond.
 511 */
 512static int visornic_disable_with_timeout(struct net_device *netdev,
 513                                         const int timeout)
 514{
 515        struct visornic_devdata *devdata = netdev_priv(netdev);
 516        int i;
 517        unsigned long flags;
 518        int wait = 0;
 519        int err;
 520
 521        /* send a msg telling the other end we are stopping incoming pkts */
 522        spin_lock_irqsave(&devdata->priv_lock, flags);
 523        devdata->enabled = 0;
 524        /* must wait for ack */
 525        devdata->enab_dis_acked = 0;
 526        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 527
 528        /* send disable and wait for ack -- don't hold lock when sending
 529         * disable because if the queue is full, insert might sleep.
 530         * If an error occurs, don't wait for the timeout.
 531         */
 532        err = send_enbdis(netdev, 0, devdata);
 533        if (err)
 534                return err;
 535
 536        /* wait for ack to arrive before we try to free rcv buffers
 537         * NOTE: the other end automatically unposts the rcv buffers
 538         * when it gets a disable.
 539         */
 540        spin_lock_irqsave(&devdata->priv_lock, flags);
 541        while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 542               (wait < timeout)) {
 543                if (devdata->enab_dis_acked)
 544                        break;
 545                if (devdata->server_down || devdata->server_change_state) {
 546                        dev_dbg(&netdev->dev, "%s server went away\n",
 547                                __func__);
 548                        break;
 549                }
 550                set_current_state(TASK_INTERRUPTIBLE);
 551                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 552                wait += schedule_timeout(msecs_to_jiffies(10));
 553                spin_lock_irqsave(&devdata->priv_lock, flags);
 554        }
 555
 556        /* Wait for usage to go to 1 (no other users) before freeing
 557         * rcv buffers
 558         */
 559        if (atomic_read(&devdata->usage) > 1) {
 560                while (1) {
 561                        set_current_state(TASK_INTERRUPTIBLE);
 562                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 563                        schedule_timeout(msecs_to_jiffies(10));
 564                        spin_lock_irqsave(&devdata->priv_lock, flags);
 565                        if (atomic_read(&devdata->usage) == 1)
 566                                break;
 567                }
 568        }
 569        /* we've set enabled to 0, so we can give up the lock. */
 570        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 571
 572        /* stop the transmit queue so nothing more can be transmitted */
 573        netif_stop_queue(netdev);
 574
 575        napi_disable(&devdata->napi);
 576
 577        skb_queue_purge(&devdata->xmitbufhead);
 578
 579        /* Free rcv buffers - other end has automatically unposted them on
 580         * disable
 581         */
 582        for (i = 0; i < devdata->num_rcv_bufs; i++) {
 583                if (devdata->rcvbuf[i]) {
 584                        kfree_skb(devdata->rcvbuf[i]);
 585                        devdata->rcvbuf[i] = NULL;
 586                }
 587        }
 588
 589        return 0;
 590}
 591
 592/* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
 593 * @netdev:  struct netdevice.
 594 * @devdata: visornic_devdata.
 595 *
 596 * Allocate rcv buffers and post them to the IO Partition.
 597 *
 598 * Return: 0 on success, negative integer on failure.
 599 */
 600static int init_rcv_bufs(struct net_device *netdev,
 601                         struct visornic_devdata *devdata)
 602{
 603        int i, j, count, err;
 604
 605        /* allocate a fixed number of receive buffers to post to uisnic;
 606         * post the receive buffers after we've allocated the required amount
 607         */
 608        for (i = 0; i < devdata->num_rcv_bufs; i++) {
 609                devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
 610                /* if we failed to allocate one let us stop */
 611                if (!devdata->rcvbuf[i])
 612                        break;
 613        }
 614        /* couldn't even allocate one -- bail out */
 615        if (i == 0)
 616                return -ENOMEM;
 617        count = i;
 618
 619        /* Ensure we can alloc 2/3rd of the requested number of buffers.
 620         * 2/3 is an arbitrary choice; used also in ndis init.c
 621         */
 622        if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
 623                /* free receive buffers we did alloc and then bail out */
 624                for (i = 0; i < count; i++) {
 625                        kfree_skb(devdata->rcvbuf[i]);
 626                        devdata->rcvbuf[i] = NULL;
 627                }
 628                return -ENOMEM;
 629        }
 630
 631        /* post receive buffers to receive incoming input - without holding
 632         * lock - we've not enabled nor started the queue so there shouldn't
 633         * be any rcv or xmit activity
 634         */
 635        for (i = 0; i < count; i++) {
 636                err = post_skb(devdata->cmdrsp_rcv, devdata,
 637                               devdata->rcvbuf[i]);
 638                if (!err)
 639                        continue;
 640
 641                /* Error handling -
 642                 * If we posted at least one skb, we should return success,
 643                 * but need to free the resources that we have not successfully
 644                 * posted.
 645                 */
 646                for (j = i; j < count; j++) {
 647                        kfree_skb(devdata->rcvbuf[j]);
 648                        devdata->rcvbuf[j] = NULL;
 649                }
 650                if (i == 0)
 651                        return err;
 652                break;
 653        }
 654
 655        return 0;
 656}
 657
 658/* visornic_enable_with_timeout - send enable to IO Partition
 659 * @netdev:  struct net_device.
 660 * @timeout: Time to wait for the ACK from the enable.
 661 *
 662 * Sends the enable to the IOVM, and inits and posts receive buffers to it.
 663 * Timeout is defined in msecs (timeout of 0 specifies infinite wait).
 664 *
 665 * Return: 0 on success, negative integer on failure.
 666 */
 667static int visornic_enable_with_timeout(struct net_device *netdev,
 668                                        const int timeout)
 669{
 670        int err = 0;
 671        struct visornic_devdata *devdata = netdev_priv(netdev);
 672        unsigned long flags;
 673        int wait = 0;
 674
 675        napi_enable(&devdata->napi);
 676
 677        /* NOTE: the other end automatically unposts the rcv buffers when it
 678         * gets a disable.
 679         */
 680        err = init_rcv_bufs(netdev, devdata);
 681        if (err < 0) {
 682                dev_err(&netdev->dev,
 683                        "%s failed to init rcv bufs\n", __func__);
 684                return err;
 685        }
 686
 687        spin_lock_irqsave(&devdata->priv_lock, flags);
 688        devdata->enabled = 1;
 689        devdata->enab_dis_acked = 0;
 690
 691        /* now we're ready, let's send an ENB to uisnic but until we get
 692         * an ACK back from uisnic, we'll drop the packets
 693         */
 694        devdata->n_rcv_packets_not_accepted = 0;
 695        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 696
 697        /* send enable and wait for ack -- don't hold lock when sending enable
 698         * because if the queue is full, insert might sleep. If an error
 699         * occurs error out.
 700         */
 701        err = send_enbdis(netdev, 1, devdata);
 702        if (err)
 703                return err;
 704
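        /* Sleep in 10 ms chunks until the enable is acked, the IO Partition
         * goes away, or the timeout expires.
         */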
 705        spin_lock_irqsave(&devdata->priv_lock, flags);
 706        while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
 707               (wait < timeout)) {
 708                if (devdata->enab_dis_acked)
 709                        break;
 710                if (devdata->server_down || devdata->server_change_state) {
 711                        dev_dbg(&netdev->dev, "%s server went away\n",
 712                                __func__);
 713                        break;
 714                }
 715                set_current_state(TASK_INTERRUPTIBLE);
 716                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 717                wait += schedule_timeout(msecs_to_jiffies(10));
 718                spin_lock_irqsave(&devdata->priv_lock, flags);
 719        }
 720
 721        spin_unlock_irqrestore(&devdata->priv_lock, flags);
 722
 723        if (!devdata->enab_dis_acked) {
 724                dev_err(&netdev->dev, "%s missing ACK\n", __func__);
 725                return -EIO;
 726        }
 727
 728        netif_start_queue(netdev);
 729        return 0;
 730}
 731
 732/* visornic_timeout_reset - handle xmit timeout resets
 733 * @work: Work item that scheduled the work.
 734 *
 735 * Transmit timeouts are typically handled by resetting the device for our
 736 * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
 737 * respond, we will trigger a serverdown.
 738 */
 739static void visornic_timeout_reset(struct work_struct *work)
 740{
 741        struct visornic_devdata *devdata;
 742        struct net_device *netdev;
 743        int response = 0;
 744
 745        devdata = container_of(work, struct visornic_devdata, timeout_reset);
 746        netdev = devdata->netdev;
 747
 748        rtnl_lock();
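        /* If the interface was brought down before this work ran, there is
         * nothing to reset.
         */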
 749        if (!netif_running(netdev)) {
 750                rtnl_unlock();
 751                return;
 752        }
 753
 754        response = visornic_disable_with_timeout(netdev,
 755                                                 VISORNIC_INFINITE_RSP_WAIT);
 756        if (response)
 757                goto call_serverdown;
 758
 759        response = visornic_enable_with_timeout(netdev,
 760                                                VISORNIC_INFINITE_RSP_WAIT);
 761        if (response)
 762                goto call_serverdown;
 763
 764        rtnl_unlock();
 765
 766        return;
 767
 768call_serverdown:
 769        visornic_serverdown(devdata, NULL);
 770        rtnl_unlock();
 771}
 772
 773/* visornic_open - enable the visornic device and mark the queue started
 774 * @netdev: netdevice to start.
 775 *
 776 * Enable the device and start the transmit queue.
 777 *
 778 * Return: 0 on success.
 779 */
 780static int visornic_open(struct net_device *netdev)
 781{
 782        visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 783        return 0;
 784}
 785
 786/* visornic_close - disables the visornic device and stops the queues
 787 * @netdev: netdevice to stop.
 788 *
 789 * Disable the device and stop the transmit queue.
 790 *
 791 * Return: 0 on success.
 792 */
 793static int visornic_close(struct net_device *netdev)
 794{
 795        visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
 796        return 0;
 797}
 798
 799/* devdata_xmits_outstanding - compute outstanding xmits
 800 * @devdata: visornic_devdata for device
 801 *
 802 * Return: Long integer representing the number of outstanding xmits.
 803 */
 804static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
 805{
 806        if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
 807                return devdata->chstat.sent_xmit -
 808                        devdata->chstat.got_xmit_done;
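        /* sent_xmit has wrapped past ULONG_MAX; account for the wrap. */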
 809        return (ULONG_MAX - devdata->chstat.got_xmit_done
 810                + devdata->chstat.sent_xmit + 1);
 811}
 812
 813/* vnic_hit_high_watermark
 814 * @devdata:        Indicates visornic device we are checking.
 815 * @high_watermark: Max num of unacked xmits we will tolerate before we will
 816 *                  start throttling.
 817 *
 818 * Return: True iff the number of unacked xmits sent to the IO Partition is >=
 819 *         high_watermark. False otherwise.
 820 */
 821static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
 822                                    ulong high_watermark)
 823{
 824        return (devdata_xmits_outstanding(devdata) >= high_watermark);
 825}
 826
 827/* vnic_hit_low_watermark
 828 * @devdata:       Indicates visornic device we are checking.
 829 * @low_watermark: We will wait until the num of unacked xmits drops to this
 830 *                 value or lower before we start transmitting again.
 831 *
 832 * Return: True iff the number of unacked xmits sent to the IO Partition is <=
 833 *         low_watermark.
 834 */
 835static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
 836                                   ulong low_watermark)
 837{
 838        return (devdata_xmits_outstanding(devdata) <= low_watermark);
 839}
 840
 841/* visornic_xmit - send a packet to the IO Partition
 842 * @skb:    Packet to be sent.
 843 * @netdev: Net device the packet is being sent from.
 844 *
 845 * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
 846 * the XMIT command to the IO Partition for processing. This function is
 847 * protected from concurrent calls by a spinlock xmit_lock in the net_device
 848 * struct. As soon as the function returns, it can be called again.
 849 *
 850 * Return: NETDEV_TX_OK.
 851 */
 852static int visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
 853{
 854        struct visornic_devdata *devdata;
 855        int len, firstfraglen, padlen;
 856        struct uiscmdrsp *cmdrsp = NULL;
 857        unsigned long flags;
 858        int err;
 859
 860        devdata = netdev_priv(netdev);
 861        spin_lock_irqsave(&devdata->priv_lock, flags);
 862
 863        if (netif_queue_stopped(netdev) || devdata->server_down ||
 864            devdata->server_change_state) {
 865                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 866                devdata->busy_cnt++;
 867                dev_dbg(&netdev->dev,
 868                        "%s busy - queue stopped\n", __func__);
 869                kfree_skb(skb);
 870                return NETDEV_TX_OK;
 871        }
 872
 873        /* sk_buff struct is used to host network data throughout all the
 874         * linux network subsystems
 875         */
 876        len = skb->len;
 877
 878        /* skb->len is the FULL length of data (including fragmentary portion)
 879         * skb->data_len is the length of the fragment portion in frags
 880         * skb->len - skb->data_len is size of the 1st fragment in skb->data
 881         * calculate the length of the first fragment that skb->data is
 882         * pointing to
 883         */
 884        firstfraglen = skb->len - skb->data_len;
 885        if (firstfraglen < ETH_HLEN) {
 886                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 887                devdata->busy_cnt++;
 888                dev_err(&netdev->dev,
 889                        "%s busy - first frag too small (%d)\n",
 890                        __func__, firstfraglen);
 891                kfree_skb(skb);
 892                return NETDEV_TX_OK;
 893        }
 894
 895        if (len < ETH_MIN_PACKET_SIZE &&
 896            ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
 897                /* pad the packet out to minimum size */
 898                padlen = ETH_MIN_PACKET_SIZE - len;
 899                memset(&skb->data[len], 0, padlen);
 900                skb->tail += padlen;
 901                skb->len += padlen;
 902                len += padlen;
 903                firstfraglen += padlen;
 904        }
 905
 906        cmdrsp = devdata->xmit_cmdrsp;
 907        /* clear cmdrsp */
 908        memset(cmdrsp, 0, SIZEOF_CMDRSP);
 909        cmdrsp->net.type = NET_XMIT;
 910        cmdrsp->cmdtype = CMD_NET_TYPE;
 911
 912        /* save the pointer to skb -- we'll need it for completion */
 913        cmdrsp->net.buf = skb;
 914
 915        if (vnic_hit_high_watermark(devdata,
 916                                    devdata->max_outstanding_net_xmits)) {
 917                /* extra NET_XMITs queued over to IOVM - need to wait */
 918                devdata->chstat.reject_count++;
 919                if (!devdata->queuefullmsg_logged &&
 920                    ((devdata->chstat.reject_count & 0x3ff) == 1))
 921                        devdata->queuefullmsg_logged = 1;
 922                netif_stop_queue(netdev);
 923                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 924                devdata->busy_cnt++;
 925                dev_dbg(&netdev->dev,
 926                        "%s busy - waiting for iovm to catch up\n",
 927                        __func__);
 928                kfree_skb(skb);
 929                return NETDEV_TX_OK;
 930        }
 931        if (devdata->queuefullmsg_logged)
 932                devdata->queuefullmsg_logged = 0;
 933
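        /* When the skb is marked CHECKSUM_UNNECESSARY, forward the protocol,
         * header offsets and csum value in the lincsum info; otherwise mark
         * the lincsum info as not valid.
         */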
 934        if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
 935                cmdrsp->net.xmt.lincsum.valid = 1;
 936                cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
 937                if (skb_transport_header(skb) > skb->data) {
 938                        cmdrsp->net.xmt.lincsum.hrawoff =
 939                                skb_transport_header(skb) - skb->data;
 940                        cmdrsp->net.xmt.lincsum.hrawoffv = 1;
 941                }
 942                if (skb_network_header(skb) > skb->data) {
 943                        cmdrsp->net.xmt.lincsum.nhrawoff =
 944                                skb_network_header(skb) - skb->data;
 945                        cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
 946                }
 947                cmdrsp->net.xmt.lincsum.csum = skb->csum;
 948        } else {
 949                cmdrsp->net.xmt.lincsum.valid = 0;
 950        }
 951
 952        /* save off the length of the entire data packet */
 953        cmdrsp->net.xmt.len = len;
 954
 955        /* copy ethernet header from first frag into cmdrsp
 956         * - everything else will be passed in frags & DMA'ed
 957         */
 958        memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
 959
 960        /* copy frags info - from skb->data we need to only provide access
 961         * beyond eth header
 962         */
 963        cmdrsp->net.xmt.num_frags =
 964                visor_copy_fragsinfo_from_skb(skb, firstfraglen,
 965                                              MAX_PHYS_INFO,
 966                                              cmdrsp->net.xmt.frags);
 967        if (cmdrsp->net.xmt.num_frags < 0) {
 968                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 969                devdata->busy_cnt++;
 970                dev_err(&netdev->dev,
 971                        "%s busy - copy frags failed\n", __func__);
 972                kfree_skb(skb);
 973                return NETDEV_TX_OK;
 974        }
 975
 976        err = visorchannel_signalinsert(devdata->dev->visorchannel,
 977                                        IOCHAN_TO_IOPART, cmdrsp);
 978        if (err) {
 979                netif_stop_queue(netdev);
 980                spin_unlock_irqrestore(&devdata->priv_lock, flags);
 981                devdata->busy_cnt++;
 982                dev_dbg(&netdev->dev,
 983                        "%s busy - signalinsert failed\n", __func__);
 984                kfree_skb(skb);
 985                return NETDEV_TX_OK;
 986        }
 987
 988        /* Track the skbs that have been sent to the IOVM for XMIT */
 989        skb_queue_head(&devdata->xmitbufhead, skb);
 990
 991        /* update xmt stats */
 992        devdata->net_stats.tx_packets++;
 993        devdata->net_stats.tx_bytes += skb->len;
 994        devdata->chstat.sent_xmit++;
 995
 996        /* check if we have hit the high watermark for netif_stop_queue() */
 997        if (vnic_hit_high_watermark(devdata,
 998                                    devdata->upper_threshold_net_xmits)) {
 999                /* extra NET_XMITs queued over to IOVM - need to wait */
1000                /* stop queue - call netif_wake_queue() after lower threshold */
1001                netif_stop_queue(netdev);
1002                dev_dbg(&netdev->dev,
1003                        "%s busy - invoking iovm flow control\n",
1004                        __func__);
1005                devdata->flow_control_upper_hits++;
1006        }
1007        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1008
1009        /* skb will be freed when we get back NET_XMIT_DONE */
1010        return NETDEV_TX_OK;
1011}
1012
1013/* visornic_get_stats - returns net_stats of the visornic device
1014 * @netdev: netdevice.
1015 *
1016 * Return: Pointer to the net_device_stats struct for the device.
1017 */
1018static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1019{
1020        struct visornic_devdata *devdata = netdev_priv(netdev);
1021
1022        return &devdata->net_stats;
1023}
1024
1025/* visornic_change_mtu - changes mtu of device
1026 * @netdev: netdevice.
1027 * @new_mtu: Value of new mtu.
1028 *
1029 * The device's MTU cannot be changed by the system; it must be changed via a
1030 * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1031 * for everything to work. Currently not supported.
1032 *
1033 * Return: -EINVAL.
1034 */
1035static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1036{
1037        return -EINVAL;
1038}
1039
1040/* visornic_set_multi - set visornic device flags
1041 * @netdev: netdevice.
1042 *
1043 * The only flag we currently support is IFF_PROMISC.
1044 */
1045static void visornic_set_multi(struct net_device *netdev)
1046{
1047        struct uiscmdrsp *cmdrsp;
1048        struct visornic_devdata *devdata = netdev_priv(netdev);
1049        int err = 0;
1050
1051        if (devdata->old_flags == netdev->flags)
1052                return;
1053
1054        if ((netdev->flags & IFF_PROMISC) ==
1055            (devdata->old_flags & IFF_PROMISC))
1056                goto out_save_flags;
1057
1058        cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1059        if (!cmdrsp)
1060                return;
1061        cmdrsp->cmdtype = CMD_NET_TYPE;
1062        cmdrsp->net.type = NET_RCV_PROMISC;
1063        cmdrsp->net.enbdis.context = netdev;
1064        cmdrsp->net.enbdis.enable =
1065                netdev->flags & IFF_PROMISC;
1066        err = visorchannel_signalinsert(devdata->dev->visorchannel,
1067                                        IOCHAN_TO_IOPART,
1068                                        cmdrsp);
1069        kfree(cmdrsp);
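        /* On failure, leave old_flags untouched so the promiscuous change is
         * retried the next time this callback runs.
         */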
1070        if (err)
1071                return;
1072
1073out_save_flags:
1074        devdata->old_flags = netdev->flags;
1075}
1076
1077/* visornic_xmit_timeout - request to timeout the xmit
1078 * @netdev: netdevice.
1079 *
1080 * Queue the work and return. Make sure we have not already been informed that
1081 * the IO Partition is gone; if so, we will have already timed-out the xmits.
1082 */
1083static void visornic_xmit_timeout(struct net_device *netdev)
1084{
1085        struct visornic_devdata *devdata = netdev_priv(netdev);
1086        unsigned long flags;
1087
1088        spin_lock_irqsave(&devdata->priv_lock, flags);
1089        if (devdata->going_away) {
1090                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1091                dev_dbg(&devdata->dev->device,
1092                        "%s aborting because device removal pending\n",
1093                        __func__);
1094                return;
1095        }
1096
1097        /* Ensure that a ServerDown message hasn't been received */
1098        if (!devdata->enabled ||
1099            (devdata->server_down && !devdata->server_change_state)) {
1100                dev_dbg(&netdev->dev, "%s no processing\n",
1101                        __func__);
1102                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1103                return;
1104        }
1105        schedule_work(&devdata->timeout_reset);
1106        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1107}
1108
1109/* repost_return - repost rcv bufs that have come back
1110 * @cmdrsp: IO channel command struct to post.
1111 * @devdata: Visornic devdata for the device.
1112 * @skb: Socket buffer.
1113 * @netdev: netdevice.
1114 *
1115 * Repost rcv buffers that have been returned to us when we are finished
1116 * with them.
1117 *
1118 * Return: 0 for success, negative integer on error.
1119 */
1120static int repost_return(struct uiscmdrsp *cmdrsp,
1121                         struct visornic_devdata *devdata,
1122                         struct sk_buff *skb, struct net_device *netdev)
1123{
1124        struct net_pkt_rcv copy;
1125        int i = 0, cc, numreposted;
1126        int found_skb = 0;
1127        int status = 0;
1128
1129        copy = cmdrsp->net.rcv;
1130        switch (copy.numrcvbufs) {
1131        case 0:
1132                devdata->n_rcv0++;
1133                break;
1134        case 1:
1135                devdata->n_rcv1++;
1136                break;
1137        case 2:
1138                devdata->n_rcv2++;
1139                break;
1140        default:
1141                devdata->n_rcvx++;
1142                break;
1143        }
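        /* For each rcv buffer the IO Partition handed back, find its slot in
         * rcvbuf[], then allocate and post a replacement skb for that slot.
         * The returned skb itself is freed below once we know it was ours.
         */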
1144        for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1145                for (i = 0; i < devdata->num_rcv_bufs; i++) {
1146                        if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1147                                continue;
1148
1149                        if ((skb) && devdata->rcvbuf[i] == skb) {
1150                                devdata->found_repost_rcvbuf_cnt++;
1151                                found_skb = 1;
1152                                devdata->repost_found_skb_cnt++;
1153                        }
1154                        devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1155                        if (!devdata->rcvbuf[i]) {
1156                                devdata->num_rcv_bufs_could_not_alloc++;
1157                                devdata->alloc_failed_in_repost_rtn_cnt++;
1158                                status = -ENOMEM;
1159                                break;
1160                        }
1161                        status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1162                        if (status) {
1163                                kfree_skb(devdata->rcvbuf[i]);
1164                                devdata->rcvbuf[i] = NULL;
1165                                break;
1166                        }
1167                        numreposted++;
1168                        break;
1169                }
1170        }
1171        if (numreposted != copy.numrcvbufs) {
1172                devdata->n_repost_deficit++;
1173                status = -EINVAL;
1174        }
1175        if (skb) {
1176                if (found_skb) {
1177                        kfree_skb(skb);
1178                } else {
1179                        status = -EINVAL;
1180                        devdata->bad_rcv_buf++;
1181                }
1182        }
1183        return status;
1184}
1185
1186/* visornic_rx - handle receive packets coming back from IO Partition
1187 * @cmdrsp: Receive packet returned from IO Partition.
1188 *
1189 * Got a receive packet back from the IO Partition; handle it and send it up
1190 * the stack.
 1191 *
1192 * Return: 1 iff an skb was received, otherwise 0.
1193 */
1194static int visornic_rx(struct uiscmdrsp *cmdrsp)
1195{
1196        struct visornic_devdata *devdata;
1197        struct sk_buff *skb, *prev, *curr;
1198        struct net_device *netdev;
1199        int cc, currsize, off;
1200        struct ethhdr *eth;
1201        unsigned long flags;
1202
1203        /* post new rcv buf to the other end using the cmdrsp we have at hand;
1204         * post it without holding lock - but we'll use the signal lock to
1205         * synchronize the queue insert. The cmdrsp that contains the net.rcv
1206         * is the one we are using to repost, so copy the info we need from it.
1207         */
1208        skb = cmdrsp->net.buf;
1209        netdev = skb->dev;
1210
1211        devdata = netdev_priv(netdev);
1212
1213        spin_lock_irqsave(&devdata->priv_lock, flags);
1214        atomic_dec(&devdata->num_rcvbuf_in_iovm);
1215
1216        /* set length to how much was ACTUALLY received -
1217         * NOTE: rcv_done_len includes actual length of data rcvd
1218         * including ethhdr
1219         */
1220        skb->len = cmdrsp->net.rcv.rcv_done_len;
1221
1222        /* update rcv stats - call it with priv_lock held */
1223        devdata->net_stats.rx_packets++;
1224        devdata->net_stats.rx_bytes += skb->len;
1225
1226        /* test enabled while holding lock */
1227        if (!(devdata->enabled && devdata->enab_dis_acked)) {
1228                /* don't process it unless we're in enable mode and until
1229                 * we've gotten an ACK saying the other end got our RCV enable
1230                 */
1231                spin_unlock_irqrestore(&devdata->priv_lock, flags);
1232                repost_return(cmdrsp, devdata, skb, netdev);
1233                return 0;
1234        }
1235
1236        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1237
1238        /* when skb was allocated, skb->dev, skb->data, skb->len and
1239         * skb->data_len were set up. AND, the data has already been put
1240         * into the skb (both in the first frag and in the frags pages).
1241         * NOTE: firstfragslen is the amount of data in skb->data and that
1242         * which is not in nr_frags or frag_list. This is now simply
1243         * RCVPOST_BUF_SIZE; bump tail to show how much data is in the
1244         * firstfrag, set data_len to show the rest, and see if we have to
1245         * chain the frag_list.
1246         */
1247        /* do PRECAUTIONARY check */
1248        if (skb->len > RCVPOST_BUF_SIZE) {
1249                if (cmdrsp->net.rcv.numrcvbufs < 2) {
1250                        if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1251                                dev_err(&devdata->netdev->dev,
1252                                        "repost_return failed");
1253                        return 0;
1254                }
1255                /* length rcvd is greater than firstfrag in this skb rcv buf  */
1256                /* amount in skb->data */
1257                skb->tail += RCVPOST_BUF_SIZE;
1258                /* amount that will be in frag_list */
1259                skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1260        } else {
1261                /* data fits in this skb - no chaining - do
1262                 * PRECAUTIONARY check
1263                 */
1264                /* should be 1 */
1265                if (cmdrsp->net.rcv.numrcvbufs != 1) {
1266                        if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1267                                dev_err(&devdata->netdev->dev,
1268                                        "repost_return failed");
1269                        return 0;
1270                }
1271                skb->tail += skb->len;
1272                /* nothing rcvd in frag_list */
1273                skb->data_len = 0;
1274        }
1275        off = skb_tail_pointer(skb) - skb->data;
1276
1277        /* off is the amount we bumped tail by in the head skb;
1278         * it is used to calculate the size of each chained skb below,
1279         * and it is also used to index into bufline to continue the copy
1280         * (for chansocktwopc).
1281         * If necessary, chain the rcv skbs together.
1282         * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1283         * chain the rest to that one.
1284         * - do PRECAUTIONARY check
1285         */
1286        if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1287                if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1288                        dev_err(&devdata->netdev->dev, "repost_return failed");
1289                return 0;
1290        }
1291
1292        if (cmdrsp->net.rcv.numrcvbufs > 1) {
1293                /* chain the various rcv buffers into the skb's frag_list. */
1294                /* Note: off was initialized above  */
1295                for (cc = 1, prev = NULL;
1296                     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1297                        curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1298                        curr->next = NULL;
1299                        /* start of list- set head */
1300                        if (!prev)
1301                                skb_shinfo(skb)->frag_list = curr;
1302                        else
1303                                prev->next = curr;
1304                        prev = curr;
1305
1306                        /* should we set skb->len and skb->data_len for each
1307                         * buffer being chained??? can't hurt!
1308                         */
1309                        currsize = min(skb->len - off,
1310                                       (unsigned int)RCVPOST_BUF_SIZE);
1311                        curr->len = currsize;
1312                        curr->tail += currsize;
1313                        curr->data_len = 0;
1314                        off += currsize;
1315                }
1316                /* assert skb->len == off */
1317                if (skb->len != off) {
1318                        netdev_err(devdata->netdev,
1319                                   "something wrong; skb->len:%d != off:%d\n",
1320                                   skb->len, off);
1321                }
1322        }
1323
1324        /* set up packet's protocol type using ethernet header - this
1325         * sets up skb->pkt_type & it also PULLS out the eth header
1326         */
1327        skb->protocol = eth_type_trans(skb, netdev);
1328        eth = eth_hdr(skb);
1329        skb->csum = 0;
1330        skb->ip_summed = CHECKSUM_NONE;
1331
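        /* Decide whether to accept the packet: break out of the do/while to
         * hand it to the stack, or fall through to drop it and repost the
         * rcv buffer.
         */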
1332        do {
1333                /* accept all packets */
1334                if (netdev->flags & IFF_PROMISC)
1335                        break;
1336                if (skb->pkt_type == PACKET_BROADCAST) {
1337                        /* accept all broadcast packets */
1338                        if (netdev->flags & IFF_BROADCAST)
1339                                break;
1340                } else if (skb->pkt_type == PACKET_MULTICAST) {
1341                        if ((netdev->flags & IFF_MULTICAST) &&
1342                            (netdev_mc_count(netdev))) {
1343                                struct netdev_hw_addr *ha;
1344                                int found_mc = 0;
1345
1346                                /* only accept multicast packets that we can
1347                                 * find in our multicast address list
1348                                 */
1349                                netdev_for_each_mc_addr(ha, netdev) {
1350                                        if (ether_addr_equal(eth->h_dest,
1351                                                             ha->addr)) {
1352                                                found_mc = 1;
1353                                                break;
1354                                        }
1355                                }
1356                                /* accept pkt, dest matches a multicast addr */
1357                                if (found_mc)
1358                                        break;
1359                        }
1360                /* accept packet, h_dest must match vnic  mac address */
1361                } else if (skb->pkt_type == PACKET_HOST) {
1362                        break;
1363                } else if (skb->pkt_type == PACKET_OTHERHOST) {
1364                        /* something is not right */
1365                        dev_err(&devdata->netdev->dev,
1366                                "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1367                                netdev->name, eth->h_dest, netdev->dev_addr);
1368                }
1369                /* drop packet - don't forward it up to OS */
1370                devdata->n_rcv_packets_not_accepted++;
1371                repost_return(cmdrsp, devdata, skb, netdev);
1372                return 0;
1373        } while (0);
1374
1375        netif_receive_skb(skb);
1376        /* netif_receive_skb() returns a value, but in practice most
1377         * drivers ignore it.
1378         */
1379
1380        skb = NULL;
1381        /* Whether the packet was dropped or handled, the skb is freed by
1382         * kernel code, so we must not free it here; we only repost a new
1383         * rcv buffer in its place.
1384         */
1385        repost_return(cmdrsp, devdata, skb, netdev);
1386        return 1;
1387}
1388
1389/* devdata_initialize - initialize devdata structure
1390 * @devdata: visornic_devdata structure to initialize.
1391 * @dev:     visorbus_device it belongs to.
1392 *
1393 * Set up initial values for the visornic, based on channel and default values.
1394 *
1395 * Return: A pointer to the devdata structure.
1396 */
1397static struct visornic_devdata *devdata_initialize(
1398                                        struct visornic_devdata *devdata,
1399                                        struct visor_device *dev)
1400{
1401        devdata->dev = dev;
1402        devdata->incarnation_id = get_jiffies_64();
1403        return devdata;
1404}
1405
1406/* devdata_release - free up references in devdata
1407 * @devdata: Struct to clean up.
1408 */
1409static void devdata_release(struct visornic_devdata *devdata)
1410{
1411        kfree(devdata->rcvbuf);
1412        kfree(devdata->cmdrsp_rcv);
1413        kfree(devdata->xmit_cmdrsp);
1414}
1415
1416static const struct net_device_ops visornic_dev_ops = {
1417        .ndo_open = visornic_open,
1418        .ndo_stop = visornic_close,
1419        .ndo_start_xmit = visornic_xmit,
1420        .ndo_get_stats = visornic_get_stats,
1421        .ndo_change_mtu = visornic_change_mtu,
1422        .ndo_tx_timeout = visornic_xmit_timeout,
1423        .ndo_set_rx_mode = visornic_set_multi,
1424};
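/* These callbacks are invoked by the networking core rather than by this
 * driver directly: .ndo_open/.ndo_stop when the interface is brought up or
 * down, .ndo_start_xmit for each packet queued for transmission, and
 * .ndo_tx_timeout when the watchdog (watchdog_timeo, set in probe below)
 * expires.
 */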
1425
1426/* DebugFS code */
1427static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1428                                 size_t len, loff_t *offset)
1429{
1430        ssize_t bytes_read = 0;
1431        int str_pos = 0;
1432        struct visornic_devdata *devdata;
1433        struct net_device *dev;
1434        char *vbuf;
1435
1436        if (len > MAX_BUF)
1437                len = MAX_BUF;
1438        vbuf = kzalloc(len, GFP_KERNEL);
1439        if (!vbuf)
1440                return -ENOMEM;
1441
1442        /* for each vnic channel dump out channel specific data */
1443        rcu_read_lock();
1444        for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1445                /* Only dump visornic netdevs whose TX queue is stopped */
1446                if (dev->netdev_ops != &visornic_dev_ops ||
1447                    !netif_queue_stopped(dev))
1448                        continue;
1449
1450                devdata = netdev_priv(dev);
1451                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1452                                     "netdev = %s (0x%p), MAC Addr %pM\n",
1453                                     dev->name,
1454                                     dev,
1455                                     dev->dev_addr);
1456                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457                                     "VisorNic Dev Info = 0x%p\n", devdata);
1458                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1459                                     " num_rcv_bufs = %d\n",
1460                                     devdata->num_rcv_bufs);
1461                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1462                                     " max_outstanding_net_xmits = %lu\n",
1463                                     devdata->max_outstanding_net_xmits);
1464                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1465                                     " upper_threshold_net_xmits = %lu\n",
1466                                     devdata->upper_threshold_net_xmits);
1467                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1468                                     " lower_threshold_net_xmits = %lu\n",
1469                                     devdata->lower_threshold_net_xmits);
1470                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1471                                     " queuefullmsg_logged = %d\n",
1472                                     devdata->queuefullmsg_logged);
1473                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1474                                     " chstat.got_rcv = %lu\n",
1475                                     devdata->chstat.got_rcv);
1476                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1477                                     " chstat.got_enbdisack = %lu\n",
1478                                     devdata->chstat.got_enbdisack);
1479                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1480                                     " chstat.got_xmit_done = %lu\n",
1481                                     devdata->chstat.got_xmit_done);
1482                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1483                                     " chstat.xmit_fail = %lu\n",
1484                                     devdata->chstat.xmit_fail);
1485                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1486                                     " chstat.sent_enbdis = %lu\n",
1487                                     devdata->chstat.sent_enbdis);
1488                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1489                                     " chstat.sent_promisc = %lu\n",
1490                                     devdata->chstat.sent_promisc);
1491                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1492                                     " chstat.sent_post = %lu\n",
1493                                     devdata->chstat.sent_post);
1494                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1495                                     " chstat.sent_post_failed = %lu\n",
1496                                     devdata->chstat.sent_post_failed);
1497                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1498                                     " chstat.sent_xmit = %lu\n",
1499                                     devdata->chstat.sent_xmit);
1500                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1501                                     " chstat.reject_count = %lu\n",
1502                                     devdata->chstat.reject_count);
1503                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1504                                     " chstat.extra_rcvbufs_sent = %lu\n",
1505                                     devdata->chstat.extra_rcvbufs_sent);
1506                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507                                     " n_rcv0 = %lu\n", devdata->n_rcv0);
1508                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509                                     " n_rcv1 = %lu\n", devdata->n_rcv1);
1510                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511                                     " n_rcv2 = %lu\n", devdata->n_rcv2);
1512                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513                                     " n_rcvx = %lu\n", devdata->n_rcvx);
1514                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1515                                     " num_rcvbuf_in_iovm = %d\n",
1516                                     atomic_read(&devdata->num_rcvbuf_in_iovm));
1517                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1518                                     " alloc_failed_in_if_needed_cnt = %lu\n",
1519                                     devdata->alloc_failed_in_if_needed_cnt);
1520                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1521                                     " alloc_failed_in_repost_rtn_cnt = %lu\n",
1522                                     devdata->alloc_failed_in_repost_rtn_cnt);
1523                /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524                 *                   " inner_loop_limit_reached_cnt = %lu\n",
1525                 *                   devdata->inner_loop_limit_reached_cnt);
1526                 */
1527                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1528                                     " found_repost_rcvbuf_cnt = %lu\n",
1529                                     devdata->found_repost_rcvbuf_cnt);
1530                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1531                                     " repost_found_skb_cnt = %lu\n",
1532                                     devdata->repost_found_skb_cnt);
1533                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1534                                     " n_repost_deficit = %lu\n",
1535                                     devdata->n_repost_deficit);
1536                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1537                                     " bad_rcv_buf = %lu\n",
1538                                     devdata->bad_rcv_buf);
1539                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1540                                     " n_rcv_packets_not_accepted = %lu\n",
1541                                     devdata->n_rcv_packets_not_accepted);
1542                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1543                                     " interrupts_rcvd = %llu\n",
1544                                     devdata->interrupts_rcvd);
1545                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1546                                     " interrupts_notme = %llu\n",
1547                                     devdata->interrupts_notme);
1548                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1549                                     " interrupts_disabled = %llu\n",
1550                                     devdata->interrupts_disabled);
1551                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1552                                     " busy_cnt = %llu\n",
1553                                     devdata->busy_cnt);
1554                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1555                                     " flow_control_upper_hits = %llu\n",
1556                                     devdata->flow_control_upper_hits);
1557                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1558                                     " flow_control_lower_hits = %llu\n",
1559                                     devdata->flow_control_lower_hits);
1560                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561                                     " netif_queue = %s\n",
1562                                     netif_queue_stopped(devdata->netdev) ?
1563                                     "stopped" : "running");
1564                str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1565                                     " xmits_outstanding = %lu\n",
1566                                     devdata_xmits_outstanding(devdata));
1567        }
1568        rcu_read_unlock();
1569        bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1570        kfree(vbuf);
1571        return bytes_read;
1572}
1573
1574static struct dentry *visornic_debugfs_dir;
1575static const struct file_operations debugfs_info_fops = {
1576        .read = info_debugfs_read,
1577};
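/* The statistics above are exposed through debugfs; assuming debugfs is
 * mounted at the conventional /sys/kernel/debug, they could be read with
 * something like:
 *
 *   cat /sys/kernel/debug/visornic/info
 *
 * The "visornic" directory and its "info" file are created in
 * visornic_init() below.
 */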
1578
1579/* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1580 * @devdata: Visornic device.
1581 */
1582static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1583{
1584        int i;
1585        struct net_device *netdev;
1586        struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1587        int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1588        int err;
1589
1590        /* don't do this until vnic is marked ready */
1591        if (!(devdata->enabled && devdata->enab_dis_acked))
1592                return;
1593
1594        netdev = devdata->netdev;
1595        rcv_bufs_allocated = 0;
1596        /* Bound the number of retries so we cannot get stuck here forever,
1597         * but still retry if we can't allocate them all this time.
1598         */
1599        cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1600        while (cur_num_rcv_bufs_to_alloc > 0) {
1601                cur_num_rcv_bufs_to_alloc--;
1602                for (i = 0; i < devdata->num_rcv_bufs; i++) {
1603                        if (devdata->rcvbuf[i])
1604                                continue;
1605                        devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1606                        if (!devdata->rcvbuf[i]) {
1607                                devdata->alloc_failed_in_if_needed_cnt++;
1608                                break;
1609                        }
1610                        rcv_bufs_allocated++;
1611                        err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1612                        if (err) {
1613                                kfree_skb(devdata->rcvbuf[i]);
1614                                devdata->rcvbuf[i] = NULL;
1615                                break;
1616                        }
1617                        devdata->chstat.extra_rcvbufs_sent++;
1618                }
1619        }
1620        devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1621}
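/* send_rcv_posts_if_needed() runs from the NAPI poll path (visornic_poll()
 * below), so receive buffers that could not be allocated or posted here are
 * simply retried on a later poll cycle.
 */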
1622
1623/* drain_resp_queue - drains and ignores all messages from the resp queue
1624 * @cmdrsp:  IO channel command response message.
1625 * @devdata: Visornic device to drain.
1626 */
1627static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1628                             struct visornic_devdata *devdata)
1629{
1630        while (!visorchannel_signalremove(devdata->dev->visorchannel,
1631                                          IOCHAN_FROM_IOPART,
1632                                          cmdrsp))
1633                ;
1634}
1635
1636/* service_resp_queue - drain the response queue
1637 * @cmdrsp:  IO channel command response message.
1638 * @devdata: Visornic device to drain.
1639 * @rx_work_done: Running count of packets processed, updated in place.
1640 * @budget:       NAPI budget; stop once this many packets are handled.
1641 *
1642 * Drain the response queue of any responses from the IO Partition. Process the
1643 * responses as we get them.
1644 */
1645static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1646                               struct visornic_devdata *devdata,
1647                               int *rx_work_done, int budget)
1648{
1649        unsigned long flags;
1650        struct net_device *netdev;
1651
1652        while (*rx_work_done < budget) {
1653                /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1654                 * moment
1655                 */
1656                /* queue empty */
1657                if (visorchannel_signalremove(devdata->dev->visorchannel,
1658                                              IOCHAN_FROM_IOPART,
1659                                              cmdrsp))
1660                        break;
1661
1662                switch (cmdrsp->net.type) {
1663                case NET_RCV:
1664                        devdata->chstat.got_rcv++;
1665                        /* process incoming packet */
1666                        *rx_work_done += visornic_rx(cmdrsp);
1667                        break;
1668                case NET_XMIT_DONE:
1669                        spin_lock_irqsave(&devdata->priv_lock, flags);
1670                        devdata->chstat.got_xmit_done++;
1671                        if (cmdrsp->net.xmtdone.xmt_done_result)
1672                                devdata->chstat.xmit_fail++;
1673                        /* only call queue wake if we stopped it */
1674                        netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1675                        /* ASSERT netdev == vnicinfo->netdev; */
1676                        if (netdev == devdata->netdev &&
1677                            netif_queue_stopped(netdev)) {
1678                                /* check if we have crossed the lower watermark
1679                                 * for netif_wake_queue()
1680                                 */
1681                                if (vnic_hit_low_watermark
1682                                    (devdata,
1683                                     devdata->lower_threshold_net_xmits)) {
1684                                        /* enough NET_XMITs completed
1685                                         * so can restart netif queue
1686                                         */
1687                                        netif_wake_queue(netdev);
1688                                        devdata->flow_control_lower_hits++;
1689                                }
1690                        }
1691                        skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1692                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1693                        kfree_skb(cmdrsp->net.buf);
1694                        break;
1695                case NET_RCV_ENBDIS_ACK:
1696                        devdata->chstat.got_enbdisack++;
1697                        netdev = (struct net_device *)
1698                                 cmdrsp->net.enbdis.context;
1699                        spin_lock_irqsave(&devdata->priv_lock, flags);
1700                        devdata->enab_dis_acked = 1;
1701                        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1702
1703                        if (devdata->server_down &&
1704                            devdata->server_change_state) {
1705                                /* Inform Linux that the link is up */
1706                                devdata->server_down = false;
1707                                devdata->server_change_state = false;
1708                                netif_wake_queue(netdev);
1709                                netif_carrier_on(netdev);
1710                        }
1711                        break;
1712                case NET_CONNECT_STATUS:
1713                        netdev = devdata->netdev;
1714                        if (cmdrsp->net.enbdis.enable == 1) {
1715                                spin_lock_irqsave(&devdata->priv_lock, flags);
1716                                devdata->enabled = cmdrsp->net.enbdis.enable;
1717                                spin_unlock_irqrestore(&devdata->priv_lock,
1718                                                       flags);
1719                                netif_wake_queue(netdev);
1720                                netif_carrier_on(netdev);
1721                        } else {
1722                                netif_stop_queue(netdev);
1723                                netif_carrier_off(netdev);
1724                                spin_lock_irqsave(&devdata->priv_lock, flags);
1725                                devdata->enabled = cmdrsp->net.enbdis.enable;
1726                                spin_unlock_irqrestore(&devdata->priv_lock,
1727                                                       flags);
1728                        }
1729                        break;
1730                default:
1731                        break;
1732                }
1733                /* cmdrsp is now available for reuse  */
1734        }
1735}
1736
1737static int visornic_poll(struct napi_struct *napi, int budget)
1738{
1739        struct visornic_devdata *devdata = container_of(napi,
1740                                                        struct visornic_devdata,
1741                                                        napi);
1742        int rx_count = 0;
1743
1744        send_rcv_posts_if_needed(devdata);
1745        service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1746
1747        /* If there aren't any more packets to receive, stop the poll */
1748        if (rx_count < budget)
1749                napi_complete_done(napi, rx_count);
1750
1751        return rx_count;
1752}
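/* Receive-path flow as wired together in this driver: poll_for_irq() below
 * fires from irq_poll_timer and schedules NAPI whenever the channel's
 * response queue is non-empty; visornic_poll() then reposts receive buffers
 * and drains the queue via service_resp_queue(), completing NAPI once fewer
 * packets than the budget were processed.
 */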
1753
1754/* poll_for_irq - checks the status of the response queue
1755 * @t: Timer from which the visornic devdata is recovered.
1756 *
1757 * Timer callback that periodically checks the response queue and schedules
1758 * NAPI to drain it if needed.
1759 */
1760static void poll_for_irq(struct timer_list *t)
1761{
1762        struct visornic_devdata *devdata = from_timer(devdata, t,
1763                                                      irq_poll_timer);
1764
1765        if (!visorchannel_signalempty(
1766                                   devdata->dev->visorchannel,
1767                                   IOCHAN_FROM_IOPART))
1768                napi_schedule(&devdata->napi);
1769
1770        atomic_set(&devdata->interrupt_rcvd, 0);
1771
1772        mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1773}
1774
1775/* visornic_probe - probe function for visornic devices
1776 * @dev: The visor device discovered.
1777 *
1778 * Called when visorbus discovers a visornic device on its bus. It creates a new
1779 * visornic ethernet adapter.
1780 *
1781 * Return: 0 on success, or negative integer on error.
1782 */
1783static int visornic_probe(struct visor_device *dev)
1784{
1785        struct visornic_devdata *devdata = NULL;
1786        struct net_device *netdev = NULL;
1787        int err;
1788        int channel_offset = 0;
1789        u64 features;
1790
1791        netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1792        if (!netdev) {
1793                dev_err(&dev->device,
1794                        "%s alloc_etherdev failed\n", __func__);
1795                return -ENOMEM;
1796        }
1797
1798        netdev->netdev_ops = &visornic_dev_ops;
1799        netdev->watchdog_timeo = 5 * HZ;
1800        SET_NETDEV_DEV(netdev, &dev->device);
1801
1802        /* Get MAC address from channel and read it into the device. */
1803        netdev->addr_len = ETH_ALEN;
1804        channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1805        err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1806                                    ETH_ALEN);
1807        if (err < 0) {
1808                dev_err(&dev->device,
1809                        "%s failed to get mac addr from chan (%d)\n",
1810                        __func__, err);
1811                goto cleanup_netdev;
1812        }
1813
1814        devdata = devdata_initialize(netdev_priv(netdev), dev);
1815        if (!devdata) {
1816                dev_err(&dev->device,
1817                        "%s devdata_initialize failed\n", __func__);
1818                err = -ENOMEM;
1819                goto cleanup_netdev;
1820        }
1821        /* don't trust messages lying around in the channel */
1822        drain_resp_queue(devdata->cmdrsp, devdata);
1823
1824        devdata->netdev = netdev;
1825        dev_set_drvdata(&dev->device, devdata);
1826        init_waitqueue_head(&devdata->rsp_queue);
1827        spin_lock_init(&devdata->priv_lock);
1828        /* not yet */
1829        devdata->enabled = 0;
1830        atomic_set(&devdata->usage, 1);
1831
1832        /* Set up rcv bufs */
1833        channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1834        err = visorbus_read_channel(dev, channel_offset,
1835                                    &devdata->num_rcv_bufs, 4);
1836        if (err) {
1837                dev_err(&dev->device,
1838                        "%s failed to get #rcv bufs from chan (%d)\n",
1839                        __func__, err);
1840                goto cleanup_netdev;
1841        }
1842
1843        devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1844                                  sizeof(struct sk_buff *), GFP_KERNEL);
1845        if (!devdata->rcvbuf) {
1846                err = -ENOMEM;
1847                goto cleanup_netdev;
1848        }
1849
1850        /* Set the net_xmit outstanding threshold:
1851         * always leave two slots open, but never go below a minimum of 3.
1852         * Note that max_outstanding_net_xmits must be > 0.
1853         */
1854        devdata->max_outstanding_net_xmits =
1855                max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1856        devdata->upper_threshold_net_xmits =
1857                max_t(unsigned long,
1858                      2, (devdata->max_outstanding_net_xmits - 1));
1859        devdata->lower_threshold_net_xmits =
1860                max_t(unsigned long,
1861                      1, (devdata->max_outstanding_net_xmits / 2));
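        /* Worked example with a hypothetical (not channel-mandated) value:
         * if the IO Partition reported num_rcv_bufs = 64, then
         *   max_outstanding_net_xmits = max(3, 64 / 3 - 2) = 19
         *   upper_threshold_net_xmits = max(2, 19 - 1)     = 18
         *   lower_threshold_net_xmits = max(1, 19 / 2)     = 9
         */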
1862
1863        skb_queue_head_init(&devdata->xmitbufhead);
1864
1865        /* create a cmdrsp we can use to post and unpost rcv buffers */
1866        devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1867        if (!devdata->cmdrsp_rcv) {
1868                err = -ENOMEM;
1869                goto cleanup_rcvbuf;
1870        }
1871        devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1872        if (!devdata->xmit_cmdrsp) {
1873                err = -ENOMEM;
1874                goto cleanup_cmdrsp_rcv;
1875        }
1876        INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1877        devdata->server_down = false;
1878        devdata->server_change_state = false;
1879
1880        /* set the default MTU */
1881        channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1882        err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1883        if (err) {
1884                dev_err(&dev->device,
1885                        "%s failed to get mtu from chan (%d)\n",
1886                        __func__, err);
1887                goto cleanup_xmit_cmdrsp;
1888        }
1889
1890        /* TODO: Setup Interrupt information */
1891        /* Start NAPI and the polling timer so we can process responses */
1892        netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1893
1894        timer_setup(&devdata->irq_poll_timer, poll_for_irq, 0);
1895        /* Note: This timer has to be running before the device is
1896         * enabled, because the napi poll routine is responsible for
1897         * setting enab_dis_acked.
1898         */
1899        mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1900
1901        channel_offset = offsetof(struct visor_io_channel,
1902                                  channel_header.features);
1903        err = visorbus_read_channel(dev, channel_offset, &features, 8);
1904        if (err) {
1905                dev_err(&dev->device,
1906                        "%s failed to get features from chan (%d)\n",
1907                        __func__, err);
1908                goto cleanup_napi_add;
1909        }
1910
1911        features |= VISOR_CHANNEL_IS_POLLING;
1912        features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1913        err = visorbus_write_channel(dev, channel_offset, &features, 8);
1914        if (err) {
1915                dev_err(&dev->device,
1916                        "%s failed to set features in chan (%d)\n",
1917                        __func__, err);
1918                goto cleanup_napi_add;
1919        }
1920
1921        /* Note: Interrupts have to be enabled before the netdev is
1922         * registered, because the napi routine is responsible for
1923         * setting enab_dis_acked.
1924         */
1925        visorbus_enable_channel_interrupts(dev);
1926
1927        err = register_netdev(netdev);
1928        if (err) {
1929                dev_err(&dev->device,
1930                        "%s register_netdev failed (%d)\n", __func__, err);
1931                goto cleanup_napi_add;
1932        }
1933
1934        /* create debug/sysfs directories */
1935        devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1936                                                      visornic_debugfs_dir);
1937        if (!devdata->eth_debugfs_dir) {
1938                dev_err(&dev->device,
1939                        "%s debugfs_create_dir %s failed\n",
1940                        __func__, netdev->name);
1941                err = -ENOMEM;
1942                goto cleanup_register_netdev;
1943        }
1944
1945        dev_info(&dev->device, "%s success netdev=%s\n",
1946                 __func__, netdev->name);
1947        return 0;
1948
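/* Error unwinding: the labels below fall through, so jumping to any one of
 * them releases that resource and then everything allocated before it, in
 * reverse allocation order, ending with free_netdev().
 */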
1949cleanup_register_netdev:
1950        unregister_netdev(netdev);
1951
1952cleanup_napi_add:
1953        del_timer_sync(&devdata->irq_poll_timer);
1954        netif_napi_del(&devdata->napi);
1955
1956cleanup_xmit_cmdrsp:
1957        kfree(devdata->xmit_cmdrsp);
1958
1959cleanup_cmdrsp_rcv:
1960        kfree(devdata->cmdrsp_rcv);
1961
1962cleanup_rcvbuf:
1963        kfree(devdata->rcvbuf);
1964
1965cleanup_netdev:
1966        free_netdev(netdev);
1967        return err;
1968}
1969
1970/* host_side_disappeared - IO Partition is gone
1971 * @devdata: Device object.
1972 *
1973 * IO partition servicing this device is gone; do cleanup.
1974 */
1975static void host_side_disappeared(struct visornic_devdata *devdata)
1976{
1977        unsigned long flags;
1978
1979        spin_lock_irqsave(&devdata->priv_lock, flags);
1980        /* indicate device destroyed */
1981        devdata->dev = NULL;
1982        spin_unlock_irqrestore(&devdata->priv_lock, flags);
1983}
1984
1985/* visornic_remove - called when visornic dev goes away
1986 * @dev: Visornic device that is being removed.
1987 *
1988 * Called when DEVICE_DESTROY gets called to remove device.
1989 */
1990static void visornic_remove(struct visor_device *dev)
1991{
1992        struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1993        struct net_device *netdev;
1994        unsigned long flags;
1995
1996        if (!devdata) {
1997                dev_err(&dev->device, "%s no devdata\n", __func__);
1998                return;
1999        }
2000        spin_lock_irqsave(&devdata->priv_lock, flags);
2001        if (devdata->going_away) {
2002                spin_unlock_irqrestore(&devdata->priv_lock, flags);
2003                dev_err(&dev->device, "%s already being removed\n", __func__);
2004                return;
2005        }
2006        devdata->going_away = true;
2007        spin_unlock_irqrestore(&devdata->priv_lock, flags);
2008        netdev = devdata->netdev;
2009        if (!netdev) {
2010                dev_err(&dev->device, "%s no net device\n", __func__);
2011                return;
2012        }
2013
2014        /* going_away prevents new items being added to the workqueues */
2015        cancel_work_sync(&devdata->timeout_reset);
2016
2017        debugfs_remove_recursive(devdata->eth_debugfs_dir);
2018        /* this will call visornic_close() */
2019        unregister_netdev(netdev);
2020
2021        del_timer_sync(&devdata->irq_poll_timer);
2022        netif_napi_del(&devdata->napi);
2023
2024        dev_set_drvdata(&dev->device, NULL);
2025        host_side_disappeared(devdata);
2026        devdata_release(devdata);
2027        free_netdev(netdev);
2028}
2029
2030/* visornic_pause - called when IO Part disappears
2031 * @dev:           Visornic device that is being serviced.
2032 * @complete_func: Call when finished.
2033 *
2034 * Called when the IO Partition has gone down. Need to free up resources and
2035 * wait for IO partition to come back. Mark link as down and don't attempt any
2036 * DMA. When we have freed memory, call the complete_func so that Command knows
2037 * we are done. If we don't call complete_func, the IO Partition will never
2038 * come back.
2039 *
2040 * Return: 0 on success.
2041 */
2042static int visornic_pause(struct visor_device *dev,
2043                          visorbus_state_complete_func complete_func)
2044{
2045        struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2046
2047        visornic_serverdown(devdata, complete_func);
2048        return 0;
2049}
2050
2051/* visornic_resume - called when IO Partition has recovered
2052 * @dev:           Visornic device that is being serviced.
2053 * @complete_func: Call when finished.
2054 *
2055 * Called when the IO partition has recovered. Re-establish connection to the IO
2056 * Partition and set the link up. Okay to do DMA again.
2057 *
2058 * Return: 0 on success, or negative integer on error.
2059 */
2060static int visornic_resume(struct visor_device *dev,
2061                           visorbus_state_complete_func complete_func)
2062{
2063        struct visornic_devdata *devdata;
2064        struct net_device *netdev;
2065        unsigned long flags;
2066
2067        devdata = dev_get_drvdata(&dev->device);
2068        if (!devdata) {
2069                dev_err(&dev->device, "%s no devdata\n", __func__);
2070                return -EINVAL;
2071        }
2072
2073        netdev = devdata->netdev;
2074
2075        spin_lock_irqsave(&devdata->priv_lock, flags);
2076        if (devdata->server_change_state) {
2077                spin_unlock_irqrestore(&devdata->priv_lock, flags);
2078                dev_err(&dev->device, "%s server already changing state\n",
2079                        __func__);
2080                return -EINVAL;
2081        }
2082        if (!devdata->server_down) {
2083                spin_unlock_irqrestore(&devdata->priv_lock, flags);
2084                dev_err(&dev->device, "%s server not down\n", __func__);
2085                complete_func(dev, 0);
2086                return 0;
2087        }
2088        devdata->server_change_state = true;
2089        spin_unlock_irqrestore(&devdata->priv_lock, flags);
2090
2091        /* Must transition channel to ATTACHED state BEFORE
2092         * we can start using the device again.
2093         * TODO: State transitions
2094         */
2095        mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2096
2097        rtnl_lock();
2098        dev_open(netdev);
2099        rtnl_unlock();
2100
2101        complete_func(dev, 0);
2102        return 0;
2103}
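/* visornic_pause() and visornic_resume() implement the visorbus pause/resume
 * handshake around an IO Partition outage: pause hands complete_func to
 * visornic_serverdown() so the link can be marked down while the IO Partition
 * is away, and resume restarts the poll timer, reopens the netdev under
 * rtnl_lock, and calls complete_func so visorbus knows the transition has
 * finished.
 */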
2104
2105/* This is used to tell the visorbus driver which types of visor devices
2106 * we support, and what functions to call when a visor device that we support
2107 * is attached or removed.
2108 */
2109static struct visor_driver visornic_driver = {
2110        .name = "visornic",
2111        .owner = THIS_MODULE,
2112        .channel_types = visornic_channel_types,
2113        .probe = visornic_probe,
2114        .remove = visornic_remove,
2115        .pause = visornic_pause,
2116        .resume = visornic_resume,
2117        .channel_interrupt = NULL,
2118};
2119
2120/* visornic_init - init function
2121 *
2122 * Init function for the visornic driver. Do initial driver setup and wait
2123 * for devices.
2124 *
2125 * Return: 0 on success, negative integer on error.
2126 */
2127static int visornic_init(void)
2128{
2129        struct dentry *ret;
2130        int err = -ENOMEM;
2131
2132        visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2133        if (!visornic_debugfs_dir)
2134                return err;
2135
2136        ret = debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2137                                  &debugfs_info_fops);
2138        if (!ret)
2139                goto cleanup_debugfs;
2140        ret = debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir,
2141                                  NULL, &debugfs_enable_ints_fops);
2142        if (!ret)
2143                goto cleanup_debugfs;
2144
2145        err = visorbus_register_visor_driver(&visornic_driver);
2146        if (err)
2147                goto cleanup_debugfs;
2148
2149        return 0;
2150
2151cleanup_debugfs:
2152        debugfs_remove_recursive(visornic_debugfs_dir);
2153        return err;
2154}
2155
2156/* visornic_cleanup - driver exit routine
2157 *
2158 * Unregister driver from the bus and free up memory.
2159 */
2160static void visornic_cleanup(void)
2161{
2162        visorbus_unregister_visor_driver(&visornic_driver);
2163        debugfs_remove_recursive(visornic_debugfs_dir);
2164}
2165
2166module_init(visornic_init);
2167module_exit(visornic_cleanup);
2168
2169MODULE_AUTHOR("Unisys");
2170MODULE_LICENSE("GPL");
2171MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");
2172