linux/drivers/net/ethernet/sfc/siena_sriov.c
<<
>>
Prefs
   1/****************************************************************************
   2 * Driver for Solarflare network controllers and boards
   3 * Copyright 2010-2012 Solarflare Communications Inc.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published
   7 * by the Free Software Foundation, incorporated herein by reference.
   8 */
   9#include <linux/pci.h>
  10#include <linux/module.h>
  11#include "net_driver.h"
  12#include "efx.h"
  13#include "nic.h"
  14#include "io.h"
  15#include "mcdi.h"
  16#include "filter.h"
  17#include "mcdi_pcol.h"
  18#include "farch_regs.h"
  19#include "vfdi.h"
  20
  21/* Number of longs required to track all the VIs in a VF */
  22#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
  23
  24/* Maximum number of RX queues supported */
  25#define VF_MAX_RX_QUEUES 63
  26
  27/**
  28 * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
  29 * @VF_TX_FILTER_OFF: Disabled
  30 * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
  31 *      2 TX queues allowed per VF.
  32 * @VF_TX_FILTER_ON: Enabled
  33 */
  34enum efx_vf_tx_filter_mode {
  35        VF_TX_FILTER_OFF,
  36        VF_TX_FILTER_AUTO,
  37        VF_TX_FILTER_ON,
  38};
  39
  40/**
  41 * struct efx_vf - Back-end resource and protocol state for a PCI VF
  42 * @efx: The Efx NIC owning this VF
  43 * @pci_rid: The PCI requester ID for this VF
  44 * @pci_name: The PCI name (formatted address) of this VF
  45 * @index: Index of VF within its port and PF.
  46 * @req: VFDI incoming request work item. Incoming USR_EV events are received
  47 *      by the NAPI handler, but must be handled by executing MCDI requests
  48 *      inside a work item.
  49 * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
  50 * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
  51 * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
  52 * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
  53 *      @status_lock
  54 * @busy: VFDI request queued to be processed or being processed. Receiving
  55 *      a VFDI request when @busy is set is an error condition.
  56 * @buf: Incoming VFDI requests are DMA from the VF into this buffer.
  57 * @buftbl_base: Buffer table entries for this VF start at this index.
  58 * @rx_filtering: Receive filtering has been requested by the VF driver.
  59 * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
  60 * @rx_filter_qid: VF relative qid for RX filter requested by VF.
  61 * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
  62 * @tx_filter_mode: Transmit MAC filtering mode.
  63 * @tx_filter_id: Transmit MAC filter ID.
  64 * @addr: The MAC address and outer vlan tag of the VF.
  65 * @status_addr: VF DMA address of page for &struct vfdi_status updates.
  66 * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
  67 *      @peer_page_addrs and @peer_page_count from simultaneous
  68 *      updates by the VM and consumption by
  69 *      efx_sriov_update_vf_addr()
  70 * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
  71 * @peer_page_count: Number of entries in @peer_page_count.
  72 * @evq0_addrs: Array of guest pages backing evq0.
  73 * @evq0_count: Number of entries in @evq0_addrs.
  74 * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
  75 *      to wait for flush completions.
  76 * @txq_lock: Mutex for TX queue allocation.
  77 * @txq_mask: Mask of initialized transmit queues.
  78 * @txq_count: Number of initialized transmit queues.
  79 * @rxq_mask: Mask of initialized receive queues.
  80 * @rxq_count: Number of initialized receive queues.
  81 * @rxq_retry_mask: Mask or receive queues that need to be flushed again
  82 *      due to flush failure.
  83 * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
  84 * @reset_work: Work item to schedule a VF reset.
  85 */
  86struct efx_vf {
  87        struct efx_nic *efx;
  88        unsigned int pci_rid;
  89        char pci_name[13]; /* dddd:bb:dd.f */
  90        unsigned int index;
  91        struct work_struct req;
  92        u64 req_addr;
  93        int req_type;
  94        unsigned req_seqno;
  95        unsigned msg_seqno;
  96        bool busy;
  97        struct efx_buffer buf;
  98        unsigned buftbl_base;
  99        bool rx_filtering;
 100        enum efx_filter_flags rx_filter_flags;
 101        unsigned rx_filter_qid;
 102        int rx_filter_id;
 103        enum efx_vf_tx_filter_mode tx_filter_mode;
 104        int tx_filter_id;
 105        struct vfdi_endpoint addr;
 106        u64 status_addr;
 107        struct mutex status_lock;
 108        u64 *peer_page_addrs;
 109        unsigned peer_page_count;
 110        u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
 111                       EFX_BUF_SIZE];
 112        unsigned evq0_count;
 113        wait_queue_head_t flush_waitq;
 114        struct mutex txq_lock;
 115        unsigned long txq_mask[VI_MASK_LENGTH];
 116        unsigned txq_count;
 117        unsigned long rxq_mask[VI_MASK_LENGTH];
 118        unsigned rxq_count;
 119        unsigned long rxq_retry_mask[VI_MASK_LENGTH];
 120        atomic_t rxq_retry_count;
 121        struct work_struct reset_work;
 122};
 123
 124struct efx_memcpy_req {
 125        unsigned int from_rid;
 126        void *from_buf;
 127        u64 from_addr;
 128        unsigned int to_rid;
 129        u64 to_addr;
 130        unsigned length;
 131};
 132
 133/**
 134 * struct efx_local_addr - A MAC address on the vswitch without a VF.
 135 *
 136 * Siena does not have a switch, so VFs can't transmit data to each
 137 * other. Instead the VFs must be made aware of the local addresses
 138 * on the vswitch, so that they can arrange for an alternative
 139 * software datapath to be used.
 140 *
 141 * @link: List head for insertion into efx->local_addr_list.
 142 * @addr: Ethernet address
 143 */
 144struct efx_local_addr {
 145        struct list_head link;
 146        u8 addr[ETH_ALEN];
 147};
 148
 149/**
 150 * struct efx_endpoint_page - Page of vfdi_endpoint structures
 151 *
 152 * @link: List head for insertion into efx->local_page_list.
 153 * @ptr: Pointer to page.
 154 * @addr: DMA address of page.
 155 */
 156struct efx_endpoint_page {
 157        struct list_head link;
 158        void *ptr;
 159        dma_addr_t addr;
 160};
 161
 162/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
 163#define EFX_BUFTBL_TXQ_BASE(_vf, _qid)                                  \
 164        ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
 165#define EFX_BUFTBL_RXQ_BASE(_vf, _qid)                                  \
 166        (EFX_BUFTBL_TXQ_BASE(_vf, _qid) +                               \
 167         (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
 168#define EFX_BUFTBL_EVQ_BASE(_vf, _qid)                                  \
 169        (EFX_BUFTBL_TXQ_BASE(_vf, _qid) +                               \
 170         (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
 171
 172#define EFX_FIELD_MASK(_field)                  \
 173        ((1 << _field ## _WIDTH) - 1)
 174
 175/* VFs can only use this many transmit channels */
 176static unsigned int vf_max_tx_channels = 2;
 177module_param(vf_max_tx_channels, uint, 0444);
 178MODULE_PARM_DESC(vf_max_tx_channels,
 179                 "Limit the number of TX channels VFs can use");
 180
 181static int max_vfs = -1;
 182module_param(max_vfs, int, 0444);
 183MODULE_PARM_DESC(max_vfs,
 184                 "Reduce the number of VFs initialized by the driver");
 185
 186/* Workqueue used by VFDI communication.  We can't use the global
 187 * workqueue because it may be running the VF driver's probe()
 188 * routine, which will be blocked there waiting for a VFDI response.
 189 */
 190static struct workqueue_struct *vfdi_workqueue;
 191
 192static unsigned abs_index(struct efx_vf *vf, unsigned index)
 193{
 194        return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
 195}
 196
 197static int efx_sriov_cmd(struct efx_nic *efx, bool enable,
 198                         unsigned *vi_scale_out, unsigned *vf_total_out)
 199{
 200        MCDI_DECLARE_BUF(inbuf, MC_CMD_SRIOV_IN_LEN);
 201        MCDI_DECLARE_BUF(outbuf, MC_CMD_SRIOV_OUT_LEN);
 202        unsigned vi_scale, vf_total;
 203        size_t outlen;
 204        int rc;
 205
 206        MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
 207        MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
 208        MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
 209
 210        rc = efx_mcdi_rpc(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
 211                          outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
 212        if (rc)
 213                return rc;
 214        if (outlen < MC_CMD_SRIOV_OUT_LEN)
 215                return -EIO;
 216
 217        vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
 218        vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
 219        if (vi_scale > EFX_VI_SCALE_MAX)
 220                return -EOPNOTSUPP;
 221
 222        if (vi_scale_out)
 223                *vi_scale_out = vi_scale;
 224        if (vf_total_out)
 225                *vf_total_out = vf_total;
 226
 227        return 0;
 228}
 229
 230static void efx_sriov_usrev(struct efx_nic *efx, bool enabled)
 231{
 232        efx_oword_t reg;
 233
 234        EFX_POPULATE_OWORD_2(reg,
 235                             FRF_CZ_USREV_DIS, enabled ? 0 : 1,
 236                             FRF_CZ_DFLT_EVQ, efx->vfdi_channel->channel);
 237        efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
 238}
 239
 240static int efx_sriov_memcpy(struct efx_nic *efx, struct efx_memcpy_req *req,
 241                            unsigned int count)
 242{
 243        MCDI_DECLARE_BUF(inbuf, MCDI_CTL_SDU_LEN_MAX_V1);
 244        MCDI_DECLARE_STRUCT_PTR(record);
 245        unsigned int index, used;
 246        u64 from_addr;
 247        u32 from_rid;
 248        int rc;
 249
 250        mb();   /* Finish writing source/reading dest before DMA starts */
 251
 252        if (WARN_ON(count > MC_CMD_MEMCPY_IN_RECORD_MAXNUM))
 253                return -ENOBUFS;
 254        used = MC_CMD_MEMCPY_IN_LEN(count);
 255
 256        for (index = 0; index < count; index++) {
 257                record = MCDI_ARRAY_STRUCT_PTR(inbuf, MEMCPY_IN_RECORD, index);
 258                MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_NUM_RECORDS,
 259                               count);
 260                MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
 261                               req->to_rid);
 262                MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR,
 263                               req->to_addr);
 264                if (req->from_buf == NULL) {
 265                        from_rid = req->from_rid;
 266                        from_addr = req->from_addr;
 267                } else {
 268                        if (WARN_ON(used + req->length >
 269                                    MCDI_CTL_SDU_LEN_MAX_V1)) {
 270                                rc = -ENOBUFS;
 271                                goto out;
 272                        }
 273
 274                        from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
 275                        from_addr = used;
 276                        memcpy(_MCDI_PTR(inbuf, used), req->from_buf,
 277                               req->length);
 278                        used += req->length;
 279                }
 280
 281                MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
 282                MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR,
 283                               from_addr);
 284                MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
 285                               req->length);
 286
 287                ++req;
 288        }
 289
 290        rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
 291out:
 292        mb();   /* Don't write source/read dest before DMA is complete */
 293
 294        return rc;
 295}
 296
 297/* The TX filter is entirely controlled by this driver, and is modified
 298 * underneath the feet of the VF
 299 */
 300static void efx_sriov_reset_tx_filter(struct efx_vf *vf)
 301{
 302        struct efx_nic *efx = vf->efx;
 303        struct efx_filter_spec filter;
 304        u16 vlan;
 305        int rc;
 306
 307        if (vf->tx_filter_id != -1) {
 308                efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
 309                                          vf->tx_filter_id);
 310                netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
 311                          vf->pci_name, vf->tx_filter_id);
 312                vf->tx_filter_id = -1;
 313        }
 314
 315        if (is_zero_ether_addr(vf->addr.mac_addr))
 316                return;
 317
 318        /* Turn on TX filtering automatically if not explicitly
 319         * enabled or disabled.
 320         */
 321        if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
 322                vf->tx_filter_mode = VF_TX_FILTER_ON;
 323
 324        vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
 325        efx_filter_init_tx(&filter, abs_index(vf, 0));
 326        rc = efx_filter_set_eth_local(&filter,
 327                                      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
 328                                      vf->addr.mac_addr);
 329        BUG_ON(rc);
 330
 331        rc = efx_filter_insert_filter(efx, &filter, true);
 332        if (rc < 0) {
 333                netif_warn(efx, hw, efx->net_dev,
 334                           "Unable to migrate tx filter for vf %s\n",
 335                           vf->pci_name);
 336        } else {
 337                netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
 338                          vf->pci_name, rc);
 339                vf->tx_filter_id = rc;
 340        }
 341}
 342
 343/* The RX filter is managed here on behalf of the VF driver */
 344static void efx_sriov_reset_rx_filter(struct efx_vf *vf)
 345{
 346        struct efx_nic *efx = vf->efx;
 347        struct efx_filter_spec filter;
 348        u16 vlan;
 349        int rc;
 350
 351        if (vf->rx_filter_id != -1) {
 352                efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
 353                                          vf->rx_filter_id);
 354                netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
 355                          vf->pci_name, vf->rx_filter_id);
 356                vf->rx_filter_id = -1;
 357        }
 358
 359        if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
 360                return;
 361
 362        vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
 363        efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
 364                           vf->rx_filter_flags,
 365                           abs_index(vf, vf->rx_filter_qid));
 366        rc = efx_filter_set_eth_local(&filter,
 367                                      vlan ? vlan : EFX_FILTER_VID_UNSPEC,
 368                                      vf->addr.mac_addr);
 369        BUG_ON(rc);
 370
 371        rc = efx_filter_insert_filter(efx, &filter, true);
 372        if (rc < 0) {
 373                netif_warn(efx, hw, efx->net_dev,
 374                           "Unable to insert rx filter for vf %s\n",
 375                           vf->pci_name);
 376        } else {
 377                netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
 378                          vf->pci_name, rc);
 379                vf->rx_filter_id = rc;
 380        }
 381}
 382
 383static void __efx_sriov_update_vf_addr(struct efx_vf *vf)
 384{
 385        efx_sriov_reset_tx_filter(vf);
 386        efx_sriov_reset_rx_filter(vf);
 387        queue_work(vfdi_workqueue, &vf->efx->peer_work);
 388}
 389
 390/* Push the peer list to this VF. The caller must hold status_lock to interlock
 391 * with VFDI requests, and they must be serialised against manipulation of
 392 * local_page_list, either by acquiring local_lock or by running from
 393 * efx_sriov_peer_work()
 394 */
 395static void __efx_sriov_push_vf_status(struct efx_vf *vf)
 396{
 397        struct efx_nic *efx = vf->efx;
 398        struct vfdi_status *status = efx->vfdi_status.addr;
 399        struct efx_memcpy_req copy[4];
 400        struct efx_endpoint_page *epp;
 401        unsigned int pos, count;
 402        unsigned data_offset;
 403        efx_qword_t event;
 404
 405        WARN_ON(!mutex_is_locked(&vf->status_lock));
 406        WARN_ON(!vf->status_addr);
 407
 408        status->local = vf->addr;
 409        status->generation_end = ++status->generation_start;
 410
 411        memset(copy, '\0', sizeof(copy));
 412        /* Write generation_start */
 413        copy[0].from_buf = &status->generation_start;
 414        copy[0].to_rid = vf->pci_rid;
 415        copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
 416                                                     generation_start);
 417        copy[0].length = sizeof(status->generation_start);
 418        /* DMA the rest of the structure (excluding the generations). This
 419         * assumes that the non-generation portion of vfdi_status is in
 420         * one chunk starting at the version member.
 421         */
 422        data_offset = offsetof(struct vfdi_status, version);
 423        copy[1].from_rid = efx->pci_dev->devfn;
 424        copy[1].from_addr = efx->vfdi_status.dma_addr + data_offset;
 425        copy[1].to_rid = vf->pci_rid;
 426        copy[1].to_addr = vf->status_addr + data_offset;
 427        copy[1].length =  status->length - data_offset;
 428
 429        /* Copy the peer pages */
 430        pos = 2;
 431        count = 0;
 432        list_for_each_entry(epp, &efx->local_page_list, link) {
 433                if (count == vf->peer_page_count) {
 434                        /* The VF driver will know they need to provide more
 435                         * pages because peer_addr_count is too large.
 436                         */
 437                        break;
 438                }
 439                copy[pos].from_buf = NULL;
 440                copy[pos].from_rid = efx->pci_dev->devfn;
 441                copy[pos].from_addr = epp->addr;
 442                copy[pos].to_rid = vf->pci_rid;
 443                copy[pos].to_addr = vf->peer_page_addrs[count];
 444                copy[pos].length = EFX_PAGE_SIZE;
 445
 446                if (++pos == ARRAY_SIZE(copy)) {
 447                        efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
 448                        pos = 0;
 449                }
 450                ++count;
 451        }
 452
 453        /* Write generation_end */
 454        copy[pos].from_buf = &status->generation_end;
 455        copy[pos].to_rid = vf->pci_rid;
 456        copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
 457                                                       generation_end);
 458        copy[pos].length = sizeof(status->generation_end);
 459        efx_sriov_memcpy(efx, copy, pos + 1);
 460
 461        /* Notify the guest */
 462        EFX_POPULATE_QWORD_3(event,
 463                             FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
 464                             VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
 465                             VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
 466        ++vf->msg_seqno;
 467        efx_farch_generate_event(efx,
 468                                 EFX_VI_BASE + vf->index * efx_vf_size(efx),
 469                                 &event);
 470}
 471
 472static void efx_sriov_bufs(struct efx_nic *efx, unsigned offset,
 473                           u64 *addr, unsigned count)
 474{
 475        efx_qword_t buf;
 476        unsigned pos;
 477
 478        for (pos = 0; pos < count; ++pos) {
 479                EFX_POPULATE_QWORD_3(buf,
 480                                     FRF_AZ_BUF_ADR_REGION, 0,
 481                                     FRF_AZ_BUF_ADR_FBUF,
 482                                     addr ? addr[pos] >> 12 : 0,
 483                                     FRF_AZ_BUF_OWNER_ID_FBUF, 0);
 484                efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
 485                                &buf, offset + pos);
 486        }
 487}
 488
 489static bool bad_vf_index(struct efx_nic *efx, unsigned index)
 490{
 491        return index >= efx_vf_size(efx);
 492}
 493
 494static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
 495{
 496        unsigned max_buf_count = max_entry_count *
 497                sizeof(efx_qword_t) / EFX_BUF_SIZE;
 498
 499        return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
 500}
 501
 502/* Check that VI specified by per-port index belongs to a VF.
 503 * Optionally set VF index and VI index within the VF.
 504 */
 505static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
 506                         struct efx_vf **vf_out, unsigned *rel_index_out)
 507{
 508        unsigned vf_i;
 509
 510        if (abs_index < EFX_VI_BASE)
 511                return true;
 512        vf_i = (abs_index - EFX_VI_BASE) / efx_vf_size(efx);
 513        if (vf_i >= efx->vf_init_count)
 514                return true;
 515
 516        if (vf_out)
 517                *vf_out = efx->vf + vf_i;
 518        if (rel_index_out)
 519                *rel_index_out = abs_index % efx_vf_size(efx);
 520        return false;
 521}
 522
 523static int efx_vfdi_init_evq(struct efx_vf *vf)
 524{
 525        struct efx_nic *efx = vf->efx;
 526        struct vfdi_req *req = vf->buf.addr;
 527        unsigned vf_evq = req->u.init_evq.index;
 528        unsigned buf_count = req->u.init_evq.buf_count;
 529        unsigned abs_evq = abs_index(vf, vf_evq);
 530        unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
 531        efx_oword_t reg;
 532
 533        if (bad_vf_index(efx, vf_evq) ||
 534            bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
 535                if (net_ratelimit())
 536                        netif_err(efx, hw, efx->net_dev,
 537                                  "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
 538                                  vf->pci_name, vf_evq, buf_count);
 539                return VFDI_RC_EINVAL;
 540        }
 541
 542        efx_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);
 543
 544        EFX_POPULATE_OWORD_3(reg,
 545                             FRF_CZ_TIMER_Q_EN, 1,
 546                             FRF_CZ_HOST_NOTIFY_MODE, 0,
 547                             FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
 548        efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
 549        EFX_POPULATE_OWORD_3(reg,
 550                             FRF_AZ_EVQ_EN, 1,
 551                             FRF_AZ_EVQ_SIZE, __ffs(buf_count),
 552                             FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
 553        efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
 554
 555        if (vf_evq == 0) {
 556                memcpy(vf->evq0_addrs, req->u.init_evq.addr,
 557                       buf_count * sizeof(u64));
 558                vf->evq0_count = buf_count;
 559        }
 560
 561        return VFDI_RC_SUCCESS;
 562}
 563
 564static int efx_vfdi_init_rxq(struct efx_vf *vf)
 565{
 566        struct efx_nic *efx = vf->efx;
 567        struct vfdi_req *req = vf->buf.addr;
 568        unsigned vf_rxq = req->u.init_rxq.index;
 569        unsigned vf_evq = req->u.init_rxq.evq;
 570        unsigned buf_count = req->u.init_rxq.buf_count;
 571        unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
 572        unsigned label;
 573        efx_oword_t reg;
 574
 575        if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
 576            vf_rxq >= VF_MAX_RX_QUEUES ||
 577            bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
 578                if (net_ratelimit())
 579                        netif_err(efx, hw, efx->net_dev,
 580                                  "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
 581                                  "buf_count %d\n", vf->pci_name, vf_rxq,
 582                                  vf_evq, buf_count);
 583                return VFDI_RC_EINVAL;
 584        }
 585        if (__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
 586                ++vf->rxq_count;
 587        efx_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);
 588
 589        label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
 590        EFX_POPULATE_OWORD_6(reg,
 591                             FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
 592                             FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
 593                             FRF_AZ_RX_DESCQ_LABEL, label,
 594                             FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
 595                             FRF_AZ_RX_DESCQ_JUMBO,
 596                             !!(req->u.init_rxq.flags &
 597                                VFDI_RXQ_FLAG_SCATTER_EN),
 598                             FRF_AZ_RX_DESCQ_EN, 1);
 599        efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
 600                         abs_index(vf, vf_rxq));
 601
 602        return VFDI_RC_SUCCESS;
 603}
 604
 605static int efx_vfdi_init_txq(struct efx_vf *vf)
 606{
 607        struct efx_nic *efx = vf->efx;
 608        struct vfdi_req *req = vf->buf.addr;
 609        unsigned vf_txq = req->u.init_txq.index;
 610        unsigned vf_evq = req->u.init_txq.evq;
 611        unsigned buf_count = req->u.init_txq.buf_count;
 612        unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
 613        unsigned label, eth_filt_en;
 614        efx_oword_t reg;
 615
 616        if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
 617            vf_txq >= vf_max_tx_channels ||
 618            bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
 619                if (net_ratelimit())
 620                        netif_err(efx, hw, efx->net_dev,
 621                                  "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
 622                                  "buf_count %d\n", vf->pci_name, vf_txq,
 623                                  vf_evq, buf_count);
 624                return VFDI_RC_EINVAL;
 625        }
 626
 627        mutex_lock(&vf->txq_lock);
 628        if (__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
 629                ++vf->txq_count;
 630        mutex_unlock(&vf->txq_lock);
 631        efx_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);
 632
 633        eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;
 634
 635        label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
 636        EFX_POPULATE_OWORD_8(reg,
 637                             FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
 638                             FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
 639                             FRF_AZ_TX_DESCQ_EN, 1,
 640                             FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
 641                             FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
 642                             FRF_AZ_TX_DESCQ_LABEL, label,
 643                             FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
 644                             FRF_BZ_TX_NON_IP_DROP_DIS, 1);
 645        efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
 646                         abs_index(vf, vf_txq));
 647
 648        return VFDI_RC_SUCCESS;
 649}
 650
 651/* Returns true when efx_vfdi_fini_all_queues should wake */
 652static bool efx_vfdi_flush_wake(struct efx_vf *vf)
 653{
 654        /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
 655        smp_mb();
 656
 657        return (!vf->txq_count && !vf->rxq_count) ||
 658                atomic_read(&vf->rxq_retry_count);
 659}
 660
 661static void efx_vfdi_flush_clear(struct efx_vf *vf)
 662{
 663        memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
 664        vf->txq_count = 0;
 665        memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
 666        vf->rxq_count = 0;
 667        memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
 668        atomic_set(&vf->rxq_retry_count, 0);
 669}
 670
 671static int efx_vfdi_fini_all_queues(struct efx_vf *vf)
 672{
 673        struct efx_nic *efx = vf->efx;
 674        efx_oword_t reg;
 675        unsigned count = efx_vf_size(efx);
 676        unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
 677        unsigned timeout = HZ;
 678        unsigned index, rxqs_count;
 679        MCDI_DECLARE_BUF(inbuf, MC_CMD_FLUSH_RX_QUEUES_IN_LENMAX);
 680        int rc;
 681
 682        BUILD_BUG_ON(VF_MAX_RX_QUEUES >
 683                     MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
 684
 685        rtnl_lock();
 686        siena_prepare_flush(efx);
 687        rtnl_unlock();
 688
 689        /* Flush all the initialized queues */
 690        rxqs_count = 0;
 691        for (index = 0; index < count; ++index) {
 692                if (test_bit(index, vf->txq_mask)) {
 693                        EFX_POPULATE_OWORD_2(reg,
 694                                             FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
 695                                             FRF_AZ_TX_FLUSH_DESCQ,
 696                                             vf_offset + index);
 697                        efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
 698                }
 699                if (test_bit(index, vf->rxq_mask)) {
 700                        MCDI_SET_ARRAY_DWORD(
 701                                inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
 702                                rxqs_count, vf_offset + index);
 703                        rxqs_count++;
 704                }
 705        }
 706
 707        atomic_set(&vf->rxq_retry_count, 0);
 708        while (timeout && (vf->rxq_count || vf->txq_count)) {
 709                rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, inbuf,
 710                                  MC_CMD_FLUSH_RX_QUEUES_IN_LEN(rxqs_count),
 711                                  NULL, 0, NULL);
 712                WARN_ON(rc < 0);
 713
 714                timeout = wait_event_timeout(vf->flush_waitq,
 715                                             efx_vfdi_flush_wake(vf),
 716                                             timeout);
 717                rxqs_count = 0;
 718                for (index = 0; index < count; ++index) {
 719                        if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
 720                                atomic_dec(&vf->rxq_retry_count);
 721                                MCDI_SET_ARRAY_DWORD(
 722                                        inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
 723                                        rxqs_count, vf_offset + index);
 724                                rxqs_count++;
 725                        }
 726                }
 727        }
 728
 729        rtnl_lock();
 730        siena_finish_flush(efx);
 731        rtnl_unlock();
 732
 733        /* Irrespective of success/failure, fini the queues */
 734        EFX_ZERO_OWORD(reg);
 735        for (index = 0; index < count; ++index) {
 736                efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
 737                                 vf_offset + index);
 738                efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
 739                                 vf_offset + index);
 740                efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
 741                                 vf_offset + index);
 742                efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
 743                                 vf_offset + index);
 744        }
 745        efx_sriov_bufs(efx, vf->buftbl_base, NULL,
 746                       EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
 747        efx_vfdi_flush_clear(vf);
 748
 749        vf->evq0_count = 0;
 750
 751        return timeout ? 0 : VFDI_RC_ETIMEDOUT;
 752}
 753
 754static int efx_vfdi_insert_filter(struct efx_vf *vf)
 755{
 756        struct efx_nic *efx = vf->efx;
 757        struct vfdi_req *req = vf->buf.addr;
 758        unsigned vf_rxq = req->u.mac_filter.rxq;
 759        unsigned flags;
 760
 761        if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
 762                if (net_ratelimit())
 763                        netif_err(efx, hw, efx->net_dev,
 764                                  "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
 765                                  "flags 0x%x\n", vf->pci_name, vf_rxq,
 766                                  req->u.mac_filter.flags);
 767                return VFDI_RC_EINVAL;
 768        }
 769
 770        flags = 0;
 771        if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
 772                flags |= EFX_FILTER_FLAG_RX_RSS;
 773        if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
 774                flags |= EFX_FILTER_FLAG_RX_SCATTER;
 775        vf->rx_filter_flags = flags;
 776        vf->rx_filter_qid = vf_rxq;
 777        vf->rx_filtering = true;
 778
 779        efx_sriov_reset_rx_filter(vf);
 780        queue_work(vfdi_workqueue, &efx->peer_work);
 781
 782        return VFDI_RC_SUCCESS;
 783}
 784
 785static int efx_vfdi_remove_all_filters(struct efx_vf *vf)
 786{
 787        vf->rx_filtering = false;
 788        efx_sriov_reset_rx_filter(vf);
 789        queue_work(vfdi_workqueue, &vf->efx->peer_work);
 790
 791        return VFDI_RC_SUCCESS;
 792}
 793
 794static int efx_vfdi_set_status_page(struct efx_vf *vf)
 795{
 796        struct efx_nic *efx = vf->efx;
 797        struct vfdi_req *req = vf->buf.addr;
 798        u64 page_count = req->u.set_status_page.peer_page_count;
 799        u64 max_page_count =
 800                (EFX_PAGE_SIZE -
 801                 offsetof(struct vfdi_req, u.set_status_page.peer_page_addr[0]))
 802                / sizeof(req->u.set_status_page.peer_page_addr[0]);
 803
 804        if (!req->u.set_status_page.dma_addr || page_count > max_page_count) {
 805                if (net_ratelimit())
 806                        netif_err(efx, hw, efx->net_dev,
 807                                  "ERROR: Invalid SET_STATUS_PAGE from %s\n",
 808                                  vf->pci_name);
 809                return VFDI_RC_EINVAL;
 810        }
 811
 812        mutex_lock(&efx->local_lock);
 813        mutex_lock(&vf->status_lock);
 814        vf->status_addr = req->u.set_status_page.dma_addr;
 815
 816        kfree(vf->peer_page_addrs);
 817        vf->peer_page_addrs = NULL;
 818        vf->peer_page_count = 0;
 819
 820        if (page_count) {
 821                vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
 822                                              GFP_KERNEL);
 823                if (vf->peer_page_addrs) {
 824                        memcpy(vf->peer_page_addrs,
 825                               req->u.set_status_page.peer_page_addr,
 826                               page_count * sizeof(u64));
 827                        vf->peer_page_count = page_count;
 828                }
 829        }
 830
 831        __efx_sriov_push_vf_status(vf);
 832        mutex_unlock(&vf->status_lock);
 833        mutex_unlock(&efx->local_lock);
 834
 835        return VFDI_RC_SUCCESS;
 836}
 837
 838static int efx_vfdi_clear_status_page(struct efx_vf *vf)
 839{
 840        mutex_lock(&vf->status_lock);
 841        vf->status_addr = 0;
 842        mutex_unlock(&vf->status_lock);
 843
 844        return VFDI_RC_SUCCESS;
 845}
 846
 847typedef int (*efx_vfdi_op_t)(struct efx_vf *vf);
 848
 849static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
 850        [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
 851        [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
 852        [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
 853        [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
 854        [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
 855        [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
 856        [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
 857        [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
 858};
 859
 860static void efx_sriov_vfdi(struct work_struct *work)
 861{
 862        struct efx_vf *vf = container_of(work, struct efx_vf, req);
 863        struct efx_nic *efx = vf->efx;
 864        struct vfdi_req *req = vf->buf.addr;
 865        struct efx_memcpy_req copy[2];
 866        int rc;
 867
 868        /* Copy this page into the local address space */
 869        memset(copy, '\0', sizeof(copy));
 870        copy[0].from_rid = vf->pci_rid;
 871        copy[0].from_addr = vf->req_addr;
 872        copy[0].to_rid = efx->pci_dev->devfn;
 873        copy[0].to_addr = vf->buf.dma_addr;
 874        copy[0].length = EFX_PAGE_SIZE;
 875        rc = efx_sriov_memcpy(efx, copy, 1);
 876        if (rc) {
 877                /* If we can't get the request, we can't reply to the caller */
 878                if (net_ratelimit())
 879                        netif_err(efx, hw, efx->net_dev,
 880                                  "ERROR: Unable to fetch VFDI request from %s rc %d\n",
 881                                  vf->pci_name, -rc);
 882                vf->busy = false;
 883                return;
 884        }
 885
 886        if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
 887                rc = vfdi_ops[req->op](vf);
 888                if (rc == 0) {
 889                        netif_dbg(efx, hw, efx->net_dev,
 890                                  "vfdi request %d from %s ok\n",
 891                                  req->op, vf->pci_name);
 892                }
 893        } else {
 894                netif_dbg(efx, hw, efx->net_dev,
 895                          "ERROR: Unrecognised request %d from VF %s addr "
 896                          "%llx\n", req->op, vf->pci_name,
 897                          (unsigned long long)vf->req_addr);
 898                rc = VFDI_RC_EOPNOTSUPP;
 899        }
 900
 901        /* Allow subsequent VF requests */
 902        vf->busy = false;
 903        smp_wmb();
 904
 905        /* Respond to the request */
 906        req->rc = rc;
 907        req->op = VFDI_OP_RESPONSE;
 908
 909        memset(copy, '\0', sizeof(copy));
 910        copy[0].from_buf = &req->rc;
 911        copy[0].to_rid = vf->pci_rid;
 912        copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
 913        copy[0].length = sizeof(req->rc);
 914        copy[1].from_buf = &req->op;
 915        copy[1].to_rid = vf->pci_rid;
 916        copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
 917        copy[1].length = sizeof(req->op);
 918
 919        (void) efx_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
 920}
 921
 922
 923
 924/* After a reset the event queues inside the guests no longer exist. Fill the
 925 * event ring in guest memory with VFDI reset events, then (re-initialise) the
 926 * event queue to raise an interrupt. The guest driver will then recover.
 927 */
 928static void efx_sriov_reset_vf(struct efx_vf *vf, struct efx_buffer *buffer)
 929{
 930        struct efx_nic *efx = vf->efx;
 931        struct efx_memcpy_req copy_req[4];
 932        efx_qword_t event;
 933        unsigned int pos, count, k, buftbl, abs_evq;
 934        efx_oword_t reg;
 935        efx_dword_t ptr;
 936        int rc;
 937
 938        BUG_ON(buffer->len != EFX_PAGE_SIZE);
 939
 940        if (!vf->evq0_count)
 941                return;
 942        BUG_ON(vf->evq0_count & (vf->evq0_count - 1));
 943
 944        mutex_lock(&vf->status_lock);
 945        EFX_POPULATE_QWORD_3(event,
 946                             FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
 947                             VFDI_EV_SEQ, vf->msg_seqno,
 948                             VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
 949        vf->msg_seqno++;
 950        for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
 951                memcpy(buffer->addr + pos, &event, sizeof(event));
 952
 953        for (pos = 0; pos < vf->evq0_count; pos += count) {
 954                count = min_t(unsigned, vf->evq0_count - pos,
 955                              ARRAY_SIZE(copy_req));
 956                for (k = 0; k < count; k++) {
 957                        copy_req[k].from_buf = NULL;
 958                        copy_req[k].from_rid = efx->pci_dev->devfn;
 959                        copy_req[k].from_addr = buffer->dma_addr;
 960                        copy_req[k].to_rid = vf->pci_rid;
 961                        copy_req[k].to_addr = vf->evq0_addrs[pos + k];
 962                        copy_req[k].length = EFX_PAGE_SIZE;
 963                }
 964                rc = efx_sriov_memcpy(efx, copy_req, count);
 965                if (rc) {
 966                        if (net_ratelimit())
 967                                netif_err(efx, hw, efx->net_dev,
 968                                          "ERROR: Unable to notify %s of reset"
 969                                          ": %d\n", vf->pci_name, -rc);
 970                        break;
 971                }
 972        }
 973
 974        /* Reinitialise, arm and trigger evq0 */
 975        abs_evq = abs_index(vf, 0);
 976        buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
 977        efx_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);
 978
 979        EFX_POPULATE_OWORD_3(reg,
 980                             FRF_CZ_TIMER_Q_EN, 1,
 981                             FRF_CZ_HOST_NOTIFY_MODE, 0,
 982                             FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
 983        efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
 984        EFX_POPULATE_OWORD_3(reg,
 985                             FRF_AZ_EVQ_EN, 1,
 986                             FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
 987                             FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
 988        efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
 989        EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
 990        efx_writed(efx, &ptr, FR_BZ_EVQ_RPTR + FR_BZ_EVQ_RPTR_STEP * abs_evq);
 991
 992        mutex_unlock(&vf->status_lock);
 993}
 994
 995static void efx_sriov_reset_vf_work(struct work_struct *work)
 996{
 997        struct efx_vf *vf = container_of(work, struct efx_vf, req);
 998        struct efx_nic *efx = vf->efx;
 999        struct efx_buffer buf;
1000
1001        if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO)) {
1002                efx_sriov_reset_vf(vf, &buf);
1003                efx_nic_free_buffer(efx, &buf);
1004        }
1005}
1006
1007static void efx_sriov_handle_no_channel(struct efx_nic *efx)
1008{
1009        netif_err(efx, drv, efx->net_dev,
1010                  "ERROR: IOV requires MSI-X and 1 additional interrupt"
1011                  "vector. IOV disabled\n");
1012        efx->vf_count = 0;
1013}
1014
1015static int efx_sriov_probe_channel(struct efx_channel *channel)
1016{
1017        channel->efx->vfdi_channel = channel;
1018        return 0;
1019}
1020
1021static void
1022efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
1023{
1024        snprintf(buf, len, "%s-iov", channel->efx->name);
1025}
1026
1027static const struct efx_channel_type efx_sriov_channel_type = {
1028        .handle_no_channel      = efx_sriov_handle_no_channel,
1029        .pre_probe              = efx_sriov_probe_channel,
1030        .post_remove            = efx_channel_dummy_op_void,
1031        .get_name               = efx_sriov_get_channel_name,
1032        /* no copy operation; channel must not be reallocated */
1033        .keep_eventq            = true,
1034};
1035
1036void efx_sriov_probe(struct efx_nic *efx)
1037{
1038        unsigned count;
1039
1040        if (!max_vfs)
1041                return;
1042
1043        if (efx_sriov_cmd(efx, false, &efx->vi_scale, &count))
1044                return;
1045        if (count > 0 && count > max_vfs)
1046                count = max_vfs;
1047
1048        /* efx_nic_dimension_resources() will reduce vf_count as appopriate */
1049        efx->vf_count = count;
1050
1051        efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_sriov_channel_type;
1052}
1053
1054/* Copy the list of individual addresses into the vfdi_status.peers
1055 * array and auxillary pages, protected by %local_lock. Drop that lock
1056 * and then broadcast the address list to every VF.
1057 */
1058static void efx_sriov_peer_work(struct work_struct *data)
1059{
1060        struct efx_nic *efx = container_of(data, struct efx_nic, peer_work);
1061        struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1062        struct efx_vf *vf;
1063        struct efx_local_addr *local_addr;
1064        struct vfdi_endpoint *peer;
1065        struct efx_endpoint_page *epp;
1066        struct list_head pages;
1067        unsigned int peer_space;
1068        unsigned int peer_count;
1069        unsigned int pos;
1070
1071        mutex_lock(&efx->local_lock);
1072
1073        /* Move the existing peer pages off %local_page_list */
1074        INIT_LIST_HEAD(&pages);
1075        list_splice_tail_init(&efx->local_page_list, &pages);
1076
1077        /* Populate the VF addresses starting from entry 1 (entry 0 is
1078         * the PF address)
1079         */
1080        peer = vfdi_status->peers + 1;
1081        peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
1082        peer_count = 1;
1083        for (pos = 0; pos < efx->vf_count; ++pos) {
1084                vf = efx->vf + pos;
1085
1086                mutex_lock(&vf->status_lock);
1087                if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
1088                        *peer++ = vf->addr;
1089                        ++peer_count;
1090                        --peer_space;
1091                        BUG_ON(peer_space == 0);
1092                }
1093                mutex_unlock(&vf->status_lock);
1094        }
1095
1096        /* Fill the remaining addresses */
1097        list_for_each_entry(local_addr, &efx->local_addr_list, link) {
1098                memcpy(peer->mac_addr, local_addr->addr, ETH_ALEN);
1099                peer->tci = 0;
1100                ++peer;
1101                ++peer_count;
1102                if (--peer_space == 0) {
1103                        if (list_empty(&pages)) {
1104                                epp = kmalloc(sizeof(*epp), GFP_KERNEL);
1105                                if (!epp)
1106                                        break;
1107                                epp->ptr = dma_alloc_coherent(
1108                                        &efx->pci_dev->dev, EFX_PAGE_SIZE,
1109                                        &epp->addr, GFP_KERNEL);
1110                                if (!epp->ptr) {
1111                                        kfree(epp);
1112                                        break;
1113                                }
1114                        } else {
1115                                epp = list_first_entry(
1116                                        &pages, struct efx_endpoint_page, link);
1117                                list_del(&epp->link);
1118                        }
1119
1120                        list_add_tail(&epp->link, &efx->local_page_list);
1121                        peer = (struct vfdi_endpoint *)epp->ptr;
1122                        peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
1123                }
1124        }
1125        vfdi_status->peer_count = peer_count;
1126        mutex_unlock(&efx->local_lock);
1127
1128        /* Free any now unused endpoint pages */
1129        while (!list_empty(&pages)) {
1130                epp = list_first_entry(
1131                        &pages, struct efx_endpoint_page, link);
1132                list_del(&epp->link);
1133                dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1134                                  epp->ptr, epp->addr);
1135                kfree(epp);
1136        }
1137
1138        /* Finally, push the pages */
1139        for (pos = 0; pos < efx->vf_count; ++pos) {
1140                vf = efx->vf + pos;
1141
1142                mutex_lock(&vf->status_lock);
1143                if (vf->status_addr)
1144                        __efx_sriov_push_vf_status(vf);
1145                mutex_unlock(&vf->status_lock);
1146        }
1147}
1148
1149static void efx_sriov_free_local(struct efx_nic *efx)
1150{
1151        struct efx_local_addr *local_addr;
1152        struct efx_endpoint_page *epp;
1153
1154        while (!list_empty(&efx->local_addr_list)) {
1155                local_addr = list_first_entry(&efx->local_addr_list,
1156                                              struct efx_local_addr, link);
1157                list_del(&local_addr->link);
1158                kfree(local_addr);
1159        }
1160
1161        while (!list_empty(&efx->local_page_list)) {
1162                epp = list_first_entry(&efx->local_page_list,
1163                                       struct efx_endpoint_page, link);
1164                list_del(&epp->link);
1165                dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
1166                                  epp->ptr, epp->addr);
1167                kfree(epp);
1168        }
1169}
1170
1171static int efx_sriov_vf_alloc(struct efx_nic *efx)
1172{
1173        unsigned index;
1174        struct efx_vf *vf;
1175
1176        efx->vf = kzalloc(sizeof(struct efx_vf) * efx->vf_count, GFP_KERNEL);
1177        if (!efx->vf)
1178                return -ENOMEM;
1179
1180        for (index = 0; index < efx->vf_count; ++index) {
1181                vf = efx->vf + index;
1182
1183                vf->efx = efx;
1184                vf->index = index;
1185                vf->rx_filter_id = -1;
1186                vf->tx_filter_mode = VF_TX_FILTER_AUTO;
1187                vf->tx_filter_id = -1;
1188                INIT_WORK(&vf->req, efx_sriov_vfdi);
1189                INIT_WORK(&vf->reset_work, efx_sriov_reset_vf_work);
1190                init_waitqueue_head(&vf->flush_waitq);
1191                mutex_init(&vf->status_lock);
1192                mutex_init(&vf->txq_lock);
1193        }
1194
1195        return 0;
1196}
1197
1198static void efx_sriov_vfs_fini(struct efx_nic *efx)
1199{
1200        struct efx_vf *vf;
1201        unsigned int pos;
1202
1203        for (pos = 0; pos < efx->vf_count; ++pos) {
1204                vf = efx->vf + pos;
1205
1206                efx_nic_free_buffer(efx, &vf->buf);
1207                kfree(vf->peer_page_addrs);
1208                vf->peer_page_addrs = NULL;
1209                vf->peer_page_count = 0;
1210
1211                vf->evq0_count = 0;
1212        }
1213}
1214
1215static int efx_sriov_vfs_init(struct efx_nic *efx)
1216{
1217        struct pci_dev *pci_dev = efx->pci_dev;
1218        unsigned index, devfn, sriov, buftbl_base;
1219        u16 offset, stride;
1220        struct efx_vf *vf;
1221        int rc;
1222
1223        sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
1224        if (!sriov)
1225                return -ENOENT;
1226
1227        pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
1228        pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);
1229
1230        buftbl_base = efx->vf_buftbl_base;
1231        devfn = pci_dev->devfn + offset;
1232        for (index = 0; index < efx->vf_count; ++index) {
1233                vf = efx->vf + index;
1234
1235                /* Reserve buffer entries */
1236                vf->buftbl_base = buftbl_base;
1237                buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);
1238
1239                vf->pci_rid = devfn;
1240                snprintf(vf->pci_name, sizeof(vf->pci_name),
1241                         "%04x:%02x:%02x.%d",
1242                         pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
1243                         PCI_SLOT(devfn), PCI_FUNC(devfn));
1244
1245                rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE,
1246                                          GFP_KERNEL);
1247                if (rc)
1248                        goto fail;
1249
1250                devfn += stride;
1251        }
1252
1253        return 0;
1254
1255fail:
1256        efx_sriov_vfs_fini(efx);
1257        return rc;
1258}
1259
1260int efx_sriov_init(struct efx_nic *efx)
1261{
1262        struct net_device *net_dev = efx->net_dev;
1263        struct vfdi_status *vfdi_status;
1264        int rc;
1265
1266        /* Ensure there's room for vf_channel */
1267        BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
1268        /* Ensure that VI_BASE is aligned on VI_SCALE */
1269        BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));
1270
1271        if (efx->vf_count == 0)
1272                return 0;
1273
1274        rc = efx_sriov_cmd(efx, true, NULL, NULL);
1275        if (rc)
1276                goto fail_cmd;
1277
1278        rc = efx_nic_alloc_buffer(efx, &efx->vfdi_status, sizeof(*vfdi_status),
1279                                  GFP_KERNEL);
1280        if (rc)
1281                goto fail_status;
1282        vfdi_status = efx->vfdi_status.addr;
1283        memset(vfdi_status, 0, sizeof(*vfdi_status));
1284        vfdi_status->version = 1;
1285        vfdi_status->length = sizeof(*vfdi_status);
1286        vfdi_status->max_tx_channels = vf_max_tx_channels;
1287        vfdi_status->vi_scale = efx->vi_scale;
1288        vfdi_status->rss_rxq_count = efx->rss_spread;
1289        vfdi_status->peer_count = 1 + efx->vf_count;
1290        vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;
1291
1292        rc = efx_sriov_vf_alloc(efx);
1293        if (rc)
1294                goto fail_alloc;
1295
1296        mutex_init(&efx->local_lock);
1297        INIT_WORK(&efx->peer_work, efx_sriov_peer_work);
1298        INIT_LIST_HEAD(&efx->local_addr_list);
1299        INIT_LIST_HEAD(&efx->local_page_list);
1300
1301        rc = efx_sriov_vfs_init(efx);
1302        if (rc)
1303                goto fail_vfs;
1304
1305        rtnl_lock();
1306        memcpy(vfdi_status->peers[0].mac_addr,
1307               net_dev->dev_addr, ETH_ALEN);
1308        efx->vf_init_count = efx->vf_count;
1309        rtnl_unlock();
1310
1311        efx_sriov_usrev(efx, true);
1312
1313        /* At this point we must be ready to accept VFDI requests */
1314
1315        rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
1316        if (rc)
1317                goto fail_pci;
1318
1319        netif_info(efx, probe, net_dev,
1320                   "enabled SR-IOV for %d VFs, %d VI per VF\n",
1321                   efx->vf_count, efx_vf_size(efx));
1322        return 0;
1323
1324fail_pci:
1325        efx_sriov_usrev(efx, false);
1326        rtnl_lock();
1327        efx->vf_init_count = 0;
1328        rtnl_unlock();
1329        efx_sriov_vfs_fini(efx);
1330fail_vfs:
1331        cancel_work_sync(&efx->peer_work);
1332        efx_sriov_free_local(efx);
1333        kfree(efx->vf);
1334fail_alloc:
1335        efx_nic_free_buffer(efx, &efx->vfdi_status);
1336fail_status:
1337        efx_sriov_cmd(efx, false, NULL, NULL);
1338fail_cmd:
1339        return rc;
1340}
1341
1342void efx_sriov_fini(struct efx_nic *efx)
1343{
1344        struct efx_vf *vf;
1345        unsigned int pos;
1346
1347        if (efx->vf_init_count == 0)
1348                return;
1349
1350        /* Disable all interfaces to reconfiguration */
1351        BUG_ON(efx->vfdi_channel->enabled);
1352        efx_sriov_usrev(efx, false);
1353        rtnl_lock();
1354        efx->vf_init_count = 0;
1355        rtnl_unlock();
1356
1357        /* Flush all reconfiguration work */
1358        for (pos = 0; pos < efx->vf_count; ++pos) {
1359                vf = efx->vf + pos;
1360                cancel_work_sync(&vf->req);
1361                cancel_work_sync(&vf->reset_work);
1362        }
1363        cancel_work_sync(&efx->peer_work);
1364
1365        pci_disable_sriov(efx->pci_dev);
1366
1367        /* Tear down back-end state */
1368        efx_sriov_vfs_fini(efx);
1369        efx_sriov_free_local(efx);
1370        kfree(efx->vf);
1371        efx_nic_free_buffer(efx, &efx->vfdi_status);
1372        efx_sriov_cmd(efx, false, NULL, NULL);
1373}
1374
1375void efx_sriov_event(struct efx_channel *channel, efx_qword_t *event)
1376{
1377        struct efx_nic *efx = channel->efx;
1378        struct efx_vf *vf;
1379        unsigned qid, seq, type, data;
1380
1381        qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);
1382
1383        /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
1384        BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
1385        seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
1386        type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
1387        data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);
1388
1389        netif_vdbg(efx, hw, efx->net_dev,
1390                   "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
1391                   qid, seq, type, data);
1392
1393        if (map_vi_index(efx, qid, &vf, NULL))
1394                return;
1395        if (vf->busy)
1396                goto error;
1397
1398        if (type == VFDI_EV_TYPE_REQ_WORD0) {
1399                /* Resynchronise */
1400                vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1401                vf->req_seqno = seq + 1;
1402                vf->req_addr = 0;
1403        } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
1404                goto error;
1405
1406        switch (vf->req_type) {
1407        case VFDI_EV_TYPE_REQ_WORD0:
1408        case VFDI_EV_TYPE_REQ_WORD1:
1409        case VFDI_EV_TYPE_REQ_WORD2:
1410                vf->req_addr |= (u64)data << (vf->req_type << 4);
1411                ++vf->req_type;
1412                return;
1413
1414        case VFDI_EV_TYPE_REQ_WORD3:
1415                vf->req_addr |= (u64)data << 48;
1416                vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1417                vf->busy = true;
1418                queue_work(vfdi_workqueue, &vf->req);
1419                return;
1420        }
1421
1422error:
1423        if (net_ratelimit())
1424                netif_err(efx, hw, efx->net_dev,
1425                          "ERROR: Screaming VFDI request from %s\n",
1426                          vf->pci_name);
1427        /* Reset the request and sequence number */
1428        vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
1429        vf->req_seqno = seq + 1;
1430}
1431
1432void efx_sriov_flr(struct efx_nic *efx, unsigned vf_i)
1433{
1434        struct efx_vf *vf;
1435
1436        if (vf_i > efx->vf_init_count)
1437                return;
1438        vf = efx->vf + vf_i;
1439        netif_info(efx, hw, efx->net_dev,
1440                   "FLR on VF %s\n", vf->pci_name);
1441
1442        vf->status_addr = 0;
1443        efx_vfdi_remove_all_filters(vf);
1444        efx_vfdi_flush_clear(vf);
1445
1446        vf->evq0_count = 0;
1447}
1448
1449void efx_sriov_mac_address_changed(struct efx_nic *efx)
1450{
1451        struct vfdi_status *vfdi_status = efx->vfdi_status.addr;
1452
1453        if (!efx->vf_init_count)
1454                return;
1455        memcpy(vfdi_status->peers[0].mac_addr,
1456               efx->net_dev->dev_addr, ETH_ALEN);
1457        queue_work(vfdi_workqueue, &efx->peer_work);
1458}
1459
1460void efx_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1461{
1462        struct efx_vf *vf;
1463        unsigned queue, qid;
1464
1465        queue = EFX_QWORD_FIELD(*event,  FSF_AZ_DRIVER_EV_SUBDATA);
1466        if (map_vi_index(efx, queue, &vf, &qid))
1467                return;
1468        /* Ignore flush completions triggered by an FLR */
1469        if (!test_bit(qid, vf->txq_mask))
1470                return;
1471
1472        __clear_bit(qid, vf->txq_mask);
1473        --vf->txq_count;
1474
1475        if (efx_vfdi_flush_wake(vf))
1476                wake_up(&vf->flush_waitq);
1477}
1478
1479void efx_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
1480{
1481        struct efx_vf *vf;
1482        unsigned ev_failed, queue, qid;
1483
1484        queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
1485        ev_failed = EFX_QWORD_FIELD(*event,
1486                                    FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
1487        if (map_vi_index(efx, queue, &vf, &qid))
1488                return;
1489        if (!test_bit(qid, vf->rxq_mask))
1490                return;
1491
1492        if (ev_failed) {
1493                set_bit(qid, vf->rxq_retry_mask);
1494                atomic_inc(&vf->rxq_retry_count);
1495        } else {
1496                __clear_bit(qid, vf->rxq_mask);
1497                --vf->rxq_count;
1498        }
1499        if (efx_vfdi_flush_wake(vf))
1500                wake_up(&vf->flush_waitq);
1501}
1502
1503/* Called from napi. Schedule the reset work item */
1504void efx_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
1505{
1506        struct efx_vf *vf;
1507        unsigned int rel;
1508
1509        if (map_vi_index(efx, dmaq, &vf, &rel))
1510                return;
1511
1512        if (net_ratelimit())
1513                netif_err(efx, hw, efx->net_dev,
1514                          "VF %d DMA Q %d reports descriptor fetch error.\n",
1515                          vf->index, rel);
1516        queue_work(vfdi_workqueue, &vf->reset_work);
1517}
1518
1519/* Reset all VFs */
1520void efx_sriov_reset(struct efx_nic *efx)
1521{
1522        unsigned int vf_i;
1523        struct efx_buffer buf;
1524        struct efx_vf *vf;
1525
1526        ASSERT_RTNL();
1527
1528        if (efx->vf_init_count == 0)
1529                return;
1530
1531        efx_sriov_usrev(efx, true);
1532        (void)efx_sriov_cmd(efx, true, NULL, NULL);
1533
1534        if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO))
1535                return;
1536
1537        for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
1538                vf = efx->vf + vf_i;
1539                efx_sriov_reset_vf(vf, &buf);
1540        }
1541
1542        efx_nic_free_buffer(efx, &buf);
1543}
1544
1545int efx_init_sriov(void)
1546{
1547        /* A single threaded workqueue is sufficient. efx_sriov_vfdi() and
1548         * efx_sriov_peer_work() spend almost all their time sleeping for
1549         * MCDI to complete anyway
1550         */
1551        vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
1552        if (!vfdi_workqueue)
1553                return -ENOMEM;
1554
1555        return 0;
1556}
1557
1558void efx_fini_sriov(void)
1559{
1560        destroy_workqueue(vfdi_workqueue);
1561}
1562
1563int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac)
1564{
1565        struct efx_nic *efx = netdev_priv(net_dev);
1566        struct efx_vf *vf;
1567
1568        if (vf_i >= efx->vf_init_count)
1569                return -EINVAL;
1570        vf = efx->vf + vf_i;
1571
1572        mutex_lock(&vf->status_lock);
1573        memcpy(vf->addr.mac_addr, mac, ETH_ALEN);
1574        __efx_sriov_update_vf_addr(vf);
1575        mutex_unlock(&vf->status_lock);
1576
1577        return 0;
1578}
1579
1580int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i,
1581                          u16 vlan, u8 qos)
1582{
1583        struct efx_nic *efx = netdev_priv(net_dev);
1584        struct efx_vf *vf;
1585        u16 tci;
1586
1587        if (vf_i >= efx->vf_init_count)
1588                return -EINVAL;
1589        vf = efx->vf + vf_i;
1590
1591        mutex_lock(&vf->status_lock);
1592        tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT);
1593        vf->addr.tci = htons(tci);
1594        __efx_sriov_update_vf_addr(vf);
1595        mutex_unlock(&vf->status_lock);
1596
1597        return 0;
1598}
1599
1600int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
1601                              bool spoofchk)
1602{
1603        struct efx_nic *efx = netdev_priv(net_dev);
1604        struct efx_vf *vf;
1605        int rc;
1606
1607        if (vf_i >= efx->vf_init_count)
1608                return -EINVAL;
1609        vf = efx->vf + vf_i;
1610
1611        mutex_lock(&vf->txq_lock);
1612        if (vf->txq_count == 0) {
1613                vf->tx_filter_mode =
1614                        spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF;
1615                rc = 0;
1616        } else {
1617                /* This cannot be changed while TX queues are running */
1618                rc = -EBUSY;
1619        }
1620        mutex_unlock(&vf->txq_lock);
1621        return rc;
1622}
1623
1624int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
1625                            struct ifla_vf_info *ivi)
1626{
1627        struct efx_nic *efx = netdev_priv(net_dev);
1628        struct efx_vf *vf;
1629        u16 tci;
1630
1631        if (vf_i >= efx->vf_init_count)
1632                return -EINVAL;
1633        vf = efx->vf + vf_i;
1634
1635        ivi->vf = vf_i;
1636        memcpy(ivi->mac, vf->addr.mac_addr, ETH_ALEN);
1637        ivi->tx_rate = 0;
1638        tci = ntohs(vf->addr.tci);
1639        ivi->vlan = tci & VLAN_VID_MASK;
1640        ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
1641        ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON;
1642
1643        return 0;
1644}
1645
1646