dpdk/drivers/net/avp/avp_ethdev.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2013-2017 Wind River Systems, Inc.
   3 */
   4
   5#include <stdint.h>
   6#include <string.h>
   7#include <stdio.h>
   8#include <errno.h>
   9#include <unistd.h>
  10
  11#include <ethdev_driver.h>
  12#include <ethdev_pci.h>
  13#include <rte_memcpy.h>
  14#include <rte_string_fns.h>
  15#include <rte_malloc.h>
  16#include <rte_atomic.h>
  17#include <rte_branch_prediction.h>
  18#include <rte_pci.h>
  19#include <rte_bus_pci.h>
  20#include <rte_ether.h>
  21#include <rte_common.h>
  22#include <rte_cycles.h>
  23#include <rte_spinlock.h>
  24#include <rte_byteorder.h>
  25#include <rte_dev.h>
  26#include <rte_memory.h>
  27#include <rte_eal.h>
  28#include <rte_io.h>
  29
  30#include "rte_avp_common.h"
  31#include "rte_avp_fifo.h"
  32
  33#include "avp_logs.h"
  34
  35static int avp_dev_create(struct rte_pci_device *pci_dev,
  36                          struct rte_eth_dev *eth_dev);
  37
  38static int avp_dev_configure(struct rte_eth_dev *dev);
  39static int avp_dev_start(struct rte_eth_dev *dev);
  40static int avp_dev_stop(struct rte_eth_dev *dev);
  41static int avp_dev_close(struct rte_eth_dev *dev);
  42static int avp_dev_info_get(struct rte_eth_dev *dev,
  43                            struct rte_eth_dev_info *dev_info);
  44static int avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
  45static int avp_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete);
  46static int avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
  47static int avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
  48
  49static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
  50                                  uint16_t rx_queue_id,
  51                                  uint16_t nb_rx_desc,
  52                                  unsigned int socket_id,
  53                                  const struct rte_eth_rxconf *rx_conf,
  54                                  struct rte_mempool *pool);
  55
  56static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
  57                                  uint16_t tx_queue_id,
  58                                  uint16_t nb_tx_desc,
  59                                  unsigned int socket_id,
  60                                  const struct rte_eth_txconf *tx_conf);
  61
  62static uint16_t avp_recv_scattered_pkts(void *rx_queue,
  63                                        struct rte_mbuf **rx_pkts,
  64                                        uint16_t nb_pkts);
  65
  66static uint16_t avp_recv_pkts(void *rx_queue,
  67                              struct rte_mbuf **rx_pkts,
  68                              uint16_t nb_pkts);
  69
  70static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
  71                                        struct rte_mbuf **tx_pkts,
  72                                        uint16_t nb_pkts);
  73
  74static uint16_t avp_xmit_pkts(void *tx_queue,
  75                              struct rte_mbuf **tx_pkts,
  76                              uint16_t nb_pkts);
  77
  78static void avp_dev_rx_queue_release(void *rxq);
  79static void avp_dev_tx_queue_release(void *txq);
  80
  81static int avp_dev_stats_get(struct rte_eth_dev *dev,
  82                              struct rte_eth_stats *stats);
  83static int avp_dev_stats_reset(struct rte_eth_dev *dev);
  84
  85
  86#define AVP_MAX_RX_BURST 64
  87#define AVP_MAX_TX_BURST 64
  88#define AVP_MAX_MAC_ADDRS 1
  89#define AVP_MIN_RX_BUFSIZE RTE_ETHER_MIN_LEN
  90
  91
  92/*
  93 * Defines the number of microseconds to wait before checking the response
  94 * queue for completion.
  95 */
  96#define AVP_REQUEST_DELAY_USECS (5000)
  97
  98/*
  99 * Defines the number times to check the response queue for completion before
 100 * declaring a timeout.
 101 */
 102#define AVP_MAX_REQUEST_RETRY (100)
 103
 104/* Defines the current PCI driver version number */
 105#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
 106
 107/*
 108 * The set of PCI devices this driver supports
 109 */
 110static const struct rte_pci_id pci_id_avp_map[] = {
 111        { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
 112          .device_id = RTE_AVP_PCI_DEVICE_ID,
 113          .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
 114          .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
 115          .class_id = RTE_CLASS_ANY_ID,
 116        },
 117
 118        { .vendor_id = 0, /* sentinel */
 119        },
 120};
 121
 122/*
 123 * dev_ops for avp, bare necessities for basic operation
 124 */
 125static const struct eth_dev_ops avp_eth_dev_ops = {
 126        .dev_configure       = avp_dev_configure,
 127        .dev_start           = avp_dev_start,
 128        .dev_stop            = avp_dev_stop,
 129        .dev_close           = avp_dev_close,
 130        .dev_infos_get       = avp_dev_info_get,
 131        .vlan_offload_set    = avp_vlan_offload_set,
 132        .stats_get           = avp_dev_stats_get,
 133        .stats_reset         = avp_dev_stats_reset,
 134        .link_update         = avp_dev_link_update,
 135        .promiscuous_enable  = avp_dev_promiscuous_enable,
 136        .promiscuous_disable = avp_dev_promiscuous_disable,
 137        .rx_queue_setup      = avp_dev_rx_queue_setup,
 138        .rx_queue_release    = avp_dev_rx_queue_release,
 139        .tx_queue_setup      = avp_dev_tx_queue_setup,
 140        .tx_queue_release    = avp_dev_tx_queue_release,
 141};
 142
 143/**@{ AVP device flags */
 144#define AVP_F_PROMISC (1 << 1)
 145#define AVP_F_CONFIGURED (1 << 2)
 146#define AVP_F_LINKUP (1 << 3)
 147#define AVP_F_DETACHED (1 << 4)
 148/**@} */
 149
 150/* Ethernet device validation marker */
 151#define AVP_ETHDEV_MAGIC 0x92972862
 152
 153/*
 154 * Defines the AVP device attributes which are attached to an RTE ethernet
 155 * device
 156 */
 157struct avp_dev {
 158        uint32_t magic; /**< Memory validation marker */
 159        uint64_t device_id; /**< Unique system identifier */
 160        struct rte_ether_addr ethaddr; /**< Host specified MAC address */
 161        struct rte_eth_dev_data *dev_data;
 162        /**< Back pointer to ethernet device data */
 163        volatile uint32_t flags; /**< Device operational flags */
 164        uint16_t port_id; /**< Ethernet port identifier */
 165        struct rte_mempool *pool; /**< pkt mbuf mempool */
 166        unsigned int guest_mbuf_size; /**< local pool mbuf size */
 167        unsigned int host_mbuf_size; /**< host mbuf size */
 168        unsigned int max_rx_pkt_len; /**< maximum receive unit */
 169        uint32_t host_features; /**< Supported feature bitmap */
 170        uint32_t features; /**< Enabled feature bitmap */
 171        unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
 172        unsigned int max_tx_queues; /**< Maximum number of transmit queues */
 173        unsigned int num_rx_queues; /**< Negotiated number of receive queues */
 174        unsigned int max_rx_queues; /**< Maximum number of receive queues */
 175
 176        struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
 177        struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
 178        struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
 179        /**< Allocated mbufs queue */
 180        struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
 181        /**< To be freed mbufs queue */
 182
 183        /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
 184        rte_spinlock_t lock;
 185
 186        /* For request & response */
 187        struct rte_avp_fifo *req_q; /**< Request queue */
 188        struct rte_avp_fifo *resp_q; /**< Response queue */
 189        void *host_sync_addr; /**< (host) Req/Resp Mem address */
 190        void *sync_addr; /**< Req/Resp Mem address */
 191        void *host_mbuf_addr; /**< (host) MBUF pool start address */
 192        void *mbuf_addr; /**< MBUF pool start address */
 193} __rte_cache_aligned;
 194
 195/* RTE ethernet private data */
 196struct avp_adapter {
 197        struct avp_dev avp;
 198} __rte_cache_aligned;
 199
 200
 201/* 32-bit MMIO register write */
 202#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
 203
 204/* 32-bit MMIO register read */
 205#define AVP_READ32(_addr) rte_read32_relaxed((_addr))
 206
 207/* Macro to cast the ethernet device private data to a AVP object */
 208#define AVP_DEV_PRIVATE_TO_HW(adapter) \
 209        (&((struct avp_adapter *)adapter)->avp)
 210
 211/*
 212 * Defines the structure of a AVP device queue for the purpose of handling the
 213 * receive and transmit burst callback functions
 214 */
 215struct avp_queue {
 216        struct rte_eth_dev_data *dev_data;
 217        /**< Backpointer to ethernet device data */
 218        struct avp_dev *avp; /**< Backpointer to AVP device */
 219        uint16_t queue_id;
 220        /**< Queue identifier used for indexing current queue */
 221        uint16_t queue_base;
 222        /**< Base queue identifier for queue servicing */
 223        uint16_t queue_limit;
 224        /**< Maximum queue identifier for queue servicing */
 225
 226        uint64_t packets;
 227        uint64_t bytes;
 228        uint64_t errors;
 229};
 230
 231/* send a request and wait for a response
 232 *
 233 * @warning must be called while holding the avp->lock spinlock.
 234 */
 235static int
 236avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
 237{
 238        unsigned int retry = AVP_MAX_REQUEST_RETRY;
 239        void *resp_addr = NULL;
 240        unsigned int count;
 241        int ret;
 242
 243        PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
 244
 245        request->result = -ENOTSUP;
 246
 247        /* Discard any stale responses before starting a new request */
 248        while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
 249                PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
 250
 251        rte_memcpy(avp->sync_addr, request, sizeof(*request));
 252        count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
 253        if (count < 1) {
 254                PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
 255                            request->req_id);
 256                ret = -EBUSY;
 257                goto done;
 258        }
 259
 260        while (retry--) {
 261                /* wait for a response */
 262                usleep(AVP_REQUEST_DELAY_USECS);
 263
 264                count = avp_fifo_count(avp->resp_q);
 265                if (count >= 1) {
 266                        /* response received */
 267                        break;
 268                }
 269
 270                if (retry == 0) {
 271                        PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
 272                                    request->req_id);
 273                        ret = -ETIME;
 274                        goto done;
 275                }
 276        }
 277
 278        /* retrieve the response */
 279        count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
 280        if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
 281                PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
 282                            count, resp_addr, avp->host_sync_addr);
 283                ret = -ENODATA;
 284                goto done;
 285        }
 286
 287        /* copy to user buffer */
 288        rte_memcpy(request, avp->sync_addr, sizeof(*request));
 289        ret = 0;
 290
 291        PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
 292                    request->result, request->req_id);
 293
 294done:
 295        return ret;
 296}
 297
 298static int
 299avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
 300{
 301        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 302        struct rte_avp_request request;
 303        int ret;
 304
 305        /* setup a link state change request */
 306        memset(&request, 0, sizeof(request));
 307        request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
 308        request.if_up = state;
 309
 310        ret = avp_dev_process_request(avp, &request);
 311
 312        return ret == 0 ? request.result : ret;
 313}
 314
 315static int
 316avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
 317                        struct rte_avp_device_config *config)
 318{
 319        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 320        struct rte_avp_request request;
 321        int ret;
 322
 323        /* setup a configure request */
 324        memset(&request, 0, sizeof(request));
 325        request.req_id = RTE_AVP_REQ_CFG_DEVICE;
 326        memcpy(&request.config, config, sizeof(request.config));
 327
 328        ret = avp_dev_process_request(avp, &request);
 329
 330        return ret == 0 ? request.result : ret;
 331}
 332
 333static int
 334avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
 335{
 336        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 337        struct rte_avp_request request;
 338        int ret;
 339
 340        /* setup a shutdown request */
 341        memset(&request, 0, sizeof(request));
 342        request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
 343
 344        ret = avp_dev_process_request(avp, &request);
 345
 346        return ret == 0 ? request.result : ret;
 347}
 348
 349/* translate from host mbuf virtual address to guest virtual address */
 350static inline void *
 351avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
 352{
 353        return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
 354                                       (uintptr_t)avp->host_mbuf_addr),
 355                           (uintptr_t)avp->mbuf_addr);
 356}
 357
 358/* translate from host physical address to guest virtual address */
 359static void *
 360avp_dev_translate_address(struct rte_eth_dev *eth_dev,
 361                          rte_iova_t host_phys_addr)
 362{
 363        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 364        struct rte_mem_resource *resource;
 365        struct rte_avp_memmap_info *info;
 366        struct rte_avp_memmap *map;
 367        off_t offset;
 368        void *addr;
 369        unsigned int i;
 370
 371        addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
 372        resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
 373        info = (struct rte_avp_memmap_info *)resource->addr;
 374
 375        offset = 0;
 376        for (i = 0; i < info->nb_maps; i++) {
 377                /* search all segments looking for a matching address */
 378                map = &info->maps[i];
 379
 380                if ((host_phys_addr >= map->phys_addr) &&
 381                        (host_phys_addr < (map->phys_addr + map->length))) {
 382                        /* address is within this segment */
 383                        offset += (host_phys_addr - map->phys_addr);
 384                        addr = RTE_PTR_ADD(addr, (uintptr_t)offset);
 385
 386                        PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
 387                                    host_phys_addr, addr);
 388
 389                        return addr;
 390                }
 391                offset += map->length;
 392        }
 393
 394        return NULL;
 395}
 396
 397/* verify that the incoming device version is compatible with our version */
 398static int
 399avp_dev_version_check(uint32_t version)
 400{
 401        uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
 402        uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
 403
 404        if (device <= driver) {
 405                /* the host driver version is less than or equal to ours */
 406                return 0;
 407        }
 408
 409        return 1;
 410}
 411
 412/* verify that memory regions have expected version and validation markers */
 413static int
 414avp_dev_check_regions(struct rte_eth_dev *eth_dev)
 415{
 416        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 417        struct rte_avp_memmap_info *memmap;
 418        struct rte_avp_device_info *info;
 419        struct rte_mem_resource *resource;
 420        unsigned int i;
 421
 422        /* Dump resource info for debug */
 423        for (i = 0; i < PCI_MAX_RESOURCE; i++) {
 424                resource = &pci_dev->mem_resource[i];
 425                if ((resource->phys_addr == 0) || (resource->len == 0))
 426                        continue;
 427
 428                PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
 429                            i, resource->phys_addr,
 430                            resource->len, resource->addr);
 431
 432                switch (i) {
 433                case RTE_AVP_PCI_MEMMAP_BAR:
 434                        memmap = (struct rte_avp_memmap_info *)resource->addr;
 435                        if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
 436                            (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
 437                                PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
 438                                            memmap->magic, memmap->version);
 439                                return -EINVAL;
 440                        }
 441                        break;
 442
 443                case RTE_AVP_PCI_DEVICE_BAR:
 444                        info = (struct rte_avp_device_info *)resource->addr;
 445                        if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
 446                            avp_dev_version_check(info->version)) {
 447                                PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
 448                                            info->magic, info->version,
 449                                            AVP_DPDK_DRIVER_VERSION);
 450                                return -EINVAL;
 451                        }
 452                        break;
 453
 454                case RTE_AVP_PCI_MEMORY_BAR:
 455                case RTE_AVP_PCI_MMIO_BAR:
 456                        if (resource->addr == NULL) {
 457                                PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
 458                                            i);
 459                                return -EINVAL;
 460                        }
 461                        break;
 462
 463                case RTE_AVP_PCI_MSIX_BAR:
 464                default:
 465                        /* no validation required */
 466                        break;
 467                }
 468        }
 469
 470        return 0;
 471}
 472
 473static int
 474avp_dev_detach(struct rte_eth_dev *eth_dev)
 475{
 476        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 477        int ret;
 478
 479        PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
 480                    eth_dev->data->port_id, avp->device_id);
 481
 482        rte_spinlock_lock(&avp->lock);
 483
 484        if (avp->flags & AVP_F_DETACHED) {
 485                PMD_DRV_LOG(NOTICE, "port %u already detached\n",
 486                            eth_dev->data->port_id);
 487                ret = 0;
 488                goto unlock;
 489        }
 490
 491        /* shutdown the device first so the host stops sending us packets. */
 492        ret = avp_dev_ctrl_shutdown(eth_dev);
 493        if (ret < 0) {
 494                PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
 495                            ret);
 496                avp->flags &= ~AVP_F_DETACHED;
 497                goto unlock;
 498        }
 499
 500        avp->flags |= AVP_F_DETACHED;
 501        rte_wmb();
 502
 503        /* wait for queues to acknowledge the presence of the detach flag */
 504        rte_delay_ms(1);
 505
 506        ret = 0;
 507
 508unlock:
 509        rte_spinlock_unlock(&avp->lock);
 510        return ret;
 511}
 512
 513static void
 514_avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
 515{
 516        struct avp_dev *avp =
 517                AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 518        struct avp_queue *rxq;
 519        uint16_t queue_count;
 520        uint16_t remainder;
 521
 522        rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
 523
 524        /*
 525         * Must map all AVP fifos as evenly as possible between the configured
 526         * device queues.  Each device queue will service a subset of the AVP
 527         * fifos. If there is an odd number of device queues the first set of
 528         * device queues will get the extra AVP fifos.
 529         */
 530        queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
 531        remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
 532        if (rx_queue_id < remainder) {
 533                /* these queues must service one extra FIFO */
 534                rxq->queue_base = rx_queue_id * (queue_count + 1);
 535                rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
 536        } else {
 537                /* these queues service the regular number of FIFO */
 538                rxq->queue_base = ((remainder * (queue_count + 1)) +
 539                                   ((rx_queue_id - remainder) * queue_count));
 540                rxq->queue_limit = rxq->queue_base + queue_count - 1;
 541        }
 542
 543        PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
 544                    rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
 545
 546        rxq->queue_id = rxq->queue_base;
 547}
 548
 549static void
 550_avp_set_queue_counts(struct rte_eth_dev *eth_dev)
 551{
 552        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 553        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 554        struct rte_avp_device_info *host_info;
 555        void *addr;
 556
 557        addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
 558        host_info = (struct rte_avp_device_info *)addr;
 559
 560        /*
 561         * the transmit direction is not negotiated beyond respecting the max
 562         * number of queues because the host can handle arbitrary guest tx
 563         * queues (host rx queues).
 564         */
 565        avp->num_tx_queues = eth_dev->data->nb_tx_queues;
 566
 567        /*
 568         * the receive direction is more restrictive.  The host requires a
 569         * minimum number of guest rx queues (host tx queues) therefore
 570         * negotiate a value that is at least as large as the host minimum
 571         * requirement.  If the host and guest values are not identical then a
 572         * mapping will be established in the receive_queue_setup function.
 573         */
 574        avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
 575                                     eth_dev->data->nb_rx_queues);
 576
 577        PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
 578                    avp->num_tx_queues, avp->num_rx_queues);
 579}
 580
 581static int
 582avp_dev_attach(struct rte_eth_dev *eth_dev)
 583{
 584        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 585        struct rte_avp_device_config config;
 586        unsigned int i;
 587        int ret;
 588
 589        PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
 590                    eth_dev->data->port_id, avp->device_id);
 591
 592        rte_spinlock_lock(&avp->lock);
 593
 594        if (!(avp->flags & AVP_F_DETACHED)) {
 595                PMD_DRV_LOG(NOTICE, "port %u already attached\n",
 596                            eth_dev->data->port_id);
 597                ret = 0;
 598                goto unlock;
 599        }
 600
 601        /*
 602         * make sure that the detached flag is set prior to reconfiguring the
 603         * queues.
 604         */
 605        avp->flags |= AVP_F_DETACHED;
 606        rte_wmb();
 607
 608        /*
 609         * re-run the device create utility which will parse the new host info
 610         * and setup the AVP device queue pointers.
 611         */
 612        ret = avp_dev_create(RTE_ETH_DEV_TO_PCI(eth_dev), eth_dev);
 613        if (ret < 0) {
 614                PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
 615                            ret);
 616                goto unlock;
 617        }
 618
 619        if (avp->flags & AVP_F_CONFIGURED) {
 620                /*
 621                 * Update the receive queue mapping to handle cases where the
 622                 * source and destination hosts have different queue
 623                 * requirements.  As long as the DETACHED flag is asserted the
 624                 * queue table should not be referenced so it should be safe to
 625                 * update it.
 626                 */
 627                _avp_set_queue_counts(eth_dev);
 628                for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
 629                        _avp_set_rx_queue_mappings(eth_dev, i);
 630
 631                /*
 632                 * Update the host with our config details so that it knows the
 633                 * device is active.
 634                 */
 635                memset(&config, 0, sizeof(config));
 636                config.device_id = avp->device_id;
 637                config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
 638                config.driver_version = AVP_DPDK_DRIVER_VERSION;
 639                config.features = avp->features;
 640                config.num_tx_queues = avp->num_tx_queues;
 641                config.num_rx_queues = avp->num_rx_queues;
 642                config.if_up = !!(avp->flags & AVP_F_LINKUP);
 643
 644                ret = avp_dev_ctrl_set_config(eth_dev, &config);
 645                if (ret < 0) {
 646                        PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
 647                                    ret);
 648                        goto unlock;
 649                }
 650        }
 651
 652        rte_wmb();
 653        avp->flags &= ~AVP_F_DETACHED;
 654
 655        ret = 0;
 656
 657unlock:
 658        rte_spinlock_unlock(&avp->lock);
 659        return ret;
 660}
 661
 662static void
 663avp_dev_interrupt_handler(void *data)
 664{
 665        struct rte_eth_dev *eth_dev = data;
 666        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 667        void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
 668        uint32_t status, value;
 669        int ret;
 670
 671        if (registers == NULL)
 672                rte_panic("no mapped MMIO register space\n");
 673
 674        /* read the interrupt status register
 675         * note: this register clears on read so all raised interrupts must be
 676         *    handled or remembered for later processing
 677         */
 678        status = AVP_READ32(
 679                RTE_PTR_ADD(registers,
 680                            RTE_AVP_INTERRUPT_STATUS_OFFSET));
 681
 682        if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
 683                /* handle interrupt based on current status */
 684                value = AVP_READ32(
 685                        RTE_PTR_ADD(registers,
 686                                    RTE_AVP_MIGRATION_STATUS_OFFSET));
 687                switch (value) {
 688                case RTE_AVP_MIGRATION_DETACHED:
 689                        ret = avp_dev_detach(eth_dev);
 690                        break;
 691                case RTE_AVP_MIGRATION_ATTACHED:
 692                        ret = avp_dev_attach(eth_dev);
 693                        break;
 694                default:
 695                        PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
 696                                    value);
 697                        ret = -EINVAL;
 698                }
 699
 700                /* acknowledge the request by writing out our current status */
 701                value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
 702                AVP_WRITE32(value,
 703                            RTE_PTR_ADD(registers,
 704                                        RTE_AVP_MIGRATION_ACK_OFFSET));
 705
 706                PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
 707        }
 708
 709        if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
 710                PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
 711                            status);
 712
 713        /* re-enable UIO interrupt handling */
 714        ret = rte_intr_ack(&pci_dev->intr_handle);
 715        if (ret < 0) {
 716                PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
 717                            ret);
 718                /* continue */
 719        }
 720}
 721
 722static int
 723avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
 724{
 725        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 726        void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
 727        int ret;
 728
 729        if (registers == NULL)
 730                return -EINVAL;
 731
 732        /* enable UIO interrupt handling */
 733        ret = rte_intr_enable(&pci_dev->intr_handle);
 734        if (ret < 0) {
 735                PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
 736                            ret);
 737                return ret;
 738        }
 739
 740        /* inform the device that all interrupts are enabled */
 741        AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
 742                    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
 743
 744        return 0;
 745}
 746
 747static int
 748avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
 749{
 750        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 751        void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
 752        int ret;
 753
 754        if (registers == NULL)
 755                return 0;
 756
 757        /* inform the device that all interrupts are disabled */
 758        AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
 759                    RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
 760
 761        /* enable UIO interrupt handling */
 762        ret = rte_intr_disable(&pci_dev->intr_handle);
 763        if (ret < 0) {
 764                PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
 765                            ret);
 766                return ret;
 767        }
 768
 769        return 0;
 770}
 771
 772static int
 773avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
 774{
 775        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 776        int ret;
 777
 778        /* register a callback handler with UIO for interrupt notifications */
 779        ret = rte_intr_callback_register(&pci_dev->intr_handle,
 780                                         avp_dev_interrupt_handler,
 781                                         (void *)eth_dev);
 782        if (ret < 0) {
 783                PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
 784                            ret);
 785                return ret;
 786        }
 787
 788        /* enable interrupt processing */
 789        return avp_dev_enable_interrupts(eth_dev);
 790}
 791
 792static int
 793avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
 794{
 795        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 796        void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
 797        uint32_t value;
 798
 799        if (registers == NULL)
 800                return 0;
 801
 802        value = AVP_READ32(RTE_PTR_ADD(registers,
 803                                       RTE_AVP_MIGRATION_STATUS_OFFSET));
 804        if (value == RTE_AVP_MIGRATION_DETACHED) {
 805                /* migration is in progress; ack it if we have not already */
 806                AVP_WRITE32(value,
 807                            RTE_PTR_ADD(registers,
 808                                        RTE_AVP_MIGRATION_ACK_OFFSET));
 809                return 1;
 810        }
 811        return 0;
 812}
 813
 814/*
 815 * create a AVP device using the supplied device info by first translating it
 816 * to guest address space(s).
 817 */
 818static int
 819avp_dev_create(struct rte_pci_device *pci_dev,
 820               struct rte_eth_dev *eth_dev)
 821{
 822        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 823        struct rte_avp_device_info *host_info;
 824        struct rte_mem_resource *resource;
 825        unsigned int i;
 826
 827        resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
 828        if (resource->addr == NULL) {
 829                PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
 830                            RTE_AVP_PCI_DEVICE_BAR);
 831                return -EFAULT;
 832        }
 833        host_info = (struct rte_avp_device_info *)resource->addr;
 834
 835        if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
 836                avp_dev_version_check(host_info->version)) {
 837                PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
 838                            host_info->magic, host_info->version,
 839                            AVP_DPDK_DRIVER_VERSION);
 840                return -EINVAL;
 841        }
 842
 843        PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
 844                    RTE_AVP_GET_RELEASE_VERSION(host_info->version),
 845                    RTE_AVP_GET_MAJOR_VERSION(host_info->version),
 846                    RTE_AVP_GET_MINOR_VERSION(host_info->version));
 847
 848        PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
 849                    host_info->min_tx_queues, host_info->max_tx_queues);
 850        PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
 851                    host_info->min_rx_queues, host_info->max_rx_queues);
 852        PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
 853                    host_info->features);
 854
 855        if (avp->magic != AVP_ETHDEV_MAGIC) {
 856                /*
 857                 * First time initialization (i.e., not during a VM
 858                 * migration)
 859                 */
 860                memset(avp, 0, sizeof(*avp));
 861                avp->magic = AVP_ETHDEV_MAGIC;
 862                avp->dev_data = eth_dev->data;
 863                avp->port_id = eth_dev->data->port_id;
 864                avp->host_mbuf_size = host_info->mbuf_size;
 865                avp->host_features = host_info->features;
 866                rte_spinlock_init(&avp->lock);
 867                memcpy(&avp->ethaddr.addr_bytes[0],
 868                       host_info->ethaddr, RTE_ETHER_ADDR_LEN);
 869                /* adjust max values to not exceed our max */
 870                avp->max_tx_queues =
 871                        RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
 872                avp->max_rx_queues =
 873                        RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
 874        } else {
 875                /* Re-attaching during migration */
 876
 877                /* TODO... requires validation of host values */
 878                if ((host_info->features & avp->features) != avp->features) {
 879                        PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
 880                                    avp->features, host_info->features);
 881                        /* this should not be possible; continue for now */
 882                }
 883        }
 884
 885        /* the device id is allowed to change over migrations */
 886        avp->device_id = host_info->device_id;
 887
 888        /* translate incoming host addresses to guest address space */
 889        PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
 890                    host_info->tx_phys);
 891        PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
 892                    host_info->alloc_phys);
 893        for (i = 0; i < avp->max_tx_queues; i++) {
 894                avp->tx_q[i] = avp_dev_translate_address(eth_dev,
 895                        host_info->tx_phys + (i * host_info->tx_size));
 896
 897                avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
 898                        host_info->alloc_phys + (i * host_info->alloc_size));
 899        }
 900
 901        PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
 902                    host_info->rx_phys);
 903        PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
 904                    host_info->free_phys);
 905        for (i = 0; i < avp->max_rx_queues; i++) {
 906                avp->rx_q[i] = avp_dev_translate_address(eth_dev,
 907                        host_info->rx_phys + (i * host_info->rx_size));
 908                avp->free_q[i] = avp_dev_translate_address(eth_dev,
 909                        host_info->free_phys + (i * host_info->free_size));
 910        }
 911
 912        PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
 913                    host_info->req_phys);
 914        PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
 915                    host_info->resp_phys);
 916        PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
 917                    host_info->sync_phys);
 918        PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
 919                    host_info->mbuf_phys);
 920        avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
 921        avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
 922        avp->sync_addr =
 923                avp_dev_translate_address(eth_dev, host_info->sync_phys);
 924        avp->mbuf_addr =
 925                avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
 926
 927        /*
 928         * store the host mbuf virtual address so that we can calculate
 929         * relative offsets for each mbuf as they are processed
 930         */
 931        avp->host_mbuf_addr = host_info->mbuf_va;
 932        avp->host_sync_addr = host_info->sync_va;
 933
 934        /*
 935         * store the maximum packet length that is supported by the host.
 936         */
 937        avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
 938        PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
 939                                host_info->max_rx_pkt_len);
 940
 941        return 0;
 942}
 943
 944/*
 945 * This function is based on probe() function in avp_pci.c
 946 * It returns 0 on success.
 947 */
 948static int
 949eth_avp_dev_init(struct rte_eth_dev *eth_dev)
 950{
 951        struct avp_dev *avp =
 952                AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
 953        struct rte_pci_device *pci_dev;
 954        int ret;
 955
 956        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 957        eth_dev->dev_ops = &avp_eth_dev_ops;
 958        eth_dev->rx_pkt_burst = &avp_recv_pkts;
 959        eth_dev->tx_pkt_burst = &avp_xmit_pkts;
 960
 961        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 962                /*
 963                 * no setup required on secondary processes.  All data is saved
 964                 * in dev_private by the primary process. All resource should
 965                 * be mapped to the same virtual address so all pointers should
 966                 * be valid.
 967                 */
 968                if (eth_dev->data->scattered_rx) {
 969                        PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
 970                        eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
 971                        eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
 972                }
 973                return 0;
 974        }
 975
 976        rte_eth_copy_pci_info(eth_dev, pci_dev);
 977        eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
 978
 979        /* Check current migration status */
 980        if (avp_dev_migration_pending(eth_dev)) {
 981                PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
 982                return -EBUSY;
 983        }
 984
 985        /* Check BAR resources */
 986        ret = avp_dev_check_regions(eth_dev);
 987        if (ret < 0) {
 988                PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
 989                            ret);
 990                return ret;
 991        }
 992
 993        /* Enable interrupts */
 994        ret = avp_dev_setup_interrupts(eth_dev);
 995        if (ret < 0) {
 996                PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
 997                return ret;
 998        }
 999
1000        /* Handle each subtype */
1001        ret = avp_dev_create(pci_dev, eth_dev);
1002        if (ret < 0) {
1003                PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
1004                return ret;
1005        }
1006
1007        /* Allocate memory for storing MAC addresses */
1008        eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev",
1009                                        RTE_ETHER_ADDR_LEN, 0);
1010        if (eth_dev->data->mac_addrs == NULL) {
1011                PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
1012                            RTE_ETHER_ADDR_LEN);
1013                return -ENOMEM;
1014        }
1015
1016        /* Get a mac from device config */
1017        rte_ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
1018
1019        return 0;
1020}
1021
1022static int
1023eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
1024{
1025        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1026                return -EPERM;
1027
1028        if (eth_dev->data == NULL)
1029                return 0;
1030
1031        avp_dev_close(eth_dev);
1032
1033        return 0;
1034}
1035
1036static int
1037eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
1038                  struct rte_pci_device *pci_dev)
1039{
1040        return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct avp_adapter),
1041                        eth_avp_dev_init);
1042}
1043
1044static int
1045eth_avp_pci_remove(struct rte_pci_device *pci_dev)
1046{
1047        return rte_eth_dev_pci_generic_remove(pci_dev,
1048                                              eth_avp_dev_uninit);
1049}
1050
1051static struct rte_pci_driver rte_avp_pmd = {
1052        .id_table = pci_id_avp_map,
1053        .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
1054        .probe = eth_avp_pci_probe,
1055        .remove = eth_avp_pci_remove,
1056};
1057
1058static int
1059avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
1060                         struct avp_dev *avp)
1061{
1062        unsigned int max_rx_pkt_len;
1063
1064        max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
1065
1066        if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
1067            (max_rx_pkt_len > avp->host_mbuf_size)) {
1068                /*
1069                 * If the guest MTU is greater than either the host or guest
1070                 * buffers then chained mbufs have to be enabled in the TX
1071                 * direction.  It is assumed that the application will not need
1072                 * to send packets larger than their max_rx_pkt_len (MRU).
1073                 */
1074                return 1;
1075        }
1076
1077        if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
1078            (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
1079                /*
1080                 * If the host MRU is greater than its own mbuf size or the
1081                 * guest mbuf size then chained mbufs have to be enabled in the
1082                 * RX direction.
1083                 */
1084                return 1;
1085        }
1086
1087        return 0;
1088}
1089
1090static int
1091avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
1092                       uint16_t rx_queue_id,
1093                       uint16_t nb_rx_desc,
1094                       unsigned int socket_id,
1095                       const struct rte_eth_rxconf *rx_conf,
1096                       struct rte_mempool *pool)
1097{
1098        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1099        struct rte_pktmbuf_pool_private *mbp_priv;
1100        struct avp_queue *rxq;
1101
1102        if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
1103                PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
1104                            rx_queue_id, eth_dev->data->nb_rx_queues);
1105                return -EINVAL;
1106        }
1107
1108        /* Save mbuf pool pointer */
1109        avp->pool = pool;
1110
1111        /* Save the local mbuf size */
1112        mbp_priv = rte_mempool_get_priv(pool);
1113        avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
1114        avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
1115
1116        if (avp_dev_enable_scattered(eth_dev, avp)) {
1117                if (!eth_dev->data->scattered_rx) {
1118                        PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
1119                        eth_dev->data->scattered_rx = 1;
1120                        eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
1121                        eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
1122                }
1123        }
1124
1125        PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
1126                    avp->max_rx_pkt_len,
1127                    eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
1128                    avp->host_mbuf_size,
1129                    avp->guest_mbuf_size);
1130
1131        /* allocate a queue object */
1132        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
1133                                 RTE_CACHE_LINE_SIZE, socket_id);
1134        if (rxq == NULL) {
1135                PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
1136                return -ENOMEM;
1137        }
1138
1139        /* save back pointers to AVP and Ethernet devices */
1140        rxq->avp = avp;
1141        rxq->dev_data = eth_dev->data;
1142        eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
1143
1144        /* setup the queue receive mapping for the current queue. */
1145        _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
1146
1147        PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
1148
1149        (void)nb_rx_desc;
1150        (void)rx_conf;
1151        return 0;
1152}
1153
1154static int
1155avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
1156                       uint16_t tx_queue_id,
1157                       uint16_t nb_tx_desc,
1158                       unsigned int socket_id,
1159                       const struct rte_eth_txconf *tx_conf)
1160{
1161        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1162        struct avp_queue *txq;
1163
1164        if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
1165                PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
1166                            tx_queue_id, eth_dev->data->nb_tx_queues);
1167                return -EINVAL;
1168        }
1169
1170        /* allocate a queue object */
1171        txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
1172                                 RTE_CACHE_LINE_SIZE, socket_id);
1173        if (txq == NULL) {
1174                PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
1175                return -ENOMEM;
1176        }
1177
1178        /* only the configured set of transmit queues are used */
1179        txq->queue_id = tx_queue_id;
1180        txq->queue_base = tx_queue_id;
1181        txq->queue_limit = tx_queue_id;
1182
1183        /* save back pointers to AVP and Ethernet devices */
1184        txq->avp = avp;
1185        txq->dev_data = eth_dev->data;
1186        eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
1187
1188        PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
1189
1190        (void)nb_tx_desc;
1191        (void)tx_conf;
1192        return 0;
1193}
1194
1195static inline int
1196_avp_cmp_ether_addr(struct rte_ether_addr *a, struct rte_ether_addr *b)
1197{
1198        uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
1199        uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
1200        return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
1201}
1202
1203static inline int
1204_avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
1205{
1206        struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
1207
1208        if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
1209                /* allow all packets destined to our address */
1210                return 0;
1211        }
1212
1213        if (likely(rte_is_broadcast_ether_addr(&eth->d_addr))) {
1214                /* allow all broadcast packets */
1215                return 0;
1216        }
1217
1218        if (likely(rte_is_multicast_ether_addr(&eth->d_addr))) {
1219                /* allow all multicast packets */
1220                return 0;
1221        }
1222
1223        if (avp->flags & AVP_F_PROMISC) {
1224                /* allow all packets when in promiscuous mode */
1225                return 0;
1226        }
1227
1228        return -1;
1229}
1230
#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
/*
 * Debug-only consistency check of a host buffer chain (compiled in only when
 * RTE_LIBRTE_AVP_DEBUG_BUFFERS is defined).
 *
 * Walks the chain starting at 'buf' and panics when a segment or its data
 * pointer translates to NULL, when a segment has zero data length, when the
 * number of segments found differs from nb_segs of the first segment, or
 * when the summed segment lengths differ from pkt_len of the first segment.
 *
 * The first segment is validated before its nb_segs field is read, so a bad
 * head buffer produces the panic message rather than a NULL dereference.
 */
static inline void
__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
{
        struct rte_avp_desc *first_buf;
        struct rte_avp_desc *pkt_buf;
        unsigned int pkt_len;
        unsigned int nb_segs;
        void *pkt_data;
        unsigned int i;

        first_buf = avp_dev_translate_buffer(avp, buf);
        if (first_buf == NULL)
                rte_panic("bad buffer: segment %u has an invalid address %p\n",
                          0U, buf);

        i = 0;
        pkt_len = 0;
        nb_segs = first_buf->nb_segs;
        do {
                /* Adjust pointers for guest addressing */
                pkt_buf = avp_dev_translate_buffer(avp, buf);
                if (pkt_buf == NULL)
                        rte_panic("bad buffer: segment %u has an invalid address %p\n",
                                  i, buf);
                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
                if (pkt_data == NULL)
                        rte_panic("bad buffer: segment %u has a NULL data pointer\n",
                                  i);
                if (pkt_buf->data_len == 0)
                        rte_panic("bad buffer: segment %u has 0 data length\n",
                                  i);
                pkt_len += pkt_buf->data_len;
                nb_segs--;
                i++;

        } while (nb_segs && (buf = pkt_buf->next) != NULL);

        /* the chain ended before the advertised number of segments */
        if (nb_segs != 0)
                rte_panic("bad buffer: expected %u segments found %u\n",
                          first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
        /* the per-segment lengths must add up to the packet length */
        if (pkt_len != first_buf->pkt_len)
                rte_panic("bad buffer: expected length %u found %u\n",
                          first_buf->pkt_len, pkt_len);
}

#define avp_dev_buffer_sanity_check(a, b) \
        __avp_dev_buffer_sanity_check((a), (b))

#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */

/* buffer checking is compiled out when the debug option is not defined */
#define avp_dev_buffer_sanity_check(a, b) do {} while (0)

#endif
1282
1283/*
1284 * Copy a host buffer chain to a set of mbufs.  This function assumes that
1285 * there exactly the required number of mbufs to copy all source bytes.
1286 */
1287static inline struct rte_mbuf *
1288avp_dev_copy_from_buffers(struct avp_dev *avp,
1289                          struct rte_avp_desc *buf,
1290                          struct rte_mbuf **mbufs,
1291                          unsigned int count)
1292{
1293        struct rte_mbuf *m_previous = NULL;
1294        struct rte_avp_desc *pkt_buf;
1295        unsigned int total_length = 0;
1296        unsigned int copy_length;
1297        unsigned int src_offset;
1298        struct rte_mbuf *m;
1299        uint16_t ol_flags;
1300        uint16_t vlan_tci;
1301        void *pkt_data;
1302        unsigned int i;
1303
1304        avp_dev_buffer_sanity_check(avp, buf);
1305
1306        /* setup the first source buffer */
1307        pkt_buf = avp_dev_translate_buffer(avp, buf);
1308        pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1309        total_length = pkt_buf->pkt_len;
1310        src_offset = 0;
1311
1312        if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1313                ol_flags = PKT_RX_VLAN;
1314                vlan_tci = pkt_buf->vlan_tci;
1315        } else {
1316                ol_flags = 0;
1317                vlan_tci = 0;
1318        }
1319
1320        for (i = 0; (i < count) && (buf != NULL); i++) {
1321                /* fill each destination buffer */
1322                m = mbufs[i];
1323
1324                if (m_previous != NULL)
1325                        m_previous->next = m;
1326
1327                m_previous = m;
1328
1329                do {
1330                        /*
1331                         * Copy as many source buffers as will fit in the
1332                         * destination buffer.
1333                         */
1334                        copy_length = RTE_MIN((avp->guest_mbuf_size -
1335                                               rte_pktmbuf_data_len(m)),
1336                                              (pkt_buf->data_len -
1337                                               src_offset));
1338                        rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1339                                               rte_pktmbuf_data_len(m)),
1340                                   RTE_PTR_ADD(pkt_data, src_offset),
1341                                   copy_length);
1342                        rte_pktmbuf_data_len(m) += copy_length;
1343                        src_offset += copy_length;
1344
1345                        if (likely(src_offset == pkt_buf->data_len)) {
1346                                /* need a new source buffer */
1347                                buf = pkt_buf->next;
1348                                if (buf != NULL) {
1349                                        pkt_buf = avp_dev_translate_buffer(
1350                                                avp, buf);
1351                                        pkt_data = avp_dev_translate_buffer(
1352                                                avp, pkt_buf->data);
1353                                        src_offset = 0;
1354                                }
1355                        }
1356
1357                        if (unlikely(rte_pktmbuf_data_len(m) ==
1358                                     avp->guest_mbuf_size)) {
1359                                /* need a new destination mbuf */
1360                                break;
1361                        }
1362
1363                } while (buf != NULL);
1364        }
1365
1366        m = mbufs[0];
1367        m->ol_flags = ol_flags;
1368        m->nb_segs = count;
1369        rte_pktmbuf_pkt_len(m) = total_length;
1370        m->vlan_tci = vlan_tci;
1371
1372        __rte_mbuf_sanity_check(m, 1);
1373
1374        return m;
1375}
1376
1377static uint16_t
1378avp_recv_scattered_pkts(void *rx_queue,
1379                        struct rte_mbuf **rx_pkts,
1380                        uint16_t nb_pkts)
1381{
1382        struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1383        struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1384        struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
1385        struct avp_dev *avp = rxq->avp;
1386        struct rte_avp_desc *pkt_buf;
1387        struct rte_avp_fifo *free_q;
1388        struct rte_avp_fifo *rx_q;
1389        struct rte_avp_desc *buf;
1390        unsigned int count, avail, n;
1391        unsigned int guest_mbuf_size;
1392        struct rte_mbuf *m;
1393        unsigned int required;
1394        unsigned int buf_len;
1395        unsigned int port_id;
1396        unsigned int i;
1397
1398        if (unlikely(avp->flags & AVP_F_DETACHED)) {
1399                /* VM live migration in progress */
1400                return 0;
1401        }
1402
1403        guest_mbuf_size = avp->guest_mbuf_size;
1404        port_id = avp->port_id;
1405        rx_q = avp->rx_q[rxq->queue_id];
1406        free_q = avp->free_q[rxq->queue_id];
1407
1408        /* setup next queue to service */
1409        rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1410                (rxq->queue_id + 1) : rxq->queue_base;
1411
1412        /* determine how many slots are available in the free queue */
1413        count = avp_fifo_free_count(free_q);
1414
1415        /* determine how many packets are available in the rx queue */
1416        avail = avp_fifo_count(rx_q);
1417
1418        /* determine how many packets can be received */
1419        count = RTE_MIN(count, avail);
1420        count = RTE_MIN(count, nb_pkts);
1421        count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1422
1423        if (unlikely(count == 0)) {
1424                /* no free buffers, or no buffers on the rx queue */
1425                return 0;
1426        }
1427
1428        /* retrieve pending packets */
1429        n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1430        PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1431                   count, rx_q);
1432
1433        count = 0;
1434        for (i = 0; i < n; i++) {
1435                /* prefetch next entry while processing current one */
1436                if (i + 1 < n) {
1437                        pkt_buf = avp_dev_translate_buffer(avp,
1438                                                           avp_bufs[i + 1]);
1439                        rte_prefetch0(pkt_buf);
1440                }
1441                buf = avp_bufs[i];
1442
1443                /* Peek into the first buffer to determine the total length */
1444                pkt_buf = avp_dev_translate_buffer(avp, buf);
1445                buf_len = pkt_buf->pkt_len;
1446
1447                /* Allocate enough mbufs to receive the entire packet */
1448                required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
1449                if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
1450                        rxq->dev_data->rx_mbuf_alloc_failed++;
1451                        continue;
1452                }
1453
1454                /* Copy the data from the buffers to our mbufs */
1455                m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
1456
1457                /* finalize mbuf */
1458                m->port = port_id;
1459
1460                if (_avp_mac_filter(avp, m) != 0) {
1461                        /* silently discard packets not destined to our MAC */
1462                        rte_pktmbuf_free(m);
1463                        continue;
1464                }
1465
1466                /* return new mbuf to caller */
1467                rx_pkts[count++] = m;
1468                rxq->bytes += buf_len;
1469        }
1470
1471        rxq->packets += count;
1472
1473        /* return the buffers to the free queue */
1474        avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1475
1476        return count;
1477}
1478
1479
1480static uint16_t
1481avp_recv_pkts(void *rx_queue,
1482              struct rte_mbuf **rx_pkts,
1483              uint16_t nb_pkts)
1484{
1485        struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1486        struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
1487        struct avp_dev *avp = rxq->avp;
1488        struct rte_avp_desc *pkt_buf;
1489        struct rte_avp_fifo *free_q;
1490        struct rte_avp_fifo *rx_q;
1491        unsigned int count, avail, n;
1492        unsigned int pkt_len;
1493        struct rte_mbuf *m;
1494        char *pkt_data;
1495        unsigned int i;
1496
1497        if (unlikely(avp->flags & AVP_F_DETACHED)) {
1498                /* VM live migration in progress */
1499                return 0;
1500        }
1501
1502        rx_q = avp->rx_q[rxq->queue_id];
1503        free_q = avp->free_q[rxq->queue_id];
1504
1505        /* setup next queue to service */
1506        rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
1507                (rxq->queue_id + 1) : rxq->queue_base;
1508
1509        /* determine how many slots are available in the free queue */
1510        count = avp_fifo_free_count(free_q);
1511
1512        /* determine how many packets are available in the rx queue */
1513        avail = avp_fifo_count(rx_q);
1514
1515        /* determine how many packets can be received */
1516        count = RTE_MIN(count, avail);
1517        count = RTE_MIN(count, nb_pkts);
1518        count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
1519
1520        if (unlikely(count == 0)) {
1521                /* no free buffers, or no buffers on the rx queue */
1522                return 0;
1523        }
1524
1525        /* retrieve pending packets */
1526        n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
1527        PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
1528                   count, rx_q);
1529
1530        count = 0;
1531        for (i = 0; i < n; i++) {
1532                /* prefetch next entry while processing current one */
1533                if (i < n - 1) {
1534                        pkt_buf = avp_dev_translate_buffer(avp,
1535                                                           avp_bufs[i + 1]);
1536                        rte_prefetch0(pkt_buf);
1537                }
1538
1539                /* Adjust host pointers for guest addressing */
1540                pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1541                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1542                pkt_len = pkt_buf->pkt_len;
1543
1544                if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1545                             (pkt_buf->nb_segs > 1))) {
1546                        /*
1547                         * application should be using the scattered receive
1548                         * function
1549                         */
1550                        rxq->errors++;
1551                        continue;
1552                }
1553
1554                /* process each packet to be transmitted */
1555                m = rte_pktmbuf_alloc(avp->pool);
1556                if (unlikely(m == NULL)) {
1557                        rxq->dev_data->rx_mbuf_alloc_failed++;
1558                        continue;
1559                }
1560
1561                /* copy data out of the host buffer to our buffer */
1562                m->data_off = RTE_PKTMBUF_HEADROOM;
1563                rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
1564
1565                /* initialize the local mbuf */
1566                rte_pktmbuf_data_len(m) = pkt_len;
1567                rte_pktmbuf_pkt_len(m) = pkt_len;
1568                m->port = avp->port_id;
1569
1570                if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
1571                        m->ol_flags = PKT_RX_VLAN;
1572                        m->vlan_tci = pkt_buf->vlan_tci;
1573                }
1574
1575                if (_avp_mac_filter(avp, m) != 0) {
1576                        /* silently discard packets not destined to our MAC */
1577                        rte_pktmbuf_free(m);
1578                        continue;
1579                }
1580
1581                /* return new mbuf to caller */
1582                rx_pkts[count++] = m;
1583                rxq->bytes += pkt_len;
1584        }
1585
1586        rxq->packets += count;
1587
1588        /* return the buffers to the free queue */
1589        avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
1590
1591        return count;
1592}
1593
1594/*
1595 * Copy a chained mbuf to a set of host buffers.  This function assumes that
1596 * there are sufficient destination buffers to contain the entire source
1597 * packet.
1598 */
1599static inline uint16_t
1600avp_dev_copy_to_buffers(struct avp_dev *avp,
1601                        struct rte_mbuf *mbuf,
1602                        struct rte_avp_desc **buffers,
1603                        unsigned int count)
1604{
1605        struct rte_avp_desc *previous_buf = NULL;
1606        struct rte_avp_desc *first_buf = NULL;
1607        struct rte_avp_desc *pkt_buf;
1608        struct rte_avp_desc *buf;
1609        size_t total_length;
1610        struct rte_mbuf *m;
1611        size_t copy_length;
1612        size_t src_offset;
1613        char *pkt_data;
1614        unsigned int i;
1615
1616        __rte_mbuf_sanity_check(mbuf, 1);
1617
1618        m = mbuf;
1619        src_offset = 0;
1620        total_length = rte_pktmbuf_pkt_len(m);
1621        for (i = 0; (i < count) && (m != NULL); i++) {
1622                /* fill each destination buffer */
1623                buf = buffers[i];
1624
1625                if (i < count - 1) {
1626                        /* prefetch next entry while processing this one */
1627                        pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
1628                        rte_prefetch0(pkt_buf);
1629                }
1630
1631                /* Adjust pointers for guest addressing */
1632                pkt_buf = avp_dev_translate_buffer(avp, buf);
1633                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1634
1635                /* setup the buffer chain */
1636                if (previous_buf != NULL)
1637                        previous_buf->next = buf;
1638                else
1639                        first_buf = pkt_buf;
1640
1641                previous_buf = pkt_buf;
1642
1643                do {
1644                        /*
1645                         * copy as many source mbuf segments as will fit in the
1646                         * destination buffer.
1647                         */
1648                        copy_length = RTE_MIN((avp->host_mbuf_size -
1649                                               pkt_buf->data_len),
1650                                              (rte_pktmbuf_data_len(m) -
1651                                               src_offset));
1652                        rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
1653                                   RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
1654                                               src_offset),
1655                                   copy_length);
1656                        pkt_buf->data_len += copy_length;
1657                        src_offset += copy_length;
1658
1659                        if (likely(src_offset == rte_pktmbuf_data_len(m))) {
1660                                /* need a new source buffer */
1661                                m = m->next;
1662                                src_offset = 0;
1663                        }
1664
1665                        if (unlikely(pkt_buf->data_len ==
1666                                     avp->host_mbuf_size)) {
1667                                /* need a new destination buffer */
1668                                break;
1669                        }
1670
1671                } while (m != NULL);
1672        }
1673
1674        first_buf->nb_segs = count;
1675        first_buf->pkt_len = total_length;
1676
1677        if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
1678                first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1679                first_buf->vlan_tci = mbuf->vlan_tci;
1680        }
1681
1682        avp_dev_buffer_sanity_check(avp, buffers[0]);
1683
1684        return total_length;
1685}
1686
1687
1688static uint16_t
1689avp_xmit_scattered_pkts(void *tx_queue,
1690                        struct rte_mbuf **tx_pkts,
1691                        uint16_t nb_pkts)
1692{
1693        struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
1694                                       RTE_AVP_MAX_MBUF_SEGMENTS)] = {};
1695        struct avp_queue *txq = (struct avp_queue *)tx_queue;
1696        struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
1697        struct avp_dev *avp = txq->avp;
1698        struct rte_avp_fifo *alloc_q;
1699        struct rte_avp_fifo *tx_q;
1700        unsigned int count, avail, n;
1701        unsigned int orig_nb_pkts;
1702        struct rte_mbuf *m;
1703        unsigned int required;
1704        unsigned int segments;
1705        unsigned int tx_bytes;
1706        unsigned int i;
1707
1708        orig_nb_pkts = nb_pkts;
1709        if (unlikely(avp->flags & AVP_F_DETACHED)) {
1710                /* VM live migration in progress */
1711                /* TODO ... buffer for X packets then drop? */
1712                txq->errors += nb_pkts;
1713                return 0;
1714        }
1715
1716        tx_q = avp->tx_q[txq->queue_id];
1717        alloc_q = avp->alloc_q[txq->queue_id];
1718
1719        /* limit the number of transmitted packets to the max burst size */
1720        if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1721                nb_pkts = AVP_MAX_TX_BURST;
1722
1723        /* determine how many buffers are available to copy into */
1724        avail = avp_fifo_count(alloc_q);
1725        if (unlikely(avail > (AVP_MAX_TX_BURST *
1726                              RTE_AVP_MAX_MBUF_SEGMENTS)))
1727                avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
1728
1729        /* determine how many slots are available in the transmit queue */
1730        count = avp_fifo_free_count(tx_q);
1731
1732        /* determine how many packets can be sent */
1733        nb_pkts = RTE_MIN(count, nb_pkts);
1734
1735        /* determine how many packets will fit in the available buffers */
1736        count = 0;
1737        segments = 0;
1738        for (i = 0; i < nb_pkts; i++) {
1739                m = tx_pkts[i];
1740                if (likely(i < (unsigned int)nb_pkts - 1)) {
1741                        /* prefetch next entry while processing this one */
1742                        rte_prefetch0(tx_pkts[i + 1]);
1743                }
1744                required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1745                        avp->host_mbuf_size;
1746
1747                if (unlikely((required == 0) ||
1748                             (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
1749                        break;
1750                else if (unlikely(required + segments > avail))
1751                        break;
1752                segments += required;
1753                count++;
1754        }
1755        nb_pkts = count;
1756
1757        if (unlikely(nb_pkts == 0)) {
1758                /* no available buffers, or no space on the tx queue */
1759                txq->errors += orig_nb_pkts;
1760                return 0;
1761        }
1762
1763        PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1764                   nb_pkts, tx_q);
1765
1766        /* retrieve sufficient send buffers */
1767        n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
1768        if (unlikely(n != segments)) {
1769                PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
1770                           "n=%u, segments=%u, orig=%u\n",
1771                           n, segments, orig_nb_pkts);
1772                txq->errors += orig_nb_pkts;
1773                return 0;
1774        }
1775
1776        tx_bytes = 0;
1777        count = 0;
1778        for (i = 0; i < nb_pkts; i++) {
1779                /* process each packet to be transmitted */
1780                m = tx_pkts[i];
1781
1782                /* determine how many buffers are required for this packet */
1783                required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
1784                        avp->host_mbuf_size;
1785
1786                tx_bytes += avp_dev_copy_to_buffers(avp, m,
1787                                                    &avp_bufs[count], required);
1788                tx_bufs[i] = avp_bufs[count];
1789                count += required;
1790
1791                /* free the original mbuf */
1792                rte_pktmbuf_free(m);
1793        }
1794
1795        txq->packets += nb_pkts;
1796        txq->bytes += tx_bytes;
1797
1798#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
1799        for (i = 0; i < nb_pkts; i++)
1800                avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
1801#endif
1802
1803        /* send the packets */
1804        n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
1805        if (unlikely(n != orig_nb_pkts))
1806                txq->errors += (orig_nb_pkts - n);
1807
1808        return n;
1809}
1810
1811
1812static uint16_t
1813avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1814{
1815        struct avp_queue *txq = (struct avp_queue *)tx_queue;
1816        struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
1817        struct avp_dev *avp = txq->avp;
1818        struct rte_avp_desc *pkt_buf;
1819        struct rte_avp_fifo *alloc_q;
1820        struct rte_avp_fifo *tx_q;
1821        unsigned int count, avail, n;
1822        struct rte_mbuf *m;
1823        unsigned int pkt_len;
1824        unsigned int tx_bytes;
1825        char *pkt_data;
1826        unsigned int i;
1827
1828        if (unlikely(avp->flags & AVP_F_DETACHED)) {
1829                /* VM live migration in progress */
1830                /* TODO ... buffer for X packets then drop?! */
1831                txq->errors++;
1832                return 0;
1833        }
1834
1835        tx_q = avp->tx_q[txq->queue_id];
1836        alloc_q = avp->alloc_q[txq->queue_id];
1837
1838        /* limit the number of transmitted packets to the max burst size */
1839        if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
1840                nb_pkts = AVP_MAX_TX_BURST;
1841
1842        /* determine how many buffers are available to copy into */
1843        avail = avp_fifo_count(alloc_q);
1844
1845        /* determine how many slots are available in the transmit queue */
1846        count = avp_fifo_free_count(tx_q);
1847
1848        /* determine how many packets can be sent */
1849        count = RTE_MIN(count, avail);
1850        count = RTE_MIN(count, nb_pkts);
1851
1852        if (unlikely(count == 0)) {
1853                /* no available buffers, or no space on the tx queue */
1854                txq->errors += nb_pkts;
1855                return 0;
1856        }
1857
1858        PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
1859                   count, tx_q);
1860
1861        /* retrieve sufficient send buffers */
1862        n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
1863        if (unlikely(n != count)) {
1864                txq->errors++;
1865                return 0;
1866        }
1867
1868        tx_bytes = 0;
1869        for (i = 0; i < count; i++) {
1870                /* prefetch next entry while processing the current one */
1871                if (i < count - 1) {
1872                        pkt_buf = avp_dev_translate_buffer(avp,
1873                                                           avp_bufs[i + 1]);
1874                        rte_prefetch0(pkt_buf);
1875                }
1876
1877                /* process each packet to be transmitted */
1878                m = tx_pkts[i];
1879
1880                /* Adjust pointers for guest addressing */
1881                pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
1882                pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
1883                pkt_len = rte_pktmbuf_pkt_len(m);
1884
1885                if (unlikely((pkt_len > avp->guest_mbuf_size) ||
1886                                         (pkt_len > avp->host_mbuf_size))) {
1887                        /*
1888                         * application should be using the scattered transmit
1889                         * function; send it truncated to avoid the performance
1890                         * hit of having to manage returning the already
1891                         * allocated buffer to the free list.  This should not
1892                         * happen since the application should have set the
1893                         * max_rx_pkt_len based on its MTU and it should be
1894                         * policing its own packet sizes.
1895                         */
1896                        txq->errors++;
1897                        pkt_len = RTE_MIN(avp->guest_mbuf_size,
1898                                          avp->host_mbuf_size);
1899                }
1900
1901                /* copy data out of our mbuf and into the AVP buffer */
1902                rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
1903                pkt_buf->pkt_len = pkt_len;
1904                pkt_buf->data_len = pkt_len;
1905                pkt_buf->nb_segs = 1;
1906                pkt_buf->next = NULL;
1907
1908                if (m->ol_flags & PKT_TX_VLAN_PKT) {
1909                        pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
1910                        pkt_buf->vlan_tci = m->vlan_tci;
1911                }
1912
1913                tx_bytes += pkt_len;
1914
1915                /* free the original mbuf */
1916                rte_pktmbuf_free(m);
1917        }
1918
1919        txq->packets += count;
1920        txq->bytes += tx_bytes;
1921
1922        /* send the packets */
1923        n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
1924
1925        return n;
1926}
1927
1928static void
1929avp_dev_rx_queue_release(void *rx_queue)
1930{
1931        struct avp_queue *rxq = (struct avp_queue *)rx_queue;
1932        struct avp_dev *avp = rxq->avp;
1933        struct rte_eth_dev_data *data = avp->dev_data;
1934        unsigned int i;
1935
1936        for (i = 0; i < avp->num_rx_queues; i++) {
1937                if (data->rx_queues[i] == rxq) {
1938                        rte_free(data->rx_queues[i]);
1939                        data->rx_queues[i] = NULL;
1940                }
1941        }
1942}
1943
1944static void
1945avp_dev_rx_queue_release_all(struct rte_eth_dev *eth_dev)
1946{
1947        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1948        struct rte_eth_dev_data *data = avp->dev_data;
1949        unsigned int i;
1950
1951        for (i = 0; i < avp->num_rx_queues; i++) {
1952                if (data->rx_queues[i]) {
1953                        rte_free(data->rx_queues[i]);
1954                        data->rx_queues[i] = NULL;
1955                }
1956        }
1957}
1958
1959static void
1960avp_dev_tx_queue_release(void *tx_queue)
1961{
1962        struct avp_queue *txq = (struct avp_queue *)tx_queue;
1963        struct avp_dev *avp = txq->avp;
1964        struct rte_eth_dev_data *data = avp->dev_data;
1965        unsigned int i;
1966
1967        for (i = 0; i < avp->num_tx_queues; i++) {
1968                if (data->tx_queues[i] == txq) {
1969                        rte_free(data->tx_queues[i]);
1970                        data->tx_queues[i] = NULL;
1971                }
1972        }
1973}
1974
1975static void
1976avp_dev_tx_queue_release_all(struct rte_eth_dev *eth_dev)
1977{
1978        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1979        struct rte_eth_dev_data *data = avp->dev_data;
1980        unsigned int i;
1981
1982        for (i = 0; i < avp->num_tx_queues; i++) {
1983                if (data->tx_queues[i]) {
1984                        rte_free(data->tx_queues[i]);
1985                        data->tx_queues[i] = NULL;
1986                }
1987        }
1988}
1989
1990static int
1991avp_dev_configure(struct rte_eth_dev *eth_dev)
1992{
1993        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
1994        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
1995        struct rte_avp_device_info *host_info;
1996        struct rte_avp_device_config config;
1997        int mask = 0;
1998        void *addr;
1999        int ret;
2000
2001        rte_spinlock_lock(&avp->lock);
2002        if (avp->flags & AVP_F_DETACHED) {
2003                PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2004                ret = -ENOTSUP;
2005                goto unlock;
2006        }
2007
2008        addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
2009        host_info = (struct rte_avp_device_info *)addr;
2010
2011        /* Setup required number of queues */
2012        _avp_set_queue_counts(eth_dev);
2013
2014        mask = (ETH_VLAN_STRIP_MASK |
2015                ETH_VLAN_FILTER_MASK |
2016                ETH_VLAN_EXTEND_MASK);
2017        ret = avp_vlan_offload_set(eth_dev, mask);
2018        if (ret < 0) {
2019                PMD_DRV_LOG(ERR, "VLAN offload set failed by host, ret=%d\n",
2020                            ret);
2021                goto unlock;
2022        }
2023
2024        /* update device config */
2025        memset(&config, 0, sizeof(config));
2026        config.device_id = host_info->device_id;
2027        config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
2028        config.driver_version = AVP_DPDK_DRIVER_VERSION;
2029        config.features = avp->features;
2030        config.num_tx_queues = avp->num_tx_queues;
2031        config.num_rx_queues = avp->num_rx_queues;
2032
2033        ret = avp_dev_ctrl_set_config(eth_dev, &config);
2034        if (ret < 0) {
2035                PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
2036                            ret);
2037                goto unlock;
2038        }
2039
2040        avp->flags |= AVP_F_CONFIGURED;
2041        ret = 0;
2042
2043unlock:
2044        rte_spinlock_unlock(&avp->lock);
2045        return ret;
2046}
2047
2048static int
2049avp_dev_start(struct rte_eth_dev *eth_dev)
2050{
2051        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2052        int ret;
2053
2054        rte_spinlock_lock(&avp->lock);
2055        if (avp->flags & AVP_F_DETACHED) {
2056                PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2057                ret = -ENOTSUP;
2058                goto unlock;
2059        }
2060
2061        /* update link state */
2062        ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
2063        if (ret < 0) {
2064                PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2065                            ret);
2066                goto unlock;
2067        }
2068
2069        /* remember current link state */
2070        avp->flags |= AVP_F_LINKUP;
2071
2072        ret = 0;
2073
2074unlock:
2075        rte_spinlock_unlock(&avp->lock);
2076        return ret;
2077}
2078
2079static int
2080avp_dev_stop(struct rte_eth_dev *eth_dev)
2081{
2082        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2083        int ret;
2084
2085        rte_spinlock_lock(&avp->lock);
2086        if (avp->flags & AVP_F_DETACHED) {
2087                PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2088                ret = -ENOTSUP;
2089                goto unlock;
2090        }
2091
2092        /* remember current link state */
2093        avp->flags &= ~AVP_F_LINKUP;
2094
2095        /* update link state */
2096        ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
2097        if (ret < 0) {
2098                PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
2099                            ret);
2100        }
2101
2102unlock:
2103        rte_spinlock_unlock(&avp->lock);
2104        return ret;
2105}
2106
2107static int
2108avp_dev_close(struct rte_eth_dev *eth_dev)
2109{
2110        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2111        int ret;
2112
2113        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2114                return 0;
2115
2116        rte_spinlock_lock(&avp->lock);
2117        if (avp->flags & AVP_F_DETACHED) {
2118                PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
2119                goto unlock;
2120        }
2121
2122        /* remember current link state */
2123        avp->flags &= ~AVP_F_LINKUP;
2124        avp->flags &= ~AVP_F_CONFIGURED;
2125
2126        ret = avp_dev_disable_interrupts(eth_dev);
2127        if (ret < 0) {
2128                PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
2129                /* continue */
2130        }
2131
2132        /* update device state */
2133        ret = avp_dev_ctrl_shutdown(eth_dev);
2134        if (ret < 0) {
2135                PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
2136                            ret);
2137                /* continue */
2138        }
2139
2140        /* release dynamic storage for rx/tx queues */
2141        avp_dev_rx_queue_release_all(eth_dev);
2142        avp_dev_tx_queue_release_all(eth_dev);
2143
2144unlock:
2145        rte_spinlock_unlock(&avp->lock);
2146        return 0;
2147}
2148
2149static int
2150avp_dev_link_update(struct rte_eth_dev *eth_dev,
2151                                        __rte_unused int wait_to_complete)
2152{
2153        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2154        struct rte_eth_link *link = &eth_dev->data->dev_link;
2155
2156        link->link_speed = ETH_SPEED_NUM_10G;
2157        link->link_duplex = ETH_LINK_FULL_DUPLEX;
2158        link->link_status = !!(avp->flags & AVP_F_LINKUP);
2159
2160        return -1;
2161}
2162
2163static int
2164avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2165{
2166        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2167
2168        rte_spinlock_lock(&avp->lock);
2169        if ((avp->flags & AVP_F_PROMISC) == 0) {
2170                avp->flags |= AVP_F_PROMISC;
2171                PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
2172                            eth_dev->data->port_id);
2173        }
2174        rte_spinlock_unlock(&avp->lock);
2175
2176        return 0;
2177}
2178
2179static int
2180avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
2181{
2182        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2183
2184        rte_spinlock_lock(&avp->lock);
2185        if ((avp->flags & AVP_F_PROMISC) != 0) {
2186                avp->flags &= ~AVP_F_PROMISC;
2187                PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
2188                            eth_dev->data->port_id);
2189        }
2190        rte_spinlock_unlock(&avp->lock);
2191
2192        return 0;
2193}
2194
2195static int
2196avp_dev_info_get(struct rte_eth_dev *eth_dev,
2197                 struct rte_eth_dev_info *dev_info)
2198{
2199        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2200
2201        dev_info->max_rx_queues = avp->max_rx_queues;
2202        dev_info->max_tx_queues = avp->max_tx_queues;
2203        dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
2204        dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
2205        dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
2206        if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2207                dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
2208                dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
2209        }
2210
2211        return 0;
2212}
2213
2214static int
2215avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
2216{
2217        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2218        struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
2219        uint64_t offloads = dev_conf->rxmode.offloads;
2220
2221        if (mask & ETH_VLAN_STRIP_MASK) {
2222                if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
2223                        if (offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
2224                                avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
2225                        else
2226                                avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
2227                } else {
2228                        PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
2229                }
2230        }
2231
2232        if (mask & ETH_VLAN_FILTER_MASK) {
2233                if (offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
2234                        PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
2235        }
2236
2237        if (mask & ETH_VLAN_EXTEND_MASK) {
2238                if (offloads & DEV_RX_OFFLOAD_VLAN_EXTEND)
2239                        PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
2240        }
2241
2242        return 0;
2243}
2244
2245static int
2246avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
2247{
2248        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2249        unsigned int i;
2250
2251        for (i = 0; i < avp->num_rx_queues; i++) {
2252                struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2253
2254                if (rxq) {
2255                        stats->ipackets += rxq->packets;
2256                        stats->ibytes += rxq->bytes;
2257                        stats->ierrors += rxq->errors;
2258
2259                        stats->q_ipackets[i] += rxq->packets;
2260                        stats->q_ibytes[i] += rxq->bytes;
2261                        stats->q_errors[i] += rxq->errors;
2262                }
2263        }
2264
2265        for (i = 0; i < avp->num_tx_queues; i++) {
2266                struct avp_queue *txq = avp->dev_data->tx_queues[i];
2267
2268                if (txq) {
2269                        stats->opackets += txq->packets;
2270                        stats->obytes += txq->bytes;
2271                        stats->oerrors += txq->errors;
2272
2273                        stats->q_opackets[i] += txq->packets;
2274                        stats->q_obytes[i] += txq->bytes;
2275                }
2276        }
2277
2278        return 0;
2279}
2280
2281static int
2282avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
2283{
2284        struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
2285        unsigned int i;
2286
2287        for (i = 0; i < avp->num_rx_queues; i++) {
2288                struct avp_queue *rxq = avp->dev_data->rx_queues[i];
2289
2290                if (rxq) {
2291                        rxq->bytes = 0;
2292                        rxq->packets = 0;
2293                        rxq->errors = 0;
2294                }
2295        }
2296
2297        for (i = 0; i < avp->num_tx_queues; i++) {
2298                struct avp_queue *txq = avp->dev_data->tx_queues[i];
2299
2300                if (txq) {
2301                        txq->bytes = 0;
2302                        txq->packets = 0;
2303                        txq->errors = 0;
2304                }
2305        }
2306
2307        return 0;
2308}
2309
/* Register the rte_avp_pmd driver as a PCI poll-mode driver named net_avp. */
2310RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
/* Associate the pci_id_avp_map device id table with the net_avp driver. */
2311RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
/* Register the avp_logtype_driver log type ("driver" suffix, level NOTICE). */
2312RTE_LOG_REGISTER_SUFFIX(avp_logtype_driver, driver, NOTICE);
2313