dpdk/drivers/net/memif/rte_eth_memif.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2018-2019 Cisco Systems, Inc.  All rights reserved.
 */

#include <stdlib.h>
#include <string.h>	/* memset(), strerror() are used directly below */
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/if_ether.h>
#include <errno.h>
#include <sys/eventfd.h>

#include <rte_version.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_eal_memconfig.h>

#include "rte_eth_memif.h"
#include "memif_socket.h"

#define ETH_MEMIF_ID_ARG                "id"
#define ETH_MEMIF_ROLE_ARG              "role"
#define ETH_MEMIF_PKT_BUFFER_SIZE_ARG   "bsize"
#define ETH_MEMIF_RING_SIZE_ARG         "rsize"
#define ETH_MEMIF_SOCKET_ARG            "socket"
#define ETH_MEMIF_SOCKET_ABSTRACT_ARG   "socket-abstract"
#define ETH_MEMIF_MAC_ARG               "mac"
#define ETH_MEMIF_ZC_ARG                "zero-copy"
#define ETH_MEMIF_SECRET_ARG            "secret"

static const char * const valid_arguments[] = {
        ETH_MEMIF_ID_ARG,
        ETH_MEMIF_ROLE_ARG,
        ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
        ETH_MEMIF_RING_SIZE_ARG,
        ETH_MEMIF_SOCKET_ARG,
        ETH_MEMIF_SOCKET_ABSTRACT_ARG,
        ETH_MEMIF_MAC_ARG,
        ETH_MEMIF_ZC_ARG,
        ETH_MEMIF_SECRET_ARG,
        NULL
};
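
/*
 * Illustrative usage (values are examples, not the PMD defaults): the
 * arguments above are parsed from the vdev devargs string, e.g.
 *
 *   --vdev=net_memif0,role=server,id=0,bsize=2048,rsize=10,socket=/run/memif.sock
 *
 * Any argument left out falls back to the driver's built-in default.
 */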

static const struct rte_eth_link pmd_link = {
        .link_speed = ETH_SPEED_NUM_10G,
        .link_duplex = ETH_LINK_FULL_DUPLEX,
        .link_status = ETH_LINK_DOWN,
        .link_autoneg = ETH_LINK_AUTONEG
};

#define MEMIF_MP_SEND_REGION            "memif_mp_send_region"

static int memif_region_init_zc(const struct rte_memseg_list *msl,
                                const struct rte_memseg *ms, void *arg);

const char *
memif_version(void)
{
        return ("memif-" RTE_STR(MEMIF_VERSION_MAJOR) "." RTE_STR(MEMIF_VERSION_MINOR));
}

/* Message header to synchronize regions */
struct mp_region_msg {
        char port_name[RTE_DEV_NAME_MAX_LEN];
        memif_region_index_t idx;
        memif_region_size_t size;
};

static int
memif_mp_send_region(const struct rte_mp_msg *msg, const void *peer)
{
        struct rte_eth_dev *dev;
        struct pmd_process_private *proc_private;
        const struct mp_region_msg *msg_param = (const struct mp_region_msg *)msg->param;
        struct rte_mp_msg reply;
        struct mp_region_msg *reply_param = (struct mp_region_msg *)reply.param;
        uint16_t port_id;
        int ret;

        /* Get requested port */
        ret = rte_eth_dev_get_port_by_name(msg_param->port_name, &port_id);
        if (ret) {
                MIF_LOG(ERR, "Failed to get port id for %s",
                        msg_param->port_name);
                return -1;
        }
        dev = &rte_eth_devices[port_id];
        proc_private = dev->process_private;

        memset(&reply, 0, sizeof(reply));
        strlcpy(reply.name, msg->name, sizeof(reply.name));
        reply_param->idx = msg_param->idx;
        if (proc_private->regions[msg_param->idx] != NULL) {
                reply_param->size = proc_private->regions[msg_param->idx]->region_size;
                reply.fds[0] = proc_private->regions[msg_param->idx]->fd;
                reply.num_fds = 1;
        }
        reply.len_param = sizeof(*reply_param);
        if (rte_mp_reply(&reply, peer) < 0) {
                MIF_LOG(ERR, "Failed to reply to an add region request");
                return -1;
        }

        return 0;
}
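
/*
 * Note on the exchange above: region sharing between processes is a plain
 * request/reply over the rte_mp channel. The secondary sends one
 * MEMIF_MP_SEND_REGION request per region index; the primary answers with
 * the region size and the region's file descriptor in reply.fds[0]. The
 * secondary only mmaps that fd later, in memif_connect().
 */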

/*
 * Request regions
 * Called by the secondary process when the port's link status goes up.
 */
static int
memif_mp_request_regions(struct rte_eth_dev *dev)
{
        int ret, i;
        struct timespec timeout = {.tv_sec = 5, .tv_nsec = 0};
        struct rte_mp_msg msg, *reply;
        struct rte_mp_reply replies;
        struct mp_region_msg *msg_param = (struct mp_region_msg *)msg.param;
        struct mp_region_msg *reply_param;
        struct memif_region *r;
        struct pmd_process_private *proc_private = dev->process_private;
        struct pmd_internals *pmd = dev->data->dev_private;
        /* in case of zero-copy client, only request region 0 */
        uint16_t max_region_num = (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) ?
                                   1 : ETH_MEMIF_MAX_REGION_NUM;

        MIF_LOG(DEBUG, "Requesting memory regions");

        for (i = 0; i < max_region_num; i++) {
                /* Prepare the message */
                memset(&msg, 0, sizeof(msg));
                strlcpy(msg.name, MEMIF_MP_SEND_REGION, sizeof(msg.name));
                strlcpy(msg_param->port_name, dev->data->name,
                        sizeof(msg_param->port_name));
                msg_param->idx = i;
                msg.len_param = sizeof(*msg_param);

                /* Send message */
                ret = rte_mp_request_sync(&msg, &replies, &timeout);
                if (ret < 0 || replies.nb_received != 1) {
                        MIF_LOG(ERR, "Failed to send mp msg: %d",
                                rte_errno);
                        return -1;
                }

                reply = &replies.msgs[0];
                reply_param = (struct mp_region_msg *)reply->param;

                if (reply_param->size > 0) {
                        r = rte_zmalloc("region", sizeof(struct memif_region), 0);
                        if (r == NULL) {
                                MIF_LOG(ERR, "Failed to alloc memif region.");
                                free(reply);
                                return -ENOMEM;
                        }
                        r->region_size = reply_param->size;
                        if (reply->num_fds < 1) {
                                MIF_LOG(ERR, "Missing file descriptor.");
                                free(reply);
                                return -1;
                        }
                        r->fd = reply->fds[0];
                        r->addr = NULL;

                        proc_private->regions[reply_param->idx] = r;
                        proc_private->regions_num++;
                }
                free(reply);
        }

        if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
                ret = rte_memseg_walk(memif_region_init_zc, (void *)proc_private);
                if (ret < 0)
                        return ret;
        }

        return memif_connect(dev);
}

static int
memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info)
{
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
        dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
        dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
        dev_info->min_rx_bufsize = 0;

        return 0;
}

static memif_ring_t *
memif_get_ring(struct pmd_internals *pmd, struct pmd_process_private *proc_private,
               memif_ring_type_t type, uint16_t ring_num)
{
        /* rings only in region 0 */
        void *p = proc_private->regions[0]->addr;
        int ring_size = sizeof(memif_ring_t) + sizeof(memif_desc_t) *
            (1 << pmd->run.log2_ring_size);

        p = (uint8_t *)p + (ring_num + type * pmd->run.num_c2s_rings) * ring_size;

        return (memif_ring_t *)p;
}
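
/*
 * Layout assumed by memif_get_ring(): region 0 holds all C2S rings first,
 * then all S2C rings, back to back. Each ring occupies
 *
 *   sizeof(memif_ring_t) + (1 << log2_ring_size) * sizeof(memif_desc_t)
 *
 * bytes, so e.g. with log2_ring_size == 10 a ring is the header plus 1024
 * descriptors, and ring i of a given type starts i ring-sizes past the
 * start of its type's block.
 */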

static memif_region_offset_t
memif_get_ring_offset(struct rte_eth_dev *dev, struct memif_queue *mq,
                      memif_ring_type_t type, uint16_t num)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct pmd_process_private *proc_private = dev->process_private;

        return ((uint8_t *)memif_get_ring(pmd, proc_private, type, num) -
                (uint8_t *)proc_private->regions[mq->region]->addr);
}

static memif_ring_t *
memif_get_ring_from_queue(struct pmd_process_private *proc_private,
                          struct memif_queue *mq)
{
        struct memif_region *r;

        r = proc_private->regions[mq->region];
        if (r == NULL)
                return NULL;

        return (memif_ring_t *)((uint8_t *)r->addr + mq->ring_offset);
}

static void *
memif_get_buffer(struct pmd_process_private *proc_private, memif_desc_t *d)
{
        return ((uint8_t *)proc_private->regions[d->region]->addr + d->offset);
}

/* Free mbufs received by server */
static void
memif_free_stored_mbufs(struct pmd_process_private *proc_private, struct memif_queue *mq)
{
        uint16_t cur_tail;
        uint16_t mask = (1 << mq->log2_ring_size) - 1;
        memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);

        /* FIXME: improve performance */
        /* The ring->tail acts as a guard variable between Tx and Rx
         * threads, so the load-acquire here pairs with the store-release
         * in eth_memif_rx for C2S queues.
         */
        cur_tail = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
        while (mq->last_tail != cur_tail) {
                RTE_MBUF_PREFETCH_TO_FREE(mq->buffers[(mq->last_tail + 1) & mask]);
                /* Decrement refcnt and free mbuf. (current segment) */
                rte_mbuf_refcnt_update(mq->buffers[mq->last_tail & mask], -1);
                rte_pktmbuf_free_seg(mq->buffers[mq->last_tail & mask]);
                mq->last_tail++;
        }
}
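
/*
 * Note: the refcnt decrement above undoes the rte_mbuf_refcnt_update(mbuf, 1)
 * performed in memif_tx_one_zc(), so a zero-copy Tx buffer is returned to its
 * mempool only after the server has consumed the corresponding slot.
 */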

static int
memif_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *cur_tail,
                    struct rte_mbuf *tail)
{
        /* Check for number-of-segments-overflow */
        if (unlikely(head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS))
                return -EOVERFLOW;

        /* Chain 'tail' onto the old tail */
        cur_tail->next = tail;

        /* accumulate number of segments and total length. */
        head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs);

        tail->pkt_len = tail->data_len;
        head->pkt_len += tail->pkt_len;

        return 0;
}

static uint16_t
eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
        struct pmd_process_private *proc_private =
                rte_eth_devices[mq->in_port].process_private;
        memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
        uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
        uint16_t n_rx_pkts = 0;
        uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
                RTE_PKTMBUF_HEADROOM;
        uint16_t src_len, src_off, dst_len, dst_off, cp_len;
        memif_ring_type_t type = mq->type;
        memif_desc_t *d0;
        struct rte_mbuf *mbuf, *mbuf_head, *mbuf_tail;
        uint64_t b;
        ssize_t size __rte_unused;
        uint16_t head;
        int ret;
        struct rte_eth_link link;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL)) {
                /* Secondary process will attempt to request regions. */
                ret = rte_eth_link_get(mq->in_port, &link);
                if (ret < 0)
                        MIF_LOG(ERR, "Failed to get port %u link info: %s",
                                mq->in_port, rte_strerror(-ret));
                return 0;
        }

        /* consume interrupt */
        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0)
                size = read(mq->intr_handle.fd, &b, sizeof(b));

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        if (type == MEMIF_RING_C2S) {
                cur_slot = mq->last_head;
                last_slot = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE);
        } else {
                cur_slot = mq->last_tail;
                last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
        }

        if (cur_slot == last_slot)
                goto refill;
        n_slots = last_slot - cur_slot;

        while (n_slots && n_rx_pkts < nb_pkts) {
                mbuf_head = rte_pktmbuf_alloc(mq->mempool);
                if (unlikely(mbuf_head == NULL))
                        goto no_free_bufs;
                mbuf = mbuf_head;
                mbuf->port = mq->in_port;

next_slot:
                s0 = cur_slot & mask;
                d0 = &ring->desc[s0];

                src_len = d0->length;
                dst_off = 0;
                src_off = 0;

                do {
                        dst_len = mbuf_size - dst_off;
                        if (dst_len == 0) {
                                dst_off = 0;
                                dst_len = mbuf_size;

                                /* store pointer to tail */
                                mbuf_tail = mbuf;
                                mbuf = rte_pktmbuf_alloc(mq->mempool);
                                if (unlikely(mbuf == NULL))
                                        goto no_free_bufs;
                                mbuf->port = mq->in_port;
                                ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
                                if (unlikely(ret < 0)) {
                                        MIF_LOG(ERR, "number-of-segments-overflow");
                                        rte_pktmbuf_free(mbuf);
                                        goto no_free_bufs;
                                }
                        }
                        cp_len = RTE_MIN(dst_len, src_len);

                        rte_pktmbuf_data_len(mbuf) += cp_len;
                        rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
                        if (mbuf != mbuf_head)
                                rte_pktmbuf_pkt_len(mbuf_head) += cp_len;

                        rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
                                                           dst_off),
                                (uint8_t *)memif_get_buffer(proc_private, d0) +
                                src_off, cp_len);

                        src_off += cp_len;
                        dst_off += cp_len;
                        src_len -= cp_len;
                } while (src_len);

                cur_slot++;
                n_slots--;

                if (d0->flags & MEMIF_DESC_FLAG_NEXT)
                        goto next_slot;

                mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
                *bufs++ = mbuf_head;
                n_rx_pkts++;
        }

no_free_bufs:
        if (type == MEMIF_RING_C2S) {
                __atomic_store_n(&ring->tail, cur_slot, __ATOMIC_RELEASE);
                mq->last_head = cur_slot;
        } else {
                mq->last_tail = cur_slot;
        }

refill:
        if (type == MEMIF_RING_S2C) {
                /* ring->head is updated by the receiver and this function
                 * is called in the context of the receiver thread. The loads
                 * in the receiver do not need to synchronize with its own
                 * stores.
                 */
                head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
                n_slots = ring_size - head + mq->last_tail;

                while (n_slots--) {
                        s0 = head++ & mask;
                        d0 = &ring->desc[s0];
                        d0->length = pmd->run.pkt_buffer_size;
                }
                __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);
        }

        mq->n_pkts += n_rx_pkts;
        return n_rx_pkts;
}
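
/*
 * The slot arithmetic above (last_slot - cur_slot, and the refill count)
 * deliberately relies on uint16_t wrap-around: the difference of the two
 * free-running 16-bit counters is the number of outstanding slots, which
 * stays correct across overflow because the ring size is far below 65536.
 */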

static uint16_t
eth_memif_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
        struct pmd_process_private *proc_private =
                rte_eth_devices[mq->in_port].process_private;
        memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
        uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0, head;
        uint16_t n_rx_pkts = 0;
        memif_desc_t *d0;
        struct rte_mbuf *mbuf, *mbuf_tail;
        struct rte_mbuf *mbuf_head = NULL;
        int ret;
        struct rte_eth_link link;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL)) {
                /* Secondary process will attempt to request regions. */
                rte_eth_link_get(mq->in_port, &link);
                return 0;
        }

        /* consume interrupt */
        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
                uint64_t b;
                ssize_t size __rte_unused;
                size = read(mq->intr_handle.fd, &b, sizeof(b));
        }

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        cur_slot = mq->last_tail;
        /* The ring->tail acts as a guard variable between Tx and Rx
         * threads, so the load-acquire here pairs with a store-release
         * to synchronize it between threads.
         */
        last_slot = __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
        if (cur_slot == last_slot)
                goto refill;
        n_slots = last_slot - cur_slot;

        while (n_slots && n_rx_pkts < nb_pkts) {
                s0 = cur_slot & mask;

                d0 = &ring->desc[s0];
                mbuf_head = mq->buffers[s0];
                mbuf = mbuf_head;

next_slot:
                /* prefetch next descriptor */
                if (n_rx_pkts + 1 < nb_pkts)
                        rte_prefetch0(&ring->desc[(cur_slot + 1) & mask]);

                mbuf->port = mq->in_port;
                rte_pktmbuf_data_len(mbuf) = d0->length;
                rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);

                mq->n_bytes += rte_pktmbuf_data_len(mbuf);

                cur_slot++;
                n_slots--;
                if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
                        s0 = cur_slot & mask;
                        d0 = &ring->desc[s0];
                        mbuf_tail = mbuf;
                        mbuf = mq->buffers[s0];
                        ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
                        if (unlikely(ret < 0)) {
                                MIF_LOG(ERR, "number-of-segments-overflow");
                                goto refill;
                        }
                        goto next_slot;
                }

                *bufs++ = mbuf_head;
                n_rx_pkts++;
        }

        mq->last_tail = cur_slot;

/* Supply server with new buffers */
refill:
        /* ring->head is updated by the receiver and this function
         * is called in the context of the receiver thread. The loads
         * in the receiver do not need to synchronize with its own
         * stores.
         */
        head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
        n_slots = ring_size - head + mq->last_tail;

        if (n_slots < 32)
                goto no_free_mbufs;

        ret = rte_pktmbuf_alloc_bulk(mq->mempool, &mq->buffers[head & mask], n_slots);
        if (unlikely(ret < 0))
                goto no_free_mbufs;

        while (n_slots--) {
                s0 = head++ & mask;
                if (n_slots > 0)
                        rte_prefetch0(mq->buffers[head & mask]);
                d0 = &ring->desc[s0];
                /* store buffer header */
                mbuf = mq->buffers[s0];
                /* populate descriptor */
                d0->length = rte_pktmbuf_data_room_size(mq->mempool) -
                                RTE_PKTMBUF_HEADROOM;
                d0->region = 1;
                d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
                        (uint8_t *)proc_private->regions[d0->region]->addr;
        }
no_free_mbufs:
        /* The ring->head acts as a guard variable between Tx and Rx
         * threads, so the store-release here pairs with the load-acquire
         * in eth_memif_tx.
         */
        __atomic_store_n(&ring->head, head, __ATOMIC_RELEASE);

        mq->n_pkts += n_rx_pkts;

        return n_rx_pkts;
}
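
/*
 * Design note on the refill path above: buffers are replenished in bulk
 * with rte_pktmbuf_alloc_bulk(), and the refill is skipped entirely when
 * fewer than 32 slots are free. That trades a little latency for far
 * fewer mempool operations per packet.
 */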

static uint16_t
eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
        struct pmd_process_private *proc_private =
                rte_eth_devices[mq->in_port].process_private;
        memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
        uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
        uint16_t src_len, src_off, dst_len, dst_off, cp_len;
        memif_ring_type_t type = mq->type;
        memif_desc_t *d0;
        struct rte_mbuf *mbuf;
        struct rte_mbuf *mbuf_head;
        uint64_t a;
        ssize_t size;
        struct rte_eth_link link;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL)) {
                int ret;

                /* Secondary process will attempt to request regions. */
                ret = rte_eth_link_get(mq->in_port, &link);
                if (ret < 0)
                        MIF_LOG(ERR, "Failed to get port %u link info: %s",
                                mq->in_port, rte_strerror(-ret));
                return 0;
        }

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        if (type == MEMIF_RING_C2S) {
                /* For C2S queues ring->head is updated by the sender and
                 * this function is called in the context of the sending
                 * thread. The loads in the sender do not need to
                 * synchronize with its own stores. Hence, the following
                 * load can be a relaxed load.
                 */
                slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
                n_free = ring_size - slot +
                                __atomic_load_n(&ring->tail, __ATOMIC_ACQUIRE);
        } else {
                /* For S2C queues ring->tail is updated by the sender and
                 * this function is called in the context of the sending
                 * thread. The loads in the sender do not need to
                 * synchronize with its own stores. Hence, the following
                 * load can be a relaxed load.
                 */
                slot = __atomic_load_n(&ring->tail, __ATOMIC_RELAXED);
                n_free = __atomic_load_n(&ring->head, __ATOMIC_ACQUIRE) - slot;
        }

        while (n_tx_pkts < nb_pkts && n_free) {
                mbuf_head = *bufs++;
                mbuf = mbuf_head;

                saved_slot = slot;
                d0 = &ring->desc[slot & mask];
                dst_off = 0;
                dst_len = (type == MEMIF_RING_C2S) ?
                        pmd->run.pkt_buffer_size : d0->length;

next_in_chain:
                src_off = 0;
                src_len = rte_pktmbuf_data_len(mbuf);

                while (src_len) {
                        if (dst_len == 0) {
                                if (n_free) {
                                        slot++;
                                        n_free--;
                                        d0->flags |= MEMIF_DESC_FLAG_NEXT;
                                        d0 = &ring->desc[slot & mask];
                                        dst_off = 0;
                                        dst_len = (type == MEMIF_RING_C2S) ?
                                            pmd->run.pkt_buffer_size : d0->length;
                                        d0->flags = 0;
                                } else {
                                        slot = saved_slot;
                                        goto no_free_slots;
                                }
                        }
                        cp_len = RTE_MIN(dst_len, src_len);

                        rte_memcpy((uint8_t *)memif_get_buffer(proc_private,
                                                               d0) + dst_off,
                                rte_pktmbuf_mtod_offset(mbuf, void *, src_off),
                                cp_len);

                        mq->n_bytes += cp_len;
                        src_off += cp_len;
                        dst_off += cp_len;
                        src_len -= cp_len;
                        dst_len -= cp_len;

                        d0->length = dst_off;
                }

                if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
                        mbuf = mbuf->next;
                        goto next_in_chain;
                }

                n_tx_pkts++;
                slot++;
                n_free--;
                rte_pktmbuf_free(mbuf_head);
        }

no_free_slots:
        if (type == MEMIF_RING_C2S)
                __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);
        else
                __atomic_store_n(&ring->tail, slot, __ATOMIC_RELEASE);

        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
                a = 1;
                size = write(mq->intr_handle.fd, &a, sizeof(a));
                if (unlikely(size < 0)) {
                        MIF_LOG(WARNING,
                                "Failed to send interrupt. %s", strerror(errno));
                }
        }

        mq->n_pkts += n_tx_pkts;
        return n_tx_pkts;
}

static int
memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
                memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
                uint16_t slot, uint16_t n_free)
{
        memif_desc_t *d0;
        int used_slots = 1;

next_in_chain:
        /* store pointer to mbuf to free it later */
        mq->buffers[slot & mask] = mbuf;
        /* Increment refcnt to make sure the buffer is not freed before server
         * receives it. (current segment)
         */
        rte_mbuf_refcnt_update(mbuf, 1);
        /* populate descriptor */
        d0 = &ring->desc[slot & mask];
        d0->length = rte_pktmbuf_data_len(mbuf);
        mq->n_bytes += rte_pktmbuf_data_len(mbuf);
        /* FIXME: get region index */
        d0->region = 1;
        d0->offset = rte_pktmbuf_mtod(mbuf, uint8_t *) -
                (uint8_t *)proc_private->regions[d0->region]->addr;
        d0->flags = 0;

        /* check if buffer is chained */
        if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
                if (n_free < 2)
                        return 0;
                /* mark buffer as chained */
                d0->flags |= MEMIF_DESC_FLAG_NEXT;
                /* advance mbuf */
                mbuf = mbuf->next;
                /* update counters */
                used_slots++;
                slot++;
                n_free--;
                goto next_in_chain;
        }
        return used_slots;
}

static uint16_t
eth_memif_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        struct memif_queue *mq = queue;
        struct pmd_internals *pmd = rte_eth_devices[mq->in_port].data->dev_private;
        struct pmd_process_private *proc_private =
                rte_eth_devices[mq->in_port].process_private;
        memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
        uint16_t slot, n_free, ring_size, mask, n_tx_pkts = 0;
        struct rte_eth_link link;

        if (unlikely((pmd->flags & ETH_MEMIF_FLAG_CONNECTED) == 0))
                return 0;
        if (unlikely(ring == NULL)) {
                /* Secondary process will attempt to request regions. */
                rte_eth_link_get(mq->in_port, &link);
                return 0;
        }

        ring_size = 1 << mq->log2_ring_size;
        mask = ring_size - 1;

        /* free mbufs received by server */
        memif_free_stored_mbufs(proc_private, mq);

        /* ring type always MEMIF_RING_C2S */
        /* For C2S queues ring->head is updated by the sender and
         * this function is called in the context of the sending
         * thread. The loads in the sender do not need to
         * synchronize with its own stores. Hence, the following
         * load can be a relaxed load.
         */
        slot = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);
        n_free = ring_size - slot + mq->last_tail;

        int used_slots;

        while (n_free && (n_tx_pkts < nb_pkts)) {
                while ((n_free > 4) && ((nb_pkts - n_tx_pkts) > 4)) {
                        if ((nb_pkts - n_tx_pkts) > 8) {
                                rte_prefetch0(*bufs + 4);
                                rte_prefetch0(*bufs + 5);
                                rte_prefetch0(*bufs + 6);
                                rte_prefetch0(*bufs + 7);
                        }
                        used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
                                mask, slot, n_free);
                        if (unlikely(used_slots < 1))
                                goto no_free_slots;
                        n_tx_pkts++;
                        slot += used_slots;
                        n_free -= used_slots;

                        used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
                                mask, slot, n_free);
                        if (unlikely(used_slots < 1))
                                goto no_free_slots;
                        n_tx_pkts++;
                        slot += used_slots;
                        n_free -= used_slots;

                        used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
                                mask, slot, n_free);
                        if (unlikely(used_slots < 1))
                                goto no_free_slots;
                        n_tx_pkts++;
                        slot += used_slots;
                        n_free -= used_slots;

                        used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
                                mask, slot, n_free);
                        if (unlikely(used_slots < 1))
                                goto no_free_slots;
                        n_tx_pkts++;
                        slot += used_slots;
                        n_free -= used_slots;
                }
                used_slots = memif_tx_one_zc(proc_private, mq, ring, *bufs++,
                        mask, slot, n_free);
                if (unlikely(used_slots < 1))
                        goto no_free_slots;
                n_tx_pkts++;
                slot += used_slots;
                n_free -= used_slots;
        }

no_free_slots:
        /* ring type always MEMIF_RING_C2S */
        /* The ring->head acts as a guard variable between Tx and Rx
         * threads, so the store-release here pairs with the load-acquire
         * in eth_memif_rx for C2S rings.
         */
        __atomic_store_n(&ring->head, slot, __ATOMIC_RELEASE);

        /* Send interrupt, if enabled. */
        if ((ring->flags & MEMIF_RING_FLAG_MASK_INT) == 0) {
                uint64_t a = 1;
                ssize_t size = write(mq->intr_handle.fd, &a, sizeof(a));
                if (unlikely(size < 0)) {
                        MIF_LOG(WARNING,
                                "Failed to send interrupt. %s", strerror(errno));
                }
        }

        /* increment queue counters */
        mq->n_pkts += n_tx_pkts;

        return n_tx_pkts;
}
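
/*
 * Design note: the manually unrolled inner loop above handles four packets
 * per iteration (prefetching four mbufs ahead when the burst is large
 * enough) and falls through to a single-packet step for the remainder,
 * keeping per-packet branch overhead on the zero-copy path low.
 */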

void
memif_free_regions(struct rte_eth_dev *dev)
{
        struct pmd_process_private *proc_private = dev->process_private;
        struct pmd_internals *pmd = dev->data->dev_private;
        int i;
        struct memif_region *r;

        /* regions are allocated contiguously, so it's
         * enough to loop until 'proc_private->regions_num'
         */
        for (i = 0; i < proc_private->regions_num; i++) {
                r = proc_private->regions[i];
                if (r != NULL) {
                        /* Zero-copy regions (i > 0) map DPDK memory owned by
                         * EAL, so do not munmap them; just drop the mapping
                         * reference and close the fd.
                         */
                        if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
                                r->addr = NULL;
                                if (r->fd > 0)
                                        close(r->fd);
                        }
                        if (r->addr != NULL) {
                                munmap(r->addr, r->region_size);
                                if (r->fd > 0) {
                                        close(r->fd);
                                        r->fd = -1;
                                }
                        }
                        rte_free(r);
                        proc_private->regions[i] = NULL;
                }
        }
        proc_private->regions_num = 0;
}

static int
memif_region_init_zc(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                     void *arg)
{
        struct pmd_process_private *proc_private = (struct pmd_process_private *)arg;
        struct memif_region *r;

        if (proc_private->regions_num < 1) {
                MIF_LOG(ERR, "Missing descriptor region");
                return -1;
        }

        r = proc_private->regions[proc_private->regions_num - 1];

        if (r->addr != msl->base_va)
                r = proc_private->regions[++proc_private->regions_num - 1];

        if (r == NULL) {
                r = rte_zmalloc("region", sizeof(struct memif_region), 0);
                if (r == NULL) {
                        MIF_LOG(ERR, "Failed to alloc memif region.");
                        return -ENOMEM;
                }

                r->addr = msl->base_va;
                r->region_size = ms->len;
                r->fd = rte_memseg_get_fd(ms);
                if (r->fd < 0)
                        return -1;
                r->pkt_buffer_offset = 0;

                proc_private->regions[proc_private->regions_num - 1] = r;
        } else {
                r->region_size += ms->len;
        }

        return 0;
}

static int
memif_region_init_shm(struct rte_eth_dev *dev, uint8_t has_buffers)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct pmd_process_private *proc_private = dev->process_private;
        char shm_name[ETH_MEMIF_SHM_NAME_SIZE];
        int ret = 0;
        struct memif_region *r;

        if (proc_private->regions_num >= ETH_MEMIF_MAX_REGION_NUM) {
                MIF_LOG(ERR, "Too many regions.");
                return -1;
        }

        r = rte_zmalloc("region", sizeof(struct memif_region), 0);
        if (r == NULL) {
                MIF_LOG(ERR, "Failed to alloc memif region.");
                return -ENOMEM;
        }

        /* calculate buffer offset */
        r->pkt_buffer_offset = (pmd->run.num_c2s_rings + pmd->run.num_s2c_rings) *
            (sizeof(memif_ring_t) + sizeof(memif_desc_t) *
            (1 << pmd->run.log2_ring_size));

        r->region_size = r->pkt_buffer_offset;
        /* if region has buffers, add buffers size to region_size */
        if (has_buffers == 1)
                r->region_size += (uint32_t)(pmd->run.pkt_buffer_size *
                        (1 << pmd->run.log2_ring_size) *
                        (pmd->run.num_c2s_rings +
                         pmd->run.num_s2c_rings));

        memset(shm_name, 0, sizeof(char) * ETH_MEMIF_SHM_NAME_SIZE);
        snprintf(shm_name, ETH_MEMIF_SHM_NAME_SIZE, "memif_region_%d",
                 proc_private->regions_num);

        r->fd = memfd_create(shm_name, MFD_ALLOW_SEALING);
        if (r->fd < 0) {
                MIF_LOG(ERR, "Failed to create shm file: %s.", strerror(errno));
                ret = -1;
                goto error;
        }

        ret = fcntl(r->fd, F_ADD_SEALS, F_SEAL_SHRINK);
        if (ret < 0) {
                MIF_LOG(ERR, "Failed to add seals to shm file: %s.", strerror(errno));
                goto error;
        }

        ret = ftruncate(r->fd, r->region_size);
        if (ret < 0) {
                MIF_LOG(ERR, "Failed to truncate shm file: %s.", strerror(errno));
                goto error;
        }

        r->addr = mmap(NULL, r->region_size, PROT_READ |
                       PROT_WRITE, MAP_SHARED, r->fd, 0);
        if (r->addr == MAP_FAILED) {
                /* report the mmap error, not the stale ftruncate result */
                MIF_LOG(ERR, "Failed to mmap shm region: %s.", strerror(errno));
                ret = -1;
                goto error;
        }

        proc_private->regions[proc_private->regions_num] = r;
        proc_private->regions_num++;

        return ret;

error:
        if (r->fd > 0)
                close(r->fd);
        rte_free(r);	/* the region struct was leaked on this path */

        return ret;
}
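
/*
 * Sizing example for the region above (numbers illustrative): with one
 * ring pair, log2_ring_size == 10 and 2048-byte buffers, the shared file
 * holds two ring headers plus 2 * 1024 descriptors, followed by
 * 2 * 1024 * 2048 bytes (4 MiB) of packet buffers when has_buffers == 1.
 */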

static int
memif_regions_init(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int ret;

        /*
         * Zero-copy exposes DPDK memory.
         * Each memseg list will be represented by a memif region.
         * Zero-copy regions indexing: memseg list idx + 1,
         * as we already have region 0 reserved for descriptors.
         */
        if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
                /* create region idx 0 containing descriptors */
                ret = memif_region_init_shm(dev, 0);
                if (ret < 0)
                        return ret;
                ret = rte_memseg_walk(memif_region_init_zc, (void *)dev->process_private);
                if (ret < 0)
                        return ret;
        } else {
                /* create one memory region containing rings and buffers */
                ret = memif_region_init_shm(dev, /* has buffers */ 1);
                if (ret < 0)
                        return ret;
        }

        return 0;
}

static void
memif_init_rings(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct pmd_process_private *proc_private = dev->process_private;
        memif_ring_t *ring;
        int i, j;
        uint16_t slot;

        for (i = 0; i < pmd->run.num_c2s_rings; i++) {
                ring = memif_get_ring(pmd, proc_private, MEMIF_RING_C2S, i);
                __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
                __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
                ring->cookie = MEMIF_COOKIE;
                ring->flags = 0;

                if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
                        continue;

                for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
                        slot = i * (1 << pmd->run.log2_ring_size) + j;
                        ring->desc[j].region = 0;
                        ring->desc[j].offset =
                                proc_private->regions[0]->pkt_buffer_offset +
                                (uint32_t)(slot * pmd->run.pkt_buffer_size);
                        ring->desc[j].length = pmd->run.pkt_buffer_size;
                }
        }

        for (i = 0; i < pmd->run.num_s2c_rings; i++) {
                ring = memif_get_ring(pmd, proc_private, MEMIF_RING_S2C, i);
                __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
                __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
                ring->cookie = MEMIF_COOKIE;
                ring->flags = 0;

                if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)
                        continue;

                for (j = 0; j < (1 << pmd->run.log2_ring_size); j++) {
                        slot = (i + pmd->run.num_c2s_rings) *
                            (1 << pmd->run.log2_ring_size) + j;
                        ring->desc[j].region = 0;
                        ring->desc[j].offset =
                                proc_private->regions[0]->pkt_buffer_offset +
                                (uint32_t)(slot * pmd->run.pkt_buffer_size);
                        ring->desc[j].length = pmd->run.pkt_buffer_size;
                }
        }
}
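
/*
 * Buffer layout produced by the loops above (non-zero-copy case): past
 * pkt_buffer_offset, region 0 holds one pkt_buffer_size buffer per slot,
 * indexed linearly over all rings. Descriptor j of C2S ring i therefore
 * points at buffer (i * ring_size + j), and the S2C buffers continue
 * immediately after the C2S block.
 */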

/* called only by client */
static int
memif_init_queues(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;
        int i;

        for (i = 0; i < pmd->run.num_c2s_rings; i++) {
                mq = dev->data->tx_queues[i];
                mq->log2_ring_size = pmd->run.log2_ring_size;
                /* queues located only in region 0 */
                mq->region = 0;
                mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_C2S, i);
                mq->last_head = 0;
                mq->last_tail = 0;
                mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
                if (mq->intr_handle.fd < 0) {
                        MIF_LOG(WARNING,
                                "Failed to create eventfd for tx queue %d: %s.", i,
                                strerror(errno));
                }
                mq->buffers = NULL;
                if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
                        mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
                                                  (1 << mq->log2_ring_size), 0);
                        if (mq->buffers == NULL)
                                return -ENOMEM;
                }
        }

        for (i = 0; i < pmd->run.num_s2c_rings; i++) {
                mq = dev->data->rx_queues[i];
                mq->log2_ring_size = pmd->run.log2_ring_size;
                /* queues located only in region 0 */
                mq->region = 0;
                mq->ring_offset = memif_get_ring_offset(dev, mq, MEMIF_RING_S2C, i);
                mq->last_head = 0;
                mq->last_tail = 0;
                mq->intr_handle.fd = eventfd(0, EFD_NONBLOCK);
                if (mq->intr_handle.fd < 0) {
                        MIF_LOG(WARNING,
                                "Failed to create eventfd for rx queue %d: %s.", i,
                                strerror(errno));
                }
                mq->buffers = NULL;
                if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
                        mq->buffers = rte_zmalloc("bufs", sizeof(struct rte_mbuf *) *
                                                  (1 << mq->log2_ring_size), 0);
                        if (mq->buffers == NULL)
                                return -ENOMEM;
                }
        }
        return 0;
}

int
memif_init_regions_and_queues(struct rte_eth_dev *dev)
{
        int ret;

        ret = memif_regions_init(dev);
        if (ret < 0)
                return ret;

        memif_init_rings(dev);

        ret = memif_init_queues(dev);
        if (ret < 0)
                return ret;

        return 0;
}

int
memif_connect(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct pmd_process_private *proc_private = dev->process_private;
        struct memif_region *mr;
        struct memif_queue *mq;
        memif_ring_t *ring;
        int i;

        for (i = 0; i < proc_private->regions_num; i++) {
                mr = proc_private->regions[i];
                if (mr != NULL) {
                        if (mr->addr == NULL) {
                                if (mr->fd < 0)
                                        return -1;
                                mr->addr = mmap(NULL, mr->region_size,
                                                PROT_READ | PROT_WRITE,
                                                MAP_SHARED, mr->fd, 0);
                                if (mr->addr == MAP_FAILED) {
                                        MIF_LOG(ERR, "mmap failed: %s",
                                                strerror(errno));
                                        return -1;
                                }
                        }
                        if (i > 0 && (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY)) {
                                /* close memseg file */
                                close(mr->fd);
                                mr->fd = -1;
                        }
                }
        }

        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                for (i = 0; i < pmd->run.num_c2s_rings; i++) {
                        mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
                            dev->data->tx_queues[i] : dev->data->rx_queues[i];
                        ring = memif_get_ring_from_queue(proc_private, mq);
                        if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
                                MIF_LOG(ERR, "Wrong ring");
                                return -1;
                        }
                        __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
                        __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
                        mq->last_head = 0;
                        mq->last_tail = 0;
                        /* enable polling mode */
                        if (pmd->role == MEMIF_ROLE_SERVER)
                                ring->flags = MEMIF_RING_FLAG_MASK_INT;
                }
                for (i = 0; i < pmd->run.num_s2c_rings; i++) {
                        mq = (pmd->role == MEMIF_ROLE_CLIENT) ?
                            dev->data->rx_queues[i] : dev->data->tx_queues[i];
                        ring = memif_get_ring_from_queue(proc_private, mq);
                        if (ring == NULL || ring->cookie != MEMIF_COOKIE) {
                                MIF_LOG(ERR, "Wrong ring");
                                return -1;
                        }
                        __atomic_store_n(&ring->head, 0, __ATOMIC_RELAXED);
                        __atomic_store_n(&ring->tail, 0, __ATOMIC_RELAXED);
                        mq->last_head = 0;
                        mq->last_tail = 0;
                        /* enable polling mode */
                        if (pmd->role == MEMIF_ROLE_CLIENT)
                                ring->flags = MEMIF_RING_FLAG_MASK_INT;
                }

                pmd->flags &= ~ETH_MEMIF_FLAG_CONNECTING;
                pmd->flags |= ETH_MEMIF_FLAG_CONNECTED;
                dev->data->dev_link.link_status = ETH_LINK_UP;
        }
        MIF_LOG(INFO, "Connected.");
        return 0;
}
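
/*
 * Note: regions are mapped lazily in memif_connect(). A secondary process
 * arrives with only the fds obtained in memif_mp_request_regions()
 * (addr == NULL) and mmaps them here; ring state is reset and interrupt
 * masking configured by the primary process only.
 */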

static int
memif_dev_start(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int ret = 0;

        switch (pmd->role) {
        case MEMIF_ROLE_CLIENT:
                ret = memif_connect_client(dev);
                break;
        case MEMIF_ROLE_SERVER:
                ret = memif_connect_server(dev);
                break;
        default:
                MIF_LOG(ERR, "Unknown role: %d.", pmd->role);
                ret = -1;
                break;
        }

        return ret;
}

static int
memif_dev_close(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        int i;

        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
                memif_disconnect(dev);

                for (i = 0; i < dev->data->nb_rx_queues; i++)
                        (*dev->dev_ops->rx_queue_release)(dev->data->rx_queues[i]);
                for (i = 0; i < dev->data->nb_tx_queues; i++)
                        (*dev->dev_ops->tx_queue_release)(dev->data->tx_queues[i]);

                memif_socket_remove_device(dev);
        } else {
                memif_disconnect(dev);
        }

        rte_free(dev->process_private);

        return 0;
}

static int
memif_dev_configure(struct rte_eth_dev *dev)
{
        struct pmd_internals *pmd = dev->data->dev_private;

        /*
         * CLIENT - TXQ
         * SERVER - RXQ
         */
        pmd->cfg.num_c2s_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
                                  dev->data->nb_tx_queues : dev->data->nb_rx_queues;

        /*
         * CLIENT - RXQ
         * SERVER - TXQ
         */
        pmd->cfg.num_s2c_rings = (pmd->role == MEMIF_ROLE_CLIENT) ?
                                  dev->data->nb_rx_queues : dev->data->nb_tx_queues;

        return 0;
}

static int
memif_tx_queue_setup(struct rte_eth_dev *dev,
                     uint16_t qid,
                     uint16_t nb_tx_desc __rte_unused,
                     unsigned int socket_id __rte_unused,
                     const struct rte_eth_txconf *tx_conf __rte_unused)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;

        mq = rte_zmalloc("tx-queue", sizeof(struct memif_queue), 0);
        if (mq == NULL) {
                MIF_LOG(ERR, "Failed to allocate tx queue id: %u", qid);
                return -ENOMEM;
        }

        mq->type =
            (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_C2S : MEMIF_RING_S2C;
        mq->n_pkts = 0;
        mq->n_bytes = 0;
        mq->intr_handle.fd = -1;
        mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
        mq->in_port = dev->data->port_id;
        dev->data->tx_queues[qid] = mq;

        return 0;
}

static int
memif_rx_queue_setup(struct rte_eth_dev *dev,
                     uint16_t qid,
                     uint16_t nb_rx_desc __rte_unused,
                     unsigned int socket_id __rte_unused,
                     const struct rte_eth_rxconf *rx_conf __rte_unused,
                     struct rte_mempool *mb_pool)
{
        struct pmd_internals *pmd = dev->data->dev_private;
        struct memif_queue *mq;

        mq = rte_zmalloc("rx-queue", sizeof(struct memif_queue), 0);
        if (mq == NULL) {
                MIF_LOG(ERR, "Failed to allocate rx queue id: %u", qid);
                return -ENOMEM;
        }

        mq->type = (pmd->role == MEMIF_ROLE_CLIENT) ? MEMIF_RING_S2C : MEMIF_RING_C2S;
        mq->n_pkts = 0;
        mq->n_bytes = 0;
        mq->intr_handle.fd = -1;
        mq->intr_handle.type = RTE_INTR_HANDLE_EXT;
        mq->mempool = mb_pool;
        mq->in_port = dev->data->port_id;
        dev->data->rx_queues[qid] = mq;

        return 0;
}

static void
memif_queue_release(void *queue)
{
        struct memif_queue *mq = (struct memif_queue *)queue;

        if (!mq)
                return;

        rte_free(mq);
}

static int
memif_link_update(struct rte_eth_dev *dev,
                  int wait_to_complete __rte_unused)
{
        struct pmd_process_private *proc_private;

        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                proc_private = dev->process_private;
                if (dev->data->dev_link.link_status == ETH_LINK_UP &&
                                proc_private->regions_num == 0) {
                        memif_mp_request_regions(dev);
                } else if (dev->data->dev_link.link_status == ETH_LINK_DOWN &&
                                proc_private->regions_num > 0) {
                        memif_free_regions(dev);
                }
        }
        return 0;
}
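
/*
 * Note: in a secondary process memif_link_update() doubles as the region
 * attach/detach trigger, since shared memory only becomes available once
 * the primary has connected: link up with no regions mapped requests
 * them, link down with regions still mapped releases them.
 */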
1380
1381static int
1382memif_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1383{
1384        struct pmd_internals *pmd = dev->data->dev_private;
1385        struct memif_queue *mq;
1386        int i;
1387        uint8_t tmp, nq;
1388
1389        stats->ipackets = 0;
1390        stats->ibytes = 0;
1391        stats->opackets = 0;
1392        stats->obytes = 0;
1393
1394        tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_c2s_rings :
1395            pmd->run.num_s2c_rings;
1396        nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1397            RTE_ETHDEV_QUEUE_STAT_CNTRS;
1398
1399        /* RX stats */
1400        for (i = 0; i < nq; i++) {
1401                mq = dev->data->rx_queues[i];
1402                stats->q_ipackets[i] = mq->n_pkts;
1403                stats->q_ibytes[i] = mq->n_bytes;
1404                stats->ipackets += mq->n_pkts;
1405                stats->ibytes += mq->n_bytes;
1406        }
1407
1408        tmp = (pmd->role == MEMIF_ROLE_CLIENT) ? pmd->run.num_s2c_rings :
1409            pmd->run.num_c2s_rings;
1410        nq = (tmp < RTE_ETHDEV_QUEUE_STAT_CNTRS) ? tmp :
1411            RTE_ETHDEV_QUEUE_STAT_CNTRS;
1412
1413        /* TX stats */
1414        for (i = 0; i < nq; i++) {
1415                mq = dev->data->tx_queues[i];
1416                stats->q_opackets[i] = mq->n_pkts;
1417                stats->q_obytes[i] = mq->n_bytes;
1418                stats->opackets += mq->n_pkts;
1419                stats->obytes += mq->n_bytes;
1420        }
1421        return 0;
1422}
1423
1424static int
1425memif_stats_reset(struct rte_eth_dev *dev)
1426{
1427        struct pmd_internals *pmd = dev->data->dev_private;
1428        int i;
1429        struct memif_queue *mq;
1430
1431        for (i = 0; i < pmd->run.num_c2s_rings; i++) {
1432                mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->tx_queues[i] :
1433                    dev->data->rx_queues[i];
1434                mq->n_pkts = 0;
1435                mq->n_bytes = 0;
1436        }
1437        for (i = 0; i < pmd->run.num_s2c_rings; i++) {
1438                mq = (pmd->role == MEMIF_ROLE_CLIENT) ? dev->data->rx_queues[i] :
1439                    dev->data->tx_queues[i];
1440                mq->n_pkts = 0;
1441                mq->n_bytes = 0;
1442        }
1443
1444        return 0;
1445}
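    /*
     * Editorial sketch (illustrative, not upstream code): the per-queue
     * counters above surface through the standard stats API:
     *
     *        struct rte_eth_stats st;
     *
     *        if (rte_eth_stats_get(port_id, &st) == 0)
     *                printf("rx %" PRIu64 " pkts, %" PRIu64 " bytes\n",
     *                       st.ipackets, st.ibytes);
     *        rte_eth_stats_reset(port_id);
     *
     * The reset call lands in memif_stats_reset() and zeroes mq->n_pkts and
     * mq->n_bytes on every queue (PRIu64 assumes <inttypes.h>).
     */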
1446
1447static int
1448memif_rx_queue_intr_enable(struct rte_eth_dev *dev __rte_unused,
1449                           uint16_t qid __rte_unused)
1450{
1451        MIF_LOG(WARNING, "Interrupt mode not supported.");
1452
1453        return -1;
1454}
1455
1456static int
1457memif_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t qid __rte_unused)
1458{
1459        struct pmd_internals *pmd __rte_unused = dev->data->dev_private;
1460
1461        return 0;
1462}
1463
1464static const struct eth_dev_ops ops = {
1465        .dev_start = memif_dev_start,
1466        .dev_close = memif_dev_close,
1467        .dev_infos_get = memif_dev_info,
1468        .dev_configure = memif_dev_configure,
1469        .tx_queue_setup = memif_tx_queue_setup,
1470        .rx_queue_setup = memif_rx_queue_setup,
1471        .rx_queue_release = memif_queue_release,
1472        .tx_queue_release = memif_queue_release,
1473        .rx_queue_intr_enable = memif_rx_queue_intr_enable,
1474        .rx_queue_intr_disable = memif_rx_queue_intr_disable,
1475        .link_update = memif_link_update,
1476        .stats_get = memif_stats_get,
1477        .stats_reset = memif_stats_reset,
1478};
1479
1480static int
1481memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
1482             memif_interface_id_t id, uint32_t flags,
1483             const char *socket_filename,
1484             memif_log2_ring_size_t log2_ring_size,
1485             uint16_t pkt_buffer_size, const char *secret,
1486             struct rte_ether_addr *ether_addr)
1487{
1488        int ret = 0;
1489        struct rte_eth_dev *eth_dev;
1490        struct rte_eth_dev_data *data;
1491        struct pmd_internals *pmd;
1492        struct pmd_process_private *process_private;
1493        const unsigned int numa_node = vdev->device.numa_node;
1494        const char *name = rte_vdev_device_name(vdev);
1495
1496        eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*pmd));
1497        if (eth_dev == NULL) {
1498                MIF_LOG(ERR, "%s: Unable to allocate device struct.", name);
1499                return -1;
1500        }
1501
1502        process_private = (struct pmd_process_private *)
1503                rte_zmalloc(name, sizeof(struct pmd_process_private),
1504                            RTE_CACHE_LINE_SIZE);
1505
1506        if (process_private == NULL) {
1507                MIF_LOG(ERR, "Failed to alloc memory for process private");
                    /* release the ethdev allocated above to avoid leaking it */
                    rte_eth_dev_release_port(eth_dev);
1508                return -1;
1509        }
1510        eth_dev->process_private = process_private;
1511
1512        pmd = eth_dev->data->dev_private;
1513        memset(pmd, 0, sizeof(*pmd));
1514
1515        pmd->id = id;
1516        pmd->flags = flags;
1517        pmd->flags |= ETH_MEMIF_FLAG_DISABLED;
1518        pmd->role = role;
1519        /* Zero-copy flag irrelevant to server. */
1520        if (pmd->role == MEMIF_ROLE_SERVER)
1521                pmd->flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1522
1523        ret = memif_socket_init(eth_dev, socket_filename);
1524        if (ret < 0)
1525                return ret;
1526
1527        memset(pmd->secret, 0, sizeof(char) * ETH_MEMIF_SECRET_SIZE);
1528        if (secret != NULL)
1529                strlcpy(pmd->secret, secret, sizeof(pmd->secret));
1530
1531        pmd->cfg.log2_ring_size = log2_ring_size;
1532        /* set in .dev_configure() */
1533        pmd->cfg.num_c2s_rings = 0;
1534        pmd->cfg.num_s2c_rings = 0;
1535
1536        pmd->cfg.pkt_buffer_size = pkt_buffer_size;
1537        rte_spinlock_init(&pmd->cc_lock);
1538
1539        data = eth_dev->data;
1540        data->dev_private = pmd;
1541        data->numa_node = numa_node;
1542        data->dev_link = pmd_link;
1543        data->mac_addrs = ether_addr;
1544        data->promiscuous = 1;
1545        data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1546
1547        eth_dev->dev_ops = &ops;
1548        eth_dev->device = &vdev->device;
1549        if (pmd->flags & ETH_MEMIF_FLAG_ZERO_COPY) {
1550                eth_dev->rx_pkt_burst = eth_memif_rx_zc;
1551                eth_dev->tx_pkt_burst = eth_memif_tx_zc;
1552        } else {
1553                eth_dev->rx_pkt_burst = eth_memif_rx;
1554                eth_dev->tx_pkt_burst = eth_memif_tx;
1555        }
1556
1557        rte_eth_dev_probing_finish(eth_dev);
1558
1559        return 0;
1560}
1561
1562static int
1563memif_set_role(const char *key __rte_unused, const char *value,
1564               void *extra_args)
1565{
1566        enum memif_role_t *role = (enum memif_role_t *)extra_args;
1567
1568        if (strstr(value, "server") != NULL) {
1569                *role = MEMIF_ROLE_SERVER;
1570        } else if (strstr(value, "client") != NULL) {
1571                *role = MEMIF_ROLE_CLIENT;
1572        } else if (strstr(value, "master") != NULL) {
1573                MIF_LOG(NOTICE, "Role argument \"master\" is deprecated, use \"server\"");
1574                *role = MEMIF_ROLE_SERVER;
1575        } else if (strstr(value, "slave") != NULL) {
1576                MIF_LOG(NOTICE, "Role argument \"slave\" is deprecated, use \"client\"");
1577                *role = MEMIF_ROLE_CLIENT;
1578        } else {
1579                MIF_LOG(ERR, "Unknown role: %s.", value);
1580                return -EINVAL;
1581        }
1582        return 0;
1583}
1584
1585static int
1586memif_set_zc(const char *key __rte_unused, const char *value, void *extra_args)
1587{
1588        uint32_t *flags = (uint32_t *)extra_args;
1589
1590        if (strstr(value, "yes") != NULL) {
1591                if (!rte_mcfg_get_single_file_segments()) {
1592                        MIF_LOG(ERR, "Zero-copy doesn't support multi-file segments.");
1593                        return -ENOTSUP;
1594                }
1595                *flags |= ETH_MEMIF_FLAG_ZERO_COPY;
1596        } else if (strstr(value, "no") != NULL) {
1597                *flags &= ~ETH_MEMIF_FLAG_ZERO_COPY;
1598        } else {
1599                MIF_LOG(ERR, "Failed to parse zero-copy param: %s.", value);
1600                return -EINVAL;
1601        }
1602        return 0;
1603}
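    /*
     * Editorial note: the check above means zero-copy only works when the
     * EAL maps all memsegs as single files. A hedged usage sketch (binary
     * name and devargs are illustrative):
     *
     *        ./dpdk-app --single-file-segments \
     *                --vdev=net_memif0,role=client,zero-copy=yes
     */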
1604
1605static int
1606memif_set_id(const char *key __rte_unused, const char *value, void *extra_args)
1607{
1608        memif_interface_id_t *id = (memif_interface_id_t *)extra_args;
1609
1610        /* even if parsing fails, 0 is a valid id */
1611        *id = strtoul(value, NULL, 10);
1612        return 0;
1613}
1614
1615static int
1616memif_set_bs(const char *key __rte_unused, const char *value, void *extra_args)
1617{
1618        unsigned long tmp;
1619        uint16_t *pkt_buffer_size = (uint16_t *)extra_args;
1620
1621        tmp = strtoul(value, NULL, 10);
1622        if (tmp == 0 || tmp > 0xFFFF) {
1623                MIF_LOG(ERR, "Invalid buffer size: %s.", value);
1624                return -EINVAL;
1625        }
1626        *pkt_buffer_size = tmp;
1627        return 0;
1628}
1629
1630static int
1631memif_set_rs(const char *key __rte_unused, const char *value, void *extra_args)
1632{
1633        unsigned long tmp;
1634        memif_log2_ring_size_t *log2_ring_size =
1635            (memif_log2_ring_size_t *)extra_args;
1636
1637        tmp = strtoul(value, NULL, 10);
1638        if (tmp == 0 || tmp > ETH_MEMIF_MAX_LOG2_RING_SIZE) {
1639                MIF_LOG(ERR, "Invalid ring size: %s (max %u).",
1640                        value, ETH_MEMIF_MAX_LOG2_RING_SIZE);
1641                return -EINVAL;
1642        }
1643        *log2_ring_size = tmp;
1644        return 0;
1645}
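    /*
     * Editorial worked example: rsize is the log2 of the ring size, so
     * rsize=11 requests 2^11 = 2048 descriptors per ring, and the upper
     * bound enforced above is 2^ETH_MEMIF_MAX_LOG2_RING_SIZE.
     */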
1646
1647/* check if directory exists and if we have permission to read/write */
1648static int
1649memif_check_socket_filename(const char *filename)
1650{
1651        char *dir = NULL, *tmp;
1652        uint32_t idx;
1653        int ret = 0;
1654
1655        if (strlen(filename) >= MEMIF_SOCKET_UN_SIZE) {
1656                MIF_LOG(ERR, "Unix socket address too long (max 108).");
1657                return -1;
1658        }
1659
1660        tmp = strrchr(filename, '/');
1661        if (tmp != NULL) {
1662                idx = tmp - filename;
1663                dir = rte_zmalloc("memif_tmp", sizeof(char) * (idx + 1), 0);
1664                if (dir == NULL) {
1665                        MIF_LOG(ERR, "Failed to allocate memory.");
1666                        return -1;
1667                }
1668                strlcpy(dir, filename, sizeof(char) * (idx + 1));
1669        }
1670
1671        if (dir == NULL || (faccessat(AT_FDCWD, dir, F_OK | R_OK |
1672                                        W_OK, AT_EACCESS) < 0)) {
1673                MIF_LOG(ERR, "Invalid socket directory.");
1674                ret = -EINVAL;
1675        }
1676
1677        if (dir != NULL)
1678                rte_free(dir);
1679
1680        return ret;
1681}
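    /*
     * Editorial example (hypothetical path): for socket=/run/memif/memif.sock
     * the function above requires /run/memif to exist and be readable and
     * writable by the caller. The check is skipped entirely when
     * socket-abstract=yes, since abstract sockets have no filesystem
     * presence.
     */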
1682
1683static int
1684memif_set_socket_filename(const char *key __rte_unused, const char *value,
1685                          void *extra_args)
1686{
1687        const char **socket_filename = (const char **)extra_args;
1688
1689        *socket_filename = value;
1690        return 0;
1691}
1692
1693static int
1694memif_set_is_socket_abstract(const char *key __rte_unused, const char *value, void *extra_args)
1695{
1696        uint32_t *flags = (uint32_t *)extra_args;
1697
1698        if (strstr(value, "yes") != NULL) {
1699                *flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1700        } else if (strstr(value, "no") != NULL) {
1701                *flags &= ~ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1702        } else {
1703                MIF_LOG(ERR, "Failed to parse socket-abstract param: %s.", value);
1704                return -EINVAL;
1705        }
1706        return 0;
1707}
1708
1709static int
1710memif_set_mac(const char *key __rte_unused, const char *value, void *extra_args)
1711{
1712        struct rte_ether_addr *ether_addr = (struct rte_ether_addr *)extra_args;
1713
1714        if (rte_ether_unformat_addr(value, ether_addr) < 0)
1715                MIF_LOG(WARNING, "Failed to parse mac '%s'.", value);
1716        return 0;
1717}
1718
1719static int
1720memif_set_secret(const char *key __rte_unused, const char *value, void *extra_args)
1721{
1722        const char **secret = (const char **)extra_args;
1723
1724        *secret = value;
1725        return 0;
1726}
1727
1728static int
1729rte_pmd_memif_probe(struct rte_vdev_device *vdev)
1730{
1731        RTE_BUILD_BUG_ON(sizeof(memif_msg_t) != 128);
1732        RTE_BUILD_BUG_ON(sizeof(memif_desc_t) != 16);
1733        int ret = 0;
1734        struct rte_kvargs *kvlist;
1735        const char *name = rte_vdev_device_name(vdev);
1736        enum memif_role_t role = MEMIF_ROLE_CLIENT;
1737        memif_interface_id_t id = 0;
1738        uint16_t pkt_buffer_size = ETH_MEMIF_DEFAULT_PKT_BUFFER_SIZE;
1739        memif_log2_ring_size_t log2_ring_size = ETH_MEMIF_DEFAULT_RING_SIZE;
1740        const char *socket_filename = ETH_MEMIF_DEFAULT_SOCKET_FILENAME;
1741        uint32_t flags = 0;
1742        const char *secret = NULL;
1743        struct rte_ether_addr *ether_addr = rte_zmalloc("",
1744                sizeof(struct rte_ether_addr), 0);
1745        struct rte_eth_dev *eth_dev;
1746
            /* rte_zmalloc() can fail; check before the dereference below */
            if (ether_addr == NULL)
                    return -ENOMEM;
1747        rte_eth_random_addr(ether_addr->addr_bytes);
1748
1749        MIF_LOG(INFO, "Initialize MEMIF: %s.", name);
1750
1751        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1752                eth_dev = rte_eth_dev_attach_secondary(name);
1753                if (!eth_dev) {
1754                        MIF_LOG(ERR, "Failed to probe %s", name);
1755                        return -1;
1756                }
1757
1758                eth_dev->dev_ops = &ops;
1759                eth_dev->device = &vdev->device;
1760                eth_dev->rx_pkt_burst = eth_memif_rx;
1761                eth_dev->tx_pkt_burst = eth_memif_tx;
1762
1763                if (!rte_eal_primary_proc_alive(NULL)) {
1764                        MIF_LOG(ERR, "Primary process is missing");
1765                        return -1;
1766                }
1767
1768                eth_dev->process_private = (struct pmd_process_private *)
1769                        rte_zmalloc(name,
1770                                sizeof(struct pmd_process_private),
1771                                RTE_CACHE_LINE_SIZE);
1772                if (eth_dev->process_private == NULL) {
1773                        MIF_LOG(ERR,
1774                                "Failed to alloc memory for process private");
1775                        return -1;
1776                }
1777
1778                rte_eth_dev_probing_finish(eth_dev);
1779
1780                return 0;
1781        }
1782
1783        ret = rte_mp_action_register(MEMIF_MP_SEND_REGION, memif_mp_send_region);
1784        /*
1785         * The primary process can continue probing, but a secondary process
1786         * won't be able to get the memory region information.
1787         */
1788        if (ret < 0 && rte_errno != EEXIST)
1789                MIF_LOG(WARNING, "Failed to register mp action callback: %s",
1790                        strerror(rte_errno));
1791
1792        /* use abstract address by default */
1793        flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
1794
1795        kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_arguments);
1796
1797        /* parse parameters */
1798        if (kvlist != NULL) {
1799                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ROLE_ARG,
1800                                         &memif_set_role, &role);
1801                if (ret < 0)
1802                        goto exit;
1803                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ID_ARG,
1804                                         &memif_set_id, &id);
1805                if (ret < 0)
1806                        goto exit;
1807                ret = rte_kvargs_process(kvlist, ETH_MEMIF_PKT_BUFFER_SIZE_ARG,
1808                                         &memif_set_bs, &pkt_buffer_size);
1809                if (ret < 0)
1810                        goto exit;
1811                ret = rte_kvargs_process(kvlist, ETH_MEMIF_RING_SIZE_ARG,
1812                                         &memif_set_rs, &log2_ring_size);
1813                if (ret < 0)
1814                        goto exit;
1815                ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ARG,
1816                                         &memif_set_socket_filename,
1817                                         (void *)(&socket_filename));
1818                if (ret < 0)
1819                        goto exit;
1820                ret = rte_kvargs_process(kvlist, ETH_MEMIF_SOCKET_ABSTRACT_ARG,
1821                                         &memif_set_is_socket_abstract, &flags);
1822                if (ret < 0)
1823                        goto exit;
1824                ret = rte_kvargs_process(kvlist, ETH_MEMIF_MAC_ARG,
1825                                         &memif_set_mac, ether_addr);
1826                if (ret < 0)
1827                        goto exit;
1828                ret = rte_kvargs_process(kvlist, ETH_MEMIF_ZC_ARG,
1829                                         &memif_set_zc, &flags);
1830                if (ret < 0)
1831                        goto exit;
1832                ret = rte_kvargs_process(kvlist, ETH_MEMIF_SECRET_ARG,
1833                                         &memif_set_secret, (void *)(&secret));
1834                if (ret < 0)
1835                        goto exit;
1836        }
1837
1838        if (!(flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT)) {
1839                ret = memif_check_socket_filename(socket_filename);
1840                if (ret < 0)
1841                        goto exit;
1842        }
1843
1844        /* create interface */
1845        ret = memif_create(vdev, role, id, flags, socket_filename,
1846                           log2_ring_size, pkt_buffer_size, secret, ether_addr);
1847
1848exit:
1849        if (kvlist != NULL)
1850                rte_kvargs_free(kvlist);
1851        return ret;
1852}
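    /*
     * Editorial sketch (names and paths are illustrative): besides the
     * --vdev EAL option, the probe/remove pair above is also exercised at
     * runtime through the vdev bus API:
     *
     *        if (rte_vdev_init("net_memif0",
     *                        "role=server,id=0,socket=/run/memif.sock") == 0) {
     *                ... use the port, then detach ...
     *                rte_vdev_uninit("net_memif0");
     *        }
     */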
1853
1854static int
1855rte_pmd_memif_remove(struct rte_vdev_device *vdev)
1856{
1857        struct rte_eth_dev *eth_dev;
1858
1859        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
1860        if (eth_dev == NULL)
1861                return 0;
1862
1863        return rte_eth_dev_close(eth_dev->data->port_id);
1864}
1865
1866static struct rte_vdev_driver pmd_memif_drv = {
1867        .probe = rte_pmd_memif_probe,
1868        .remove = rte_pmd_memif_remove,
1869};
1870
1871RTE_PMD_REGISTER_VDEV(net_memif, pmd_memif_drv);
1872
1873RTE_PMD_REGISTER_PARAM_STRING(net_memif,
1874                              ETH_MEMIF_ID_ARG "=<int>"
1875                              ETH_MEMIF_ROLE_ARG "=server|client"
1876                              ETH_MEMIF_PKT_BUFFER_SIZE_ARG "=<int>"
1877                              ETH_MEMIF_RING_SIZE_ARG "=<int>"
1878                              ETH_MEMIF_SOCKET_ARG "=<string>"
1879                              ETH_MEMIF_SOCKET_ABSTRACT_ARG "=yes|no"
1880                              ETH_MEMIF_MAC_ARG "=xx:xx:xx:xx:xx:xx"
1881                              ETH_MEMIF_ZC_ARG "=yes|no"
1882                              ETH_MEMIF_SECRET_ARG "=<string>");
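    /*
     * Editorial example matching the parameter string above (all values are
     * illustrative). Two DPDK processes on one host, connected back to back:
     *
     *        ./server-app --file-prefix=s0 \
     *                --vdev=net_memif0,role=server,id=0,socket-abstract=yes
     *        ./client-app --file-prefix=c0 \
     *                --vdev=net_memif0,role=client,id=0,socket-abstract=yes
     *
     * Both ends must use the same id, the same socket(-abstract) setting and,
     * if configured, the same secret for the connection to be accepted.
     */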
1883
1884RTE_LOG_REGISTER_DEFAULT(memif_logtype, NOTICE);
1885