dpdk/drivers/net/pcap/pcap_ethdev.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2016 Intel Corporation.
   3 * Copyright(c) 2014 6WIND S.A.
   4 * All rights reserved.
   5 */
   6
   7#include <time.h>
   8
   9#include <pcap.h>
  10
  11#include <rte_cycles.h>
  12#include <ethdev_driver.h>
  13#include <ethdev_vdev.h>
  14#include <rte_kvargs.h>
  15#include <rte_malloc.h>
  16#include <rte_mbuf.h>
  17#include <rte_mbuf_dyn.h>
  18#include <rte_bus_vdev.h>
  19#include <rte_os_shim.h>
  20
  21#include "pcap_osdep.h"
  22
  23#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
  24#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
  25#define RTE_ETH_PCAP_PROMISC 1
  26#define RTE_ETH_PCAP_TIMEOUT -1
  27
  28#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
  29#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
  30#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
  31#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
  32#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
  33#define ETH_PCAP_IFACE_ARG    "iface"
  34#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
  35#define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"
  36
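/*
 * These keys are parsed from the vdev devargs string. Illustrative
 * examples (file and interface names below are placeholders only):
 *
 *   --vdev 'net_pcap0,rx_pcap=input.pcap,tx_pcap=output.pcap'
 *   --vdev 'net_pcap1,iface=eth0,phy_mac=1'
 *   --vdev 'net_pcap2,rx_pcap=input.pcap,infinite_rx=1'
 */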
  37#define ETH_PCAP_ARG_MAXLEN     64
  38
  39#define RTE_PMD_PCAP_MAX_QUEUES 16
  40
  41static char errbuf[PCAP_ERRBUF_SIZE];
  42static struct timespec start_time;
  43static uint64_t start_cycles;
  44static uint64_t hz;
  45static uint8_t iface_idx;
  46
  47static uint64_t timestamp_rx_dynflag;
  48static int timestamp_dynfield_offset = -1;
  49
  50struct queue_stat {
  51        volatile unsigned long pkts;
  52        volatile unsigned long bytes;
  53        volatile unsigned long err_pkts;
  54        volatile unsigned long rx_nombuf;
  55};
  56
  57struct queue_missed_stat {
  58        /* last value retrieved from pcap */
  59        unsigned int pcap;
  60        /* stores values lost by pcap stop or rollover */
  61        unsigned long mnemonic;
  62        /* value on last reset */
  63        unsigned long reset;
  64};
  65
  66struct pcap_rx_queue {
  67        uint16_t port_id;
  68        uint16_t queue_id;
  69        struct rte_mempool *mb_pool;
  70        struct queue_stat rx_stat;
  71        struct queue_missed_stat missed_stat;
  72        char name[PATH_MAX];
  73        char type[ETH_PCAP_ARG_MAXLEN];
  74
  75        /* Contains pre-generated packets to be looped through */
  76        struct rte_ring *pkts;
  77};
  78
  79struct pcap_tx_queue {
  80        uint16_t port_id;
  81        uint16_t queue_id;
  82        struct queue_stat tx_stat;
  83        char name[PATH_MAX];
  84        char type[ETH_PCAP_ARG_MAXLEN];
  85};
  86
  87struct pmd_internals {
  88        struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
  89        struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
  90        char devargs[ETH_PCAP_ARG_MAXLEN];
  91        struct rte_ether_addr eth_addr;
  92        int if_index;
  93        int single_iface;
  94        int phy_mac;
  95        unsigned int infinite_rx;
  96};
  97
  98struct pmd_process_private {
  99        pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
 100        pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
 101        pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
 102};
 103
 104struct pmd_devargs {
 105        unsigned int num_of_queue;
 106        struct devargs_queue {
 107                pcap_dumper_t *dumper;
 108                pcap_t *pcap;
 109                const char *name;
 110                const char *type;
 111        } queue[RTE_PMD_PCAP_MAX_QUEUES];
 112        int phy_mac;
 113};
 114
 115struct pmd_devargs_all {
 116        struct pmd_devargs rx_queues;
 117        struct pmd_devargs tx_queues;
 118        int single_iface;
 119        unsigned int is_tx_pcap;
 120        unsigned int is_tx_iface;
 121        unsigned int is_rx_pcap;
 122        unsigned int is_rx_iface;
 123        unsigned int infinite_rx;
 124};
 125
 126static const char *valid_arguments[] = {
 127        ETH_PCAP_RX_PCAP_ARG,
 128        ETH_PCAP_TX_PCAP_ARG,
 129        ETH_PCAP_RX_IFACE_ARG,
 130        ETH_PCAP_RX_IFACE_IN_ARG,
 131        ETH_PCAP_TX_IFACE_ARG,
 132        ETH_PCAP_IFACE_ARG,
 133        ETH_PCAP_PHY_MAC_ARG,
 134        ETH_PCAP_INFINITE_RX_ARG,
 135        NULL
 136};
 137
 138static struct rte_eth_link pmd_link = {
 139                .link_speed = RTE_ETH_SPEED_NUM_10G,
 140                .link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
 141                .link_status = RTE_ETH_LINK_DOWN,
 142                .link_autoneg = RTE_ETH_LINK_FIXED,
 143};
 144
 145RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);
 146
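/*
 * Accounting for packets dropped ("missed") by libpcap. The ps_drop value
 * reported by pcap_stats() is lost when a handle is closed and may also
 * wrap, so the helpers below fold it into 'mnemonic' on stop/rollover and
 * expose the per-queue total as:
 *
 *   missed = pcap + mnemonic - reset
 *
 * where 'pcap' is the latest ps_drop value, 'mnemonic' accumulates counts
 * lost to stops/rollovers and 'reset' is the snapshot taken at stats reset.
 */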
 147static struct queue_missed_stat*
 148queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
 149{
 150        struct pmd_internals *internals = dev->data->dev_private;
 151        struct queue_missed_stat *missed_stat =
 152                        &internals->rx_queue[qid].missed_stat;
 153        const struct pmd_process_private *pp = dev->process_private;
 154        pcap_t *pcap = pp->rx_pcap[qid];
 155        struct pcap_stat stat;
 156
 157        if (!pcap || (pcap_stats(pcap, &stat) != 0))
 158                return missed_stat;
 159
 160        /* rollover check - best effort fixup assuming single rollover */
 161        if (stat.ps_drop < missed_stat->pcap)
 162                missed_stat->mnemonic += UINT_MAX;
 163        missed_stat->pcap = stat.ps_drop;
 164
 165        return missed_stat;
 166}
 167
 168static void
 169queue_missed_stat_on_stop_update(struct rte_eth_dev *dev, unsigned int qid)
 170{
 171        struct queue_missed_stat *missed_stat =
 172                        queue_missed_stat_update(dev, qid);
 173
 174        missed_stat->mnemonic += missed_stat->pcap;
 175        missed_stat->pcap = 0;
 176}
 177
 178static void
 179queue_missed_stat_reset(struct rte_eth_dev *dev, unsigned int qid)
 180{
 181        struct queue_missed_stat *missed_stat =
 182                        queue_missed_stat_update(dev, qid);
 183
 184        missed_stat->reset = missed_stat->pcap;
 185        missed_stat->mnemonic = 0;
 186}
 187
 188static unsigned long
 189queue_missed_stat_get(struct rte_eth_dev *dev, unsigned int qid)
 190{
 191        const struct queue_missed_stat *missed_stat =
 192                        queue_missed_stat_update(dev, qid);
 193
 194        return missed_stat->pcap + missed_stat->mnemonic - missed_stat->reset;
 195}
 196
 197static int
 198eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
 199                const u_char *data, uint16_t data_len)
 200{
 201        /* Copy the first segment. */
 202        uint16_t len = rte_pktmbuf_tailroom(mbuf);
 203        struct rte_mbuf *m = mbuf;
 204
 205        rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
 206        data_len -= len;
 207        data += len;
 208
 209        while (data_len > 0) {
 210                /* Allocate next mbuf and point to that. */
 211                m->next = rte_pktmbuf_alloc(mb_pool);
 212
 213                if (unlikely(!m->next))
 214                        return -1;
 215
 216                m = m->next;
 217
 218                /* Headroom is not needed in chained mbufs. */
 219                rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
 220                m->pkt_len = 0;
 221                m->data_len = 0;
 222
 223                /* Copy next segment. */
 224                len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
 225                rte_memcpy(rte_pktmbuf_append(m, len), data, len);
 226
 227                mbuf->nb_segs++;
 228                data_len -= len;
 229                data += len;
 230        }
 231
 232        return mbuf->nb_segs;
 233}
 234
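/*
 * Worked example for eth_pcap_rx_jumbo() (illustrative numbers): with a
 * captured frame of 9000 bytes and roughly 2048 bytes of tailroom per mbuf,
 * the first mbuf holds the first ~2048 bytes and four more mbufs are
 * chained for the remainder, giving nb_segs == 5.
 */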
 235static uint16_t
 236eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 237{
 238        int i;
 239        struct pcap_rx_queue *pcap_q = queue;
 240        uint32_t rx_bytes = 0;
 241
 242        if (unlikely(nb_pkts == 0))
 243                return 0;
 244
 245        if (rte_pktmbuf_alloc_bulk(pcap_q->mb_pool, bufs, nb_pkts) != 0)
 246                return 0;
 247
 248        for (i = 0; i < nb_pkts; i++) {
 249                struct rte_mbuf *pcap_buf;
 250                int err = rte_ring_dequeue(pcap_q->pkts, (void **)&pcap_buf);
 251                if (err)
 252                        return i;
 253
 254                rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
 255                                rte_pktmbuf_mtod(pcap_buf, void *),
 256                                pcap_buf->data_len);
 257                bufs[i]->data_len = pcap_buf->data_len;
 258                bufs[i]->pkt_len = pcap_buf->pkt_len;
 259                bufs[i]->port = pcap_q->port_id;
 260                rx_bytes += pcap_buf->data_len;
 261
 262                /* Enqueue packet back on ring to allow infinite rx. */
 263                rte_ring_enqueue(pcap_q->pkts, pcap_buf);
 264        }
 265
 266        pcap_q->rx_stat.pkts += i;
 267        pcap_q->rx_stat.bytes += rx_bytes;
 268
 269        return i;
 270}
 271
 272static uint16_t
 273eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 274{
 275        unsigned int i;
 276        struct pcap_pkthdr header;
 277        struct pmd_process_private *pp;
 278        const u_char *packet;
 279        struct rte_mbuf *mbuf;
 280        struct pcap_rx_queue *pcap_q = queue;
 281        uint16_t num_rx = 0;
 282        uint32_t rx_bytes = 0;
 283        pcap_t *pcap;
 284
 285        pp = rte_eth_devices[pcap_q->port_id].process_private;
 286        pcap = pp->rx_pcap[pcap_q->queue_id];
 287
 288        if (unlikely(pcap == NULL || nb_pkts == 0))
 289                return 0;
 290
 291        /* Reads the given number of packets from the pcap file one by one
 292         * and copies the packet data into a newly allocated mbuf to return.
 293         */
 294        for (i = 0; i < nb_pkts; i++) {
 295                /* Get the next PCAP packet */
 296                packet = pcap_next(pcap, &header);
 297                if (unlikely(packet == NULL))
 298                        break;
 299
 300                mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
 301                if (unlikely(mbuf == NULL)) {
 302                        pcap_q->rx_stat.rx_nombuf++;
 303                        break;
 304                }
 305
 306                if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
 307                        /* pcap packet will fit in the mbuf, can copy it */
 308                        rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
 309                                        header.caplen);
 310                        mbuf->data_len = (uint16_t)header.caplen;
 311                } else {
  312                        /* Try to read the jumbo frame into multiple mbufs. */
 313                        if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
 314                                                       mbuf,
 315                                                       packet,
 316                                                       header.caplen) == -1)) {
 317                                pcap_q->rx_stat.err_pkts++;
 318                                rte_pktmbuf_free(mbuf);
 319                                break;
 320                        }
 321                }
 322
 323                mbuf->pkt_len = (uint16_t)header.caplen;
 324                *RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
 325                        rte_mbuf_timestamp_t *) =
 326                                (uint64_t)header.ts.tv_sec * 1000000 +
 327                                header.ts.tv_usec;
 328                mbuf->ol_flags |= timestamp_rx_dynflag;
 329                mbuf->port = pcap_q->port_id;
 330                bufs[num_rx] = mbuf;
 331                num_rx++;
 332                rx_bytes += header.caplen;
 333        }
 334        pcap_q->rx_stat.pkts += num_rx;
 335        pcap_q->rx_stat.bytes += rx_bytes;
 336
 337        return num_rx;
 338}
 339
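/*
 * Note: eth_pcap_rx() stores the capture timestamp in the mbuf dynamic
 * timestamp field as microseconds (ts.tv_sec * 1000000 + ts.tv_usec) and
 * marks it by setting the registered Rx timestamp flag in ol_flags.
 */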
 340static uint16_t
 341eth_null_rx(void *queue __rte_unused,
 342                struct rte_mbuf **bufs __rte_unused,
 343                uint16_t nb_pkts __rte_unused)
 344{
 345        return 0;
 346}
 347
 348#define NSEC_PER_SEC    1000000000L
 349
 350/*
 351 * This function stores nanoseconds in `tv_usec` field of `struct timeval`,
 352 * because `ts` goes directly to nanosecond-precision dump.
 353 */
 354static inline void
 355calculate_timestamp(struct timeval *ts) {
 356        uint64_t cycles;
 357        struct timespec cur_time;
 358
 359        cycles = rte_get_timer_cycles() - start_cycles;
 360        cur_time.tv_sec = cycles / hz;
 361        cur_time.tv_nsec = (cycles % hz) * NSEC_PER_SEC / hz;
 362
 363        ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
 364        ts->tv_usec = start_time.tv_nsec + cur_time.tv_nsec;
 365        if (ts->tv_usec >= NSEC_PER_SEC) {
 366                ts->tv_usec -= NSEC_PER_SEC;
 367                ts->tv_sec += 1;
 368        }
 369}
 370
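/*
 * Example of the conversion above (illustrative numbers): with hz equal to
 * 2000000000 (a 2 GHz timer) and cycles equal to 3000000000, the elapsed
 * time is 3000000000 / 2000000000 = 1 s plus
 * (3000000000 % 2000000000) * NSEC_PER_SEC / 2000000000 = 500000000 ns,
 * which is then added to start_time.
 */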
 371/*
 372 * Callback to handle writing packets to a pcap file.
 373 */
 374static uint16_t
 375eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 376{
 377        unsigned int i;
 378        struct rte_mbuf *mbuf;
 379        struct pmd_process_private *pp;
 380        struct pcap_tx_queue *dumper_q = queue;
 381        uint16_t num_tx = 0;
 382        uint32_t tx_bytes = 0;
 383        struct pcap_pkthdr header;
 384        pcap_dumper_t *dumper;
 385        unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 386        size_t len, caplen;
 387
 388        pp = rte_eth_devices[dumper_q->port_id].process_private;
 389        dumper = pp->tx_dumper[dumper_q->queue_id];
 390
 391        if (dumper == NULL || nb_pkts == 0)
 392                return 0;
 393
 394        /* writes the nb_pkts packets to the previously opened pcap file
 395         * dumper */
 396        for (i = 0; i < nb_pkts; i++) {
 397                mbuf = bufs[i];
 398                len = caplen = rte_pktmbuf_pkt_len(mbuf);
 399                if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 400                                len > sizeof(temp_data))) {
 401                        caplen = sizeof(temp_data);
 402                }
 403
 404                calculate_timestamp(&header.ts);
 405                header.len = len;
 406                header.caplen = caplen;
 407                /* rte_pktmbuf_read() returns a pointer to the data directly
 408                 * in the mbuf (when the mbuf is contiguous) or, otherwise,
 409                 * a pointer to temp_data after copying into it.
 410                 */
 411                pcap_dump((u_char *)dumper, &header,
 412                        rte_pktmbuf_read(mbuf, 0, caplen, temp_data));
 413
 414                num_tx++;
 415                tx_bytes += caplen;
 416                rte_pktmbuf_free(mbuf);
 417        }
 418
 419        /*
 420         * Since there's no place to hook a callback when the forwarding
 421         * process stops and to make sure the pcap file is actually written,
 422         * we flush the pcap dumper within each burst.
 423         */
 424        pcap_dump_flush(dumper);
 425        dumper_q->tx_stat.pkts += num_tx;
 426        dumper_q->tx_stat.bytes += tx_bytes;
 427        dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;
 428
 429        return nb_pkts;
 430}
 431
 432/*
 433 * Callback to handle dropping packets in the infinite rx case.
 434 */
 435static uint16_t
 436eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 437{
 438        unsigned int i;
 439        uint32_t tx_bytes = 0;
 440        struct pcap_tx_queue *tx_queue = queue;
 441
 442        if (unlikely(nb_pkts == 0))
 443                return 0;
 444
 445        for (i = 0; i < nb_pkts; i++) {
 446                tx_bytes += bufs[i]->pkt_len;
 447                rte_pktmbuf_free(bufs[i]);
 448        }
 449
 450        tx_queue->tx_stat.pkts += nb_pkts;
 451        tx_queue->tx_stat.bytes += tx_bytes;
 452
 453        return i;
 454}
 455
 456/*
 457 * Callback to handle sending packets through a real NIC.
 458 */
 459static uint16_t
 460eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 461{
 462        unsigned int i;
 463        int ret;
 464        struct rte_mbuf *mbuf;
 465        struct pmd_process_private *pp;
 466        struct pcap_tx_queue *tx_queue = queue;
 467        uint16_t num_tx = 0;
 468        uint32_t tx_bytes = 0;
 469        pcap_t *pcap;
 470        unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 471        size_t len;
 472
 473        pp = rte_eth_devices[tx_queue->port_id].process_private;
 474        pcap = pp->tx_pcap[tx_queue->queue_id];
 475
 476        if (unlikely(nb_pkts == 0 || pcap == NULL))
 477                return 0;
 478
 479        for (i = 0; i < nb_pkts; i++) {
 480                mbuf = bufs[i];
 481                len = rte_pktmbuf_pkt_len(mbuf);
 482                if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 483                                len > sizeof(temp_data))) {
 484                        PMD_LOG(ERR,
 485                                "Dropping multi segment PCAP packet. Size (%zd) > max size (%zd).",
 486                                len, sizeof(temp_data));
 487                        rte_pktmbuf_free(mbuf);
 488                        continue;
 489                }
 490
 491                /* rte_pktmbuf_read() returns a pointer to the data directly
 492                 * in the mbuf (when the mbuf is contiguous) or, otherwise,
 493                 * a pointer to temp_data after copying into it.
 494                 */
 495                ret = pcap_sendpacket(pcap,
 496                        rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
 497                if (unlikely(ret != 0))
 498                        break;
 499                num_tx++;
 500                tx_bytes += len;
 501                rte_pktmbuf_free(mbuf);
 502        }
 503
 504        tx_queue->tx_stat.pkts += num_tx;
 505        tx_queue->tx_stat.bytes += tx_bytes;
 506        tx_queue->tx_stat.err_pkts += i - num_tx;
 507
 508        return i;
 509}
 510
 511/*
 512 * pcap_open_live wrapper function
 513 */
 514static inline int
 515open_iface_live(const char *iface, pcap_t **pcap) {
 516        *pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
 517                        RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);
 518
 519        if (*pcap == NULL) {
 520                PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
 521                return -1;
 522        }
 523
 524        return 0;
 525}
 526
 527static int
 528open_single_iface(const char *iface, pcap_t **pcap)
 529{
 530        if (open_iface_live(iface, pcap) < 0) {
 531                PMD_LOG(ERR, "Couldn't open interface %s", iface);
 532                return -1;
 533        }
 534
 535        return 0;
 536}
 537
 538static int
 539open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
 540{
 541        pcap_t *tx_pcap;
 542
 543        /*
  544         * We need a dummy pcap_t to use with pcap_dump_open(), so we
  545         * create an Ethernet capture handle with a large enough
  546         * snapshot length.
 547         */
 548        tx_pcap = pcap_open_dead_with_tstamp_precision(DLT_EN10MB,
 549                        RTE_ETH_PCAP_SNAPSHOT_LEN, PCAP_TSTAMP_PRECISION_NANO);
 550        if (tx_pcap == NULL) {
 551                PMD_LOG(ERR, "Couldn't create dead pcap");
 552                return -1;
 553        }
 554
 555        /* The dumper is created using the previous pcap_t reference */
 556        *dumper = pcap_dump_open(tx_pcap, pcap_filename);
 557        if (*dumper == NULL) {
 558                pcap_close(tx_pcap);
 559                PMD_LOG(ERR, "Couldn't open %s for writing.",
 560                        pcap_filename);
 561                return -1;
 562        }
 563
 564        pcap_close(tx_pcap);
 565        return 0;
 566}
 567
 568static int
 569open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
 570{
 571        *pcap = pcap_open_offline(pcap_filename, errbuf);
 572        if (*pcap == NULL) {
 573                PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
 574                        errbuf);
 575                return -1;
 576        }
 577
 578        return 0;
 579}
 580
 581static uint64_t
 582count_packets_in_pcap(pcap_t **pcap, struct pcap_rx_queue *pcap_q)
 583{
 584        const u_char *packet;
 585        struct pcap_pkthdr header;
 586        uint64_t pcap_pkt_count = 0;
 587
 588        while ((packet = pcap_next(*pcap, &header)))
 589                pcap_pkt_count++;
 590
 591        /* The pcap is reopened so it can be used as normal later. */
 592        pcap_close(*pcap);
 593        *pcap = NULL;
 594        open_single_rx_pcap(pcap_q->name, pcap);
 595
 596        return pcap_pkt_count;
 597}
 598
 599static int
 600eth_dev_start(struct rte_eth_dev *dev)
 601{
 602        unsigned int i;
 603        struct pmd_internals *internals = dev->data->dev_private;
 604        struct pmd_process_private *pp = dev->process_private;
 605        struct pcap_tx_queue *tx;
 606        struct pcap_rx_queue *rx;
 607
 608        /* Special iface case. Single pcap is open and shared between tx/rx. */
 609        if (internals->single_iface) {
 610                tx = &internals->tx_queue[0];
 611                rx = &internals->rx_queue[0];
 612
 613                if (!pp->tx_pcap[0] &&
 614                        strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
 615                        if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
 616                                return -1;
 617                        pp->rx_pcap[0] = pp->tx_pcap[0];
 618                }
 619
 620                goto status_up;
 621        }
 622
 623        /* If not open already, open tx pcaps/dumpers */
 624        for (i = 0; i < dev->data->nb_tx_queues; i++) {
 625                tx = &internals->tx_queue[i];
 626
 627                if (!pp->tx_dumper[i] &&
 628                                strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
 629                        if (open_single_tx_pcap(tx->name,
 630                                &pp->tx_dumper[i]) < 0)
 631                                return -1;
 632                } else if (!pp->tx_pcap[i] &&
 633                                strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
 634                        if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
 635                                return -1;
 636                }
 637        }
 638
 639        /* If not open already, open rx pcaps */
 640        for (i = 0; i < dev->data->nb_rx_queues; i++) {
 641                rx = &internals->rx_queue[i];
 642
 643                if (pp->rx_pcap[i] != NULL)
 644                        continue;
 645
 646                if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
 647                        if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
 648                                return -1;
 649                } else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
 650                        if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
 651                                return -1;
 652                }
 653        }
 654
 655status_up:
 656        for (i = 0; i < dev->data->nb_rx_queues; i++)
 657                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
 658
 659        for (i = 0; i < dev->data->nb_tx_queues; i++)
 660                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
 661
 662        dev->data->dev_link.link_status = RTE_ETH_LINK_UP;
 663
 664        return 0;
 665}
 666
 667/*
 668 * This function gets called when the current port gets stopped.
  669 * It is the only place where we can close all the tx stream dumpers.
  670 * If it is not called, the dumpers are still flushed within each tx burst.
 671 */
 672static int
 673eth_dev_stop(struct rte_eth_dev *dev)
 674{
 675        unsigned int i;
 676        struct pmd_internals *internals = dev->data->dev_private;
 677        struct pmd_process_private *pp = dev->process_private;
 678
 679        /* Special iface case. Single pcap is open and shared between tx/rx. */
 680        if (internals->single_iface) {
 681                queue_missed_stat_on_stop_update(dev, 0);
 682                if (pp->tx_pcap[0] != NULL) {
 683                        pcap_close(pp->tx_pcap[0]);
 684                        pp->tx_pcap[0] = NULL;
 685                        pp->rx_pcap[0] = NULL;
 686                }
 687                goto status_down;
 688        }
 689
 690        for (i = 0; i < dev->data->nb_tx_queues; i++) {
 691                if (pp->tx_dumper[i] != NULL) {
 692                        pcap_dump_close(pp->tx_dumper[i]);
 693                        pp->tx_dumper[i] = NULL;
 694                }
 695
 696                if (pp->tx_pcap[i] != NULL) {
 697                        pcap_close(pp->tx_pcap[i]);
 698                        pp->tx_pcap[i] = NULL;
 699                }
 700        }
 701
 702        for (i = 0; i < dev->data->nb_rx_queues; i++) {
 703                if (pp->rx_pcap[i] != NULL) {
 704                        queue_missed_stat_on_stop_update(dev, i);
 705                        pcap_close(pp->rx_pcap[i]);
 706                        pp->rx_pcap[i] = NULL;
 707                }
 708        }
 709
 710status_down:
 711        for (i = 0; i < dev->data->nb_rx_queues; i++)
 712                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
 713
 714        for (i = 0; i < dev->data->nb_tx_queues; i++)
 715                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
 716
 717        dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
 718
 719        return 0;
 720}
 721
 722static int
 723eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
 724{
 725        return 0;
 726}
 727
 728static int
 729eth_dev_info(struct rte_eth_dev *dev,
 730                struct rte_eth_dev_info *dev_info)
 731{
 732        struct pmd_internals *internals = dev->data->dev_private;
 733
 734        dev_info->if_index = internals->if_index;
 735        dev_info->max_mac_addrs = 1;
 736        dev_info->max_rx_pktlen = (uint32_t) -1;
 737        dev_info->max_rx_queues = dev->data->nb_rx_queues;
 738        dev_info->max_tx_queues = dev->data->nb_tx_queues;
 739        dev_info->min_rx_bufsize = 0;
 740
 741        return 0;
 742}
 743
 744static int
 745eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 746{
 747        unsigned int i;
 748        unsigned long rx_packets_total = 0, rx_bytes_total = 0;
 749        unsigned long rx_missed_total = 0;
 750        unsigned long rx_nombuf_total = 0, rx_err_total = 0;
 751        unsigned long tx_packets_total = 0, tx_bytes_total = 0;
 752        unsigned long tx_packets_err_total = 0;
 753        const struct pmd_internals *internal = dev->data->dev_private;
 754
 755        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 756                        i < dev->data->nb_rx_queues; i++) {
 757                stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
 758                stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
 759                rx_nombuf_total += internal->rx_queue[i].rx_stat.rx_nombuf;
 760                rx_err_total += internal->rx_queue[i].rx_stat.err_pkts;
 761                rx_packets_total += stats->q_ipackets[i];
 762                rx_bytes_total += stats->q_ibytes[i];
 763                rx_missed_total += queue_missed_stat_get(dev, i);
 764        }
 765
 766        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
 767                        i < dev->data->nb_tx_queues; i++) {
 768                stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
 769                stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
 770                tx_packets_total += stats->q_opackets[i];
 771                tx_bytes_total += stats->q_obytes[i];
 772                tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
 773        }
 774
 775        stats->ipackets = rx_packets_total;
 776        stats->ibytes = rx_bytes_total;
 777        stats->imissed = rx_missed_total;
 778        stats->ierrors = rx_err_total;
 779        stats->rx_nombuf = rx_nombuf_total;
 780        stats->opackets = tx_packets_total;
 781        stats->obytes = tx_bytes_total;
 782        stats->oerrors = tx_packets_err_total;
 783
 784        return 0;
 785}
 786
 787static int
 788eth_stats_reset(struct rte_eth_dev *dev)
 789{
 790        unsigned int i;
 791        struct pmd_internals *internal = dev->data->dev_private;
 792
 793        for (i = 0; i < dev->data->nb_rx_queues; i++) {
 794                internal->rx_queue[i].rx_stat.pkts = 0;
 795                internal->rx_queue[i].rx_stat.bytes = 0;
 796                internal->rx_queue[i].rx_stat.err_pkts = 0;
 797                internal->rx_queue[i].rx_stat.rx_nombuf = 0;
 798                queue_missed_stat_reset(dev, i);
 799        }
 800
 801        for (i = 0; i < dev->data->nb_tx_queues; i++) {
 802                internal->tx_queue[i].tx_stat.pkts = 0;
 803                internal->tx_queue[i].tx_stat.bytes = 0;
 804                internal->tx_queue[i].tx_stat.err_pkts = 0;
 805        }
 806
 807        return 0;
 808}
 809
 810static inline void
 811infinite_rx_ring_free(struct rte_ring *pkts)
 812{
 813        struct rte_mbuf *bufs;
 814
 815        while (!rte_ring_dequeue(pkts, (void **)&bufs))
 816                rte_pktmbuf_free(bufs);
 817
 818        rte_ring_free(pkts);
 819}
 820
 821static int
 822eth_dev_close(struct rte_eth_dev *dev)
 823{
 824        unsigned int i;
 825        struct pmd_internals *internals = dev->data->dev_private;
 826
 827        PMD_LOG(INFO, "Closing pcap ethdev on NUMA socket %d",
 828                        rte_socket_id());
 829
 830        eth_dev_stop(dev);
 831
 832        rte_free(dev->process_private);
 833
 834        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 835                return 0;
 836
 837        /* Device wide flag, but cleanup must be performed per queue. */
 838        if (internals->infinite_rx) {
 839                for (i = 0; i < dev->data->nb_rx_queues; i++) {
 840                        struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];
 841
 842                        /*
 843                         * 'pcap_q->pkts' can be NULL if 'eth_dev_close()'
  844                         * is called before 'eth_rx_queue_setup()' has been called
 845                         */
 846                        if (pcap_q->pkts == NULL)
 847                                continue;
 848
 849                        infinite_rx_ring_free(pcap_q->pkts);
 850                }
 851        }
 852
 853        if (internals->phy_mac == 0)
 854                /* not dynamically allocated, must not be freed */
 855                dev->data->mac_addrs = NULL;
 856
 857        return 0;
 858}
 859
 860static int
 861eth_link_update(struct rte_eth_dev *dev __rte_unused,
 862                int wait_to_complete __rte_unused)
 863{
 864        return 0;
 865}
 866
 867static int
 868eth_rx_queue_setup(struct rte_eth_dev *dev,
 869                uint16_t rx_queue_id,
 870                uint16_t nb_rx_desc __rte_unused,
 871                unsigned int socket_id __rte_unused,
 872                const struct rte_eth_rxconf *rx_conf __rte_unused,
 873                struct rte_mempool *mb_pool)
 874{
 875        struct pmd_internals *internals = dev->data->dev_private;
 876        struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];
 877
 878        pcap_q->mb_pool = mb_pool;
 879        pcap_q->port_id = dev->data->port_id;
 880        pcap_q->queue_id = rx_queue_id;
 881        dev->data->rx_queues[rx_queue_id] = pcap_q;
 882
 883        if (internals->infinite_rx) {
 884                struct pmd_process_private *pp;
 885                char ring_name[RTE_RING_NAMESIZE];
 886                static uint32_t ring_number;
 887                uint64_t pcap_pkt_count = 0;
 888                struct rte_mbuf *bufs[1];
 889                pcap_t **pcap;
 890
 891                pp = rte_eth_devices[pcap_q->port_id].process_private;
 892                pcap = &pp->rx_pcap[pcap_q->queue_id];
 893
 894                if (unlikely(*pcap == NULL))
 895                        return -ENOENT;
 896
 897                pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q);
 898
 899                snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu32,
 900                                ring_number);
 901
 902                pcap_q->pkts = rte_ring_create(ring_name,
 903                                rte_align64pow2(pcap_pkt_count + 1), 0,
 904                                RING_F_SP_ENQ | RING_F_SC_DEQ);
 905                ring_number++;
 906                if (!pcap_q->pkts)
 907                        return -ENOENT;
 908
 909                /* Fill ring with packets from PCAP file one by one. */
 910                while (eth_pcap_rx(pcap_q, bufs, 1)) {
 911                        /* Check for multiseg mbufs. */
 912                        if (bufs[0]->nb_segs != 1) {
 913                                infinite_rx_ring_free(pcap_q->pkts);
 914                                PMD_LOG(ERR,
 915                                        "Multiseg mbufs are not supported in infinite_rx mode.");
 916                                return -EINVAL;
 917                        }
 918
 919                        rte_ring_enqueue_bulk(pcap_q->pkts,
 920                                        (void * const *)bufs, 1, NULL);
 921                }
 922
 923                if (rte_ring_count(pcap_q->pkts) < pcap_pkt_count) {
 924                        infinite_rx_ring_free(pcap_q->pkts);
 925                        PMD_LOG(ERR,
 926                                "Not enough mbufs to accommodate packets in pcap file. "
  927                                "At least %" PRIu64 " mbufs per queue are required.",
 928                                pcap_pkt_count);
 929                        return -EINVAL;
 930                }
 931
 932                /*
 933                 * Reset the stats for this queue since eth_pcap_rx calls above
 934                 * didn't result in the application receiving packets.
 935                 */
 936                pcap_q->rx_stat.pkts = 0;
 937                pcap_q->rx_stat.bytes = 0;
 938        }
 939
 940        return 0;
 941}
 942
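/*
 * Note on infinite_rx sizing: the ring above is created with
 * rte_align64pow2(pcap_pkt_count + 1) entries and every packet in the file
 * is copied into an mbuf taken from the queue's mempool, so the mempool
 * must provide at least pcap_pkt_count mbufs per queue for setup to succeed.
 */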
 943static int
 944eth_tx_queue_setup(struct rte_eth_dev *dev,
 945                uint16_t tx_queue_id,
 946                uint16_t nb_tx_desc __rte_unused,
 947                unsigned int socket_id __rte_unused,
 948                const struct rte_eth_txconf *tx_conf __rte_unused)
 949{
 950        struct pmd_internals *internals = dev->data->dev_private;
 951        struct pcap_tx_queue *pcap_q = &internals->tx_queue[tx_queue_id];
 952
 953        pcap_q->port_id = dev->data->port_id;
 954        pcap_q->queue_id = tx_queue_id;
 955        dev->data->tx_queues[tx_queue_id] = pcap_q;
 956
 957        return 0;
 958}
 959
 960static int
 961eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 962{
 963        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 964
 965        return 0;
 966}
 967
 968static int
 969eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 970{
 971        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 972
 973        return 0;
 974}
 975
 976static int
 977eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 978{
 979        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 980
 981        return 0;
 982}
 983
 984static int
 985eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 986{
 987        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
 988
 989        return 0;
 990}
 991
 992static const struct eth_dev_ops ops = {
 993        .dev_start = eth_dev_start,
 994        .dev_stop = eth_dev_stop,
 995        .dev_close = eth_dev_close,
 996        .dev_configure = eth_dev_configure,
 997        .dev_infos_get = eth_dev_info,
 998        .rx_queue_setup = eth_rx_queue_setup,
 999        .tx_queue_setup = eth_tx_queue_setup,
1000        .rx_queue_start = eth_rx_queue_start,
1001        .tx_queue_start = eth_tx_queue_start,
1002        .rx_queue_stop = eth_rx_queue_stop,
1003        .tx_queue_stop = eth_tx_queue_stop,
1004        .link_update = eth_link_update,
1005        .stats_get = eth_stats_get,
1006        .stats_reset = eth_stats_reset,
1007};
1008
1009static int
1010add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
1011                pcap_t *pcap, pcap_dumper_t *dumper)
1012{
1013        if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
1014                return -1;
1015        if (pcap)
1016                pmd->queue[pmd->num_of_queue].pcap = pcap;
1017        if (dumper)
1018                pmd->queue[pmd->num_of_queue].dumper = dumper;
1019        pmd->queue[pmd->num_of_queue].name = name;
1020        pmd->queue[pmd->num_of_queue].type = type;
1021        pmd->num_of_queue++;
1022        return 0;
1023}
1024
1025/*
 1026 * Function handler that opens the pcap file for reading and stores a
 1027 * reference to it for later use.
1028 */
1029static int
1030open_rx_pcap(const char *key, const char *value, void *extra_args)
1031{
1032        const char *pcap_filename = value;
1033        struct pmd_devargs *rx = extra_args;
1034        pcap_t *pcap = NULL;
1035
1036        if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
1037                return -1;
1038
1039        if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
1040                pcap_close(pcap);
1041                return -1;
1042        }
1043
1044        return 0;
1045}
1046
1047/*
1048 * Opens a pcap file for writing and stores a reference to it
 1049 * for later use.
1050 */
1051static int
1052open_tx_pcap(const char *key, const char *value, void *extra_args)
1053{
1054        const char *pcap_filename = value;
1055        struct pmd_devargs *dumpers = extra_args;
1056        pcap_dumper_t *dumper;
1057
1058        if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
1059                return -1;
1060
1061        if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
1062                pcap_dump_close(dumper);
1063                return -1;
1064        }
1065
1066        return 0;
1067}
1068
1069/*
1070 * Opens an interface for reading and writing
1071 */
1072static inline int
1073open_rx_tx_iface(const char *key, const char *value, void *extra_args)
1074{
1075        const char *iface = value;
1076        struct pmd_devargs *tx = extra_args;
1077        pcap_t *pcap = NULL;
1078
1079        if (open_single_iface(iface, &pcap) < 0)
1080                return -1;
1081
1082        tx->queue[0].pcap = pcap;
1083        tx->queue[0].name = iface;
1084        tx->queue[0].type = key;
1085
1086        return 0;
1087}
1088
1089static inline int
1090set_iface_direction(const char *iface, pcap_t *pcap,
1091                pcap_direction_t direction)
1092{
1093        const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
1094        if (pcap_setdirection(pcap, direction) < 0) {
 1095                PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s",
1096                                iface, direction_str, pcap_geterr(pcap));
1097                return -1;
1098        }
 1099        PMD_LOG(INFO, "Setting %s pcap direction %s",
1100                        iface, direction_str);
1101        return 0;
1102}
1103
1104static inline int
1105open_iface(const char *key, const char *value, void *extra_args)
1106{
1107        const char *iface = value;
1108        struct pmd_devargs *pmd = extra_args;
1109        pcap_t *pcap = NULL;
1110
1111        if (open_single_iface(iface, &pcap) < 0)
1112                return -1;
1113        if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
1114                pcap_close(pcap);
1115                return -1;
1116        }
1117
1118        return 0;
1119}
1120
1121/*
1122 * Opens a NIC for reading packets from it
1123 */
1124static inline int
1125open_rx_iface(const char *key, const char *value, void *extra_args)
1126{
1127        int ret = open_iface(key, value, extra_args);
1128        if (ret < 0)
1129                return ret;
1130        if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
1131                struct pmd_devargs *pmd = extra_args;
1132                unsigned int qid = pmd->num_of_queue - 1;
1133
1134                set_iface_direction(pmd->queue[qid].name,
1135                                pmd->queue[qid].pcap,
1136                                PCAP_D_IN);
1137        }
1138
1139        return 0;
1140}
1141
1142static inline int
1143rx_iface_args_process(const char *key, const char *value, void *extra_args)
1144{
1145        if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
1146                        strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
1147                return open_rx_iface(key, value, extra_args);
1148
1149        return 0;
1150}
1151
1152/*
1153 * Opens a NIC for writing packets to it
1154 */
1155static int
1156open_tx_iface(const char *key, const char *value, void *extra_args)
1157{
1158        return open_iface(key, value, extra_args);
1159}
1160
1161static int
1162select_phy_mac(const char *key __rte_unused, const char *value,
1163                void *extra_args)
1164{
1165        if (extra_args) {
1166                const int phy_mac = atoi(value);
1167                int *enable_phy_mac = extra_args;
1168
1169                if (phy_mac)
1170                        *enable_phy_mac = 1;
1171        }
1172        return 0;
1173}
1174
1175static int
1176get_infinite_rx_arg(const char *key __rte_unused,
1177                const char *value, void *extra_args)
1178{
1179        if (extra_args) {
1180                const int infinite_rx = atoi(value);
1181                int *enable_infinite_rx = extra_args;
1182
1183                if (infinite_rx > 0)
1184                        *enable_infinite_rx = 1;
1185        }
1186        return 0;
1187}
1188
1189static int
1190pmd_init_internals(struct rte_vdev_device *vdev,
1191                const unsigned int nb_rx_queues,
1192                const unsigned int nb_tx_queues,
1193                struct pmd_internals **internals,
1194                struct rte_eth_dev **eth_dev)
1195{
1196        struct rte_eth_dev_data *data;
1197        struct pmd_process_private *pp;
1198        unsigned int numa_node = vdev->device.numa_node;
1199
1200        PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
1201                numa_node);
1202
1203        pp = (struct pmd_process_private *)
1204                rte_zmalloc(NULL, sizeof(struct pmd_process_private),
1205                                RTE_CACHE_LINE_SIZE);
1206
1207        if (pp == NULL) {
1208                PMD_LOG(ERR,
1209                        "Failed to allocate memory for process private");
1210                return -1;
1211        }
1212
1213        /* reserve an ethdev entry */
1214        *eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
1215        if (!(*eth_dev)) {
1216                rte_free(pp);
1217                return -1;
1218        }
1219        (*eth_dev)->process_private = pp;
1220        /* now put it all together
1221         * - store queue data in internals,
1222         * - store numa_node info in eth_dev
1223         * - point eth_dev_data to internals
1224         * - and point eth_dev structure to new eth_dev_data structure
1225         */
1226        *internals = (*eth_dev)->data->dev_private;
1227        /*
1228         * Interface MAC = 02:70:63:61:70:<iface_idx>
1229         * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
1230         * where the middle 4 characters are converted to hex.
1231         */
1232        (*internals)->eth_addr = (struct rte_ether_addr) {
1233                .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
1234        };
1235        (*internals)->phy_mac = 0;
1236        data = (*eth_dev)->data;
1237        data->nb_rx_queues = (uint16_t)nb_rx_queues;
1238        data->nb_tx_queues = (uint16_t)nb_tx_queues;
1239        data->dev_link = pmd_link;
1240        data->mac_addrs = &(*internals)->eth_addr;
1241        data->promiscuous = 1;
1242        data->all_multicast = 1;
1243        data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1244
1245        /*
 1246         * NOTE: we'll replace the data element of the originally allocated
1247         * eth_dev so the rings are local per-process
1248         */
1249        (*eth_dev)->dev_ops = &ops;
1250
1251        strlcpy((*internals)->devargs, rte_vdev_device_args(vdev),
1252                        ETH_PCAP_ARG_MAXLEN);
1253
1254        return 0;
1255}
1256
1257static int
1258eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
1259                const unsigned int numa_node)
1260{
1261        void *mac_addrs;
1262        struct rte_ether_addr mac;
1263
1264        if (osdep_iface_mac_get(if_name, &mac) < 0)
1265                return -1;
1266
1267        mac_addrs = rte_zmalloc_socket(NULL, RTE_ETHER_ADDR_LEN, 0, numa_node);
1268        if (mac_addrs == NULL)
1269                return -1;
1270
1271        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
1272        rte_memcpy(mac_addrs, mac.addr_bytes, RTE_ETHER_ADDR_LEN);
1273        eth_dev->data->mac_addrs = mac_addrs;
1274        return 0;
1275}
1276
1277static int
1278eth_from_pcaps_common(struct rte_vdev_device *vdev,
1279                struct pmd_devargs_all *devargs_all,
1280                struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
1281{
1282        struct pmd_process_private *pp;
1283        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
1284        struct pmd_devargs *tx_queues = &devargs_all->tx_queues;
1285        const unsigned int nb_rx_queues = rx_queues->num_of_queue;
1286        const unsigned int nb_tx_queues = tx_queues->num_of_queue;
1287        unsigned int i;
1288
1289        if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
1290                        eth_dev) < 0)
1291                return -1;
1292
1293        pp = (*eth_dev)->process_private;
1294        for (i = 0; i < nb_rx_queues; i++) {
1295                struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
1296                struct devargs_queue *queue = &rx_queues->queue[i];
1297
1298                pp->rx_pcap[i] = queue->pcap;
1299                strlcpy(rx->name, queue->name, sizeof(rx->name));
1300                strlcpy(rx->type, queue->type, sizeof(rx->type));
1301        }
1302
1303        for (i = 0; i < nb_tx_queues; i++) {
1304                struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
1305                struct devargs_queue *queue = &tx_queues->queue[i];
1306
1307                pp->tx_dumper[i] = queue->dumper;
1308                pp->tx_pcap[i] = queue->pcap;
1309                strlcpy(tx->name, queue->name, sizeof(tx->name));
1310                strlcpy(tx->type, queue->type, sizeof(tx->type));
1311        }
1312
1313        return 0;
1314}
1315
1316static int
1317eth_from_pcaps(struct rte_vdev_device *vdev,
1318                struct pmd_devargs_all *devargs_all)
1319{
1320        struct pmd_internals *internals = NULL;
1321        struct rte_eth_dev *eth_dev = NULL;
1322        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
1323        int single_iface = devargs_all->single_iface;
1324        unsigned int infinite_rx = devargs_all->infinite_rx;
1325        int ret;
1326
1327        ret = eth_from_pcaps_common(vdev, devargs_all, &internals, &eth_dev);
1328
1329        if (ret < 0)
1330                return ret;
1331
 1332        /* store whether we are using a single interface for rx/tx or not */
1333        internals->single_iface = single_iface;
1334
1335        if (single_iface) {
1336                internals->if_index =
1337                        osdep_iface_index_get(rx_queues->queue[0].name);
1338
 1339                /* phy_mac arg is applied only if "iface" devarg is provided */
1340                if (rx_queues->phy_mac) {
1341                        if (eth_pcap_update_mac(rx_queues->queue[0].name,
1342                                        eth_dev, vdev->device.numa_node) == 0)
1343                                internals->phy_mac = 1;
1344                }
1345        }
1346
1347        internals->infinite_rx = infinite_rx;
1348        /* Assign rx ops. */
1349        if (infinite_rx)
1350                eth_dev->rx_pkt_burst = eth_pcap_rx_infinite;
1351        else if (devargs_all->is_rx_pcap || devargs_all->is_rx_iface ||
1352                        single_iface)
1353                eth_dev->rx_pkt_burst = eth_pcap_rx;
1354        else
1355                eth_dev->rx_pkt_burst = eth_null_rx;
1356
1357        /* Assign tx ops. */
1358        if (devargs_all->is_tx_pcap)
1359                eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
1360        else if (devargs_all->is_tx_iface || single_iface)
1361                eth_dev->tx_pkt_burst = eth_pcap_tx;
1362        else
1363                eth_dev->tx_pkt_burst = eth_tx_drop;
1364
1365        rte_eth_dev_probing_finish(eth_dev);
1366        return 0;
1367}
1368
1369static void
1370eth_release_pcaps(struct pmd_devargs *pcaps,
1371                struct pmd_devargs *dumpers,
1372                int single_iface)
1373{
1374        unsigned int i;
1375
1376        if (single_iface) {
1377                if (pcaps->queue[0].pcap)
1378                        pcap_close(pcaps->queue[0].pcap);
1379                return;
1380        }
1381
1382        for (i = 0; i < dumpers->num_of_queue; i++) {
1383                if (dumpers->queue[i].dumper)
1384                        pcap_dump_close(dumpers->queue[i].dumper);
1385
1386                if (dumpers->queue[i].pcap)
1387                        pcap_close(dumpers->queue[i].pcap);
1388        }
1389
1390        for (i = 0; i < pcaps->num_of_queue; i++) {
1391                if (pcaps->queue[i].pcap)
1392                        pcap_close(pcaps->queue[i].pcap);
1393        }
1394}
1395
1396static int
1397pmd_pcap_probe(struct rte_vdev_device *dev)
1398{
1399        const char *name;
1400        struct rte_kvargs *kvlist;
1401        struct pmd_devargs pcaps = {0};
1402        struct pmd_devargs dumpers = {0};
 1403        struct rte_eth_dev *eth_dev = NULL;
1404        struct pmd_internals *internal;
1405        int ret = 0;
1406
1407        struct pmd_devargs_all devargs_all = {
1408                .single_iface = 0,
1409                .is_tx_pcap = 0,
1410                .is_tx_iface = 0,
1411                .infinite_rx = 0,
1412        };
1413
1414        name = rte_vdev_device_name(dev);
1415        PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);
1416
1417        timespec_get(&start_time, TIME_UTC);
1418        start_cycles = rte_get_timer_cycles();
1419        hz = rte_get_timer_hz();
1420
1421        ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
1422                        &timestamp_rx_dynflag);
1423        if (ret != 0) {
1424                PMD_LOG(ERR, "Failed to register Rx timestamp field/flag");
1425                return -1;
1426        }
1427
1428        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1429                eth_dev = rte_eth_dev_attach_secondary(name);
1430                if (!eth_dev) {
1431                        PMD_LOG(ERR, "Failed to probe %s", name);
1432                        return -1;
1433                }
1434
1435                internal = eth_dev->data->dev_private;
1436
1437                kvlist = rte_kvargs_parse(internal->devargs, valid_arguments);
1438                if (kvlist == NULL)
1439                        return -1;
1440        } else {
1441                kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
1442                                valid_arguments);
1443                if (kvlist == NULL)
1444                        return -1;
1445        }
1446
1447        /*
1448         * If iface argument is passed we open the NICs and use them for
1449         * reading / writing
1450         */
1451        if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {
1452
1453                ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
1454                                &open_rx_tx_iface, &pcaps);
1455                if (ret < 0)
1456                        goto free_kvlist;
1457
1458                dumpers.queue[0] = pcaps.queue[0];
1459
1460                ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
1461                                &select_phy_mac, &pcaps.phy_mac);
1462                if (ret < 0)
1463                        goto free_kvlist;
1464
1465                dumpers.phy_mac = pcaps.phy_mac;
1466
1467                devargs_all.single_iface = 1;
1468                pcaps.num_of_queue = 1;
1469                dumpers.num_of_queue = 1;
1470
1471                goto create_eth;
1472        }
1473
1474        /*
1475         * We check whether we want to open a RX stream from a real NIC, a
1476         * pcap file or open a dummy RX stream
1477         */
1478        devargs_all.is_rx_pcap =
1479                rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
1480        devargs_all.is_rx_iface =
1481                (rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) +
1482                 rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_IN_ARG)) ? 1 : 0;
1483        pcaps.num_of_queue = 0;
1484
1485        devargs_all.is_tx_pcap =
1486                rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
1487        devargs_all.is_tx_iface =
1488                rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
1489        dumpers.num_of_queue = 0;
1490
1491        if (devargs_all.is_rx_pcap) {
1492                /*
1493                 * We check whether we want to infinitely rx the pcap file.
1494                 */
1495                unsigned int infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
1496                                ETH_PCAP_INFINITE_RX_ARG);
1497
1498                if (infinite_rx_arg_cnt == 1) {
1499                        ret = rte_kvargs_process(kvlist,
1500                                        ETH_PCAP_INFINITE_RX_ARG,
1501                                        &get_infinite_rx_arg,
1502                                        &devargs_all.infinite_rx);
1503                        if (ret < 0)
1504                                goto free_kvlist;
1505                        PMD_LOG(INFO, "infinite_rx has been %s for %s",
1506                                        devargs_all.infinite_rx ? "enabled" : "disabled",
1507                                        name);
1508
1509                } else if (infinite_rx_arg_cnt > 1) {
1510                        PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
1511                                        "argument has been provided more than once "
1512                                        "for %s", name);
1513                }
1514
1515                ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
1516                                &open_rx_pcap, &pcaps);
1517        } else if (devargs_all.is_rx_iface) {
1518                ret = rte_kvargs_process(kvlist, NULL,
1519                                &rx_iface_args_process, &pcaps);
1520        } else if (devargs_all.is_tx_iface || devargs_all.is_tx_pcap) {
1521                unsigned int i;
1522
1523                /* Count number of tx queue args passed before dummy rx queue
1524                 * creation so a dummy rx queue can be created for each tx queue
1525                 */
1526                unsigned int num_tx_queues =
1527                        (rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) +
1528                        rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG));
1529
1530                PMD_LOG(INFO, "Creating null rx queue since no rx queues were provided.");
1531
1532                /* Creating a dummy rx queue for each tx queue passed */
1533                for (i = 0; i < num_tx_queues; i++)
1534                        ret = add_queue(&pcaps, "dummy_rx", "rx_null", NULL,
1535                                        NULL);
1536        } else {
1537                PMD_LOG(ERR, "Error - No rx or tx queues provided");
1538                ret = -ENOENT;
1539        }
1540        if (ret < 0)
1541                goto free_kvlist;
1542
1543        /*
1544         * We check whether we want to open a TX stream to a real NIC,
1545         * a pcap file, or drop packets on tx
1546         */
1547        if (devargs_all.is_tx_pcap) {
1548                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
1549                                &open_tx_pcap, &dumpers);
1550        } else if (devargs_all.is_tx_iface) {
1551                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
1552                                &open_tx_iface, &dumpers);
1553        } else {
1554                unsigned int i;
1555
1556                PMD_LOG(INFO, "Dropping packets on tx since no tx queues were provided.");
1557
1558                /* Add 1 dummy queue per rxq which counts and drops packets. */
1559                for (i = 0; i < pcaps.num_of_queue; i++)
1560                        ret = add_queue(&dumpers, "dummy_tx", "tx_drop", NULL,
1561                                        NULL);
1562        }
1563
1564        if (ret < 0)
1565                goto free_kvlist;
1566
1567create_eth:
1568        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1569                struct pmd_process_private *pp;
1570                unsigned int i;
1571
1572                internal = eth_dev->data->dev_private;
 1573                pp = (struct pmd_process_private *)
 1574                        rte_zmalloc(NULL,
 1575                                sizeof(struct pmd_process_private),
 1576                                RTE_CACHE_LINE_SIZE);
1577
1578                if (pp == NULL) {
1579                        PMD_LOG(ERR,
1580                                "Failed to allocate memory for process private");
1581                        ret = -1;
1582                        goto free_kvlist;
1583                }
1584
1585                eth_dev->dev_ops = &ops;
1586                eth_dev->device = &dev->device;
1587
1588                /* setup process private */
1589                for (i = 0; i < pcaps.num_of_queue; i++)
1590                        pp->rx_pcap[i] = pcaps.queue[i].pcap;
1591
1592                for (i = 0; i < dumpers.num_of_queue; i++) {
1593                        pp->tx_dumper[i] = dumpers.queue[i].dumper;
1594                        pp->tx_pcap[i] = dumpers.queue[i].pcap;
1595                }
1596
1597                eth_dev->process_private = pp;
1598                eth_dev->rx_pkt_burst = eth_pcap_rx;
1599                if (devargs_all.is_tx_pcap)
1600                        eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
1601                else
1602                        eth_dev->tx_pkt_burst = eth_pcap_tx;
1603
1604                rte_eth_dev_probing_finish(eth_dev);
1605                goto free_kvlist;
1606        }
1607
1608        devargs_all.rx_queues = pcaps;
1609        devargs_all.tx_queues = dumpers;
1610
1611        ret = eth_from_pcaps(dev, &devargs_all);
1612
1613free_kvlist:
1614        rte_kvargs_free(kvlist);
1615
1616        if (ret < 0)
1617                eth_release_pcaps(&pcaps, &dumpers, devargs_all.single_iface);
1618
1619        return ret;
1620}
1621
1622static int
1623pmd_pcap_remove(struct rte_vdev_device *dev)
1624{
1625        struct rte_eth_dev *eth_dev = NULL;
1626
1627        if (!dev)
1628                return -1;
1629
1630        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
1631        if (eth_dev == NULL)
1632                return 0; /* port already released */
1633
1634        eth_dev_close(eth_dev);
1635        rte_eth_dev_release_port(eth_dev);
1636
1637        return 0;
1638}
1639
1640static struct rte_vdev_driver pmd_pcap_drv = {
1641        .probe = pmd_pcap_probe,
1642        .remove = pmd_pcap_remove,
1643};
1644
1645RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
1646RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
1647RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
1648        ETH_PCAP_RX_PCAP_ARG "=<string> "
1649        ETH_PCAP_TX_PCAP_ARG "=<string> "
1650        ETH_PCAP_RX_IFACE_ARG "=<ifc> "
1651        ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
1652        ETH_PCAP_TX_IFACE_ARG "=<ifc> "
1653        ETH_PCAP_IFACE_ARG "=<ifc> "
1654        ETH_PCAP_PHY_MAC_ARG "=<int>"
1655        ETH_PCAP_INFINITE_RX_ARG "=<0|1>");
1656
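/*
 * One possible invocation using the devargs registered above (core list and
 * file names are placeholders, not part of this driver):
 *
 *   dpdk-testpmd -l 0-1 --vdev 'net_pcap0,rx_pcap=input.pcap,tx_pcap=output.pcap'
 */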