dpdk/drivers/net/pcap/pcap_ethdev.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation.
 * Copyright(c) 2014 6WIND S.A.
 * All rights reserved.
 */

#include <time.h>

#include <pcap.h>

#include <rte_cycles.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_kvargs.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mbuf_dyn.h>
#include <rte_bus_vdev.h>
#include <rte_os_shim.h>

#include "pcap_osdep.h"

#define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
#define RTE_ETH_PCAP_SNAPLEN RTE_ETHER_MAX_JUMBO_FRAME_LEN
#define RTE_ETH_PCAP_PROMISC 1
#define RTE_ETH_PCAP_TIMEOUT -1

#define ETH_PCAP_RX_PCAP_ARG  "rx_pcap"
#define ETH_PCAP_TX_PCAP_ARG  "tx_pcap"
#define ETH_PCAP_RX_IFACE_ARG "rx_iface"
#define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
#define ETH_PCAP_TX_IFACE_ARG "tx_iface"
#define ETH_PCAP_IFACE_ARG    "iface"
#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
#define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"

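/*
 * Example vdev strings using the keys above (file and interface names
 * are illustrative, not part of this driver):
 *   --vdev 'net_pcap0,rx_pcap=in.pcap,tx_pcap=out.pcap'
 *   --vdev 'net_pcap1,iface=eth0,phy_mac=1'
 *   --vdev 'net_pcap2,rx_pcap=in.pcap,infinite_rx=1,tx_iface=eth1'
 */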
#define ETH_PCAP_ARG_MAXLEN     64

#define RTE_PMD_PCAP_MAX_QUEUES 16

static char errbuf[PCAP_ERRBUF_SIZE];
static struct timespec start_time;
static uint64_t start_cycles;
static uint64_t hz;
static uint8_t iface_idx;

static uint64_t timestamp_rx_dynflag;
static int timestamp_dynfield_offset = -1;

struct queue_stat {
        volatile unsigned long pkts;
        volatile unsigned long bytes;
        volatile unsigned long err_pkts;
};

struct queue_missed_stat {
        /* last value retrieved from pcap */
        unsigned int pcap;
        /* stores values lost by pcap stop or rollover */
        unsigned long mnemonic;
        /* value on last reset */
        unsigned long reset;
};

struct pcap_rx_queue {
        uint16_t port_id;
        uint16_t queue_id;
        struct rte_mempool *mb_pool;
        struct queue_stat rx_stat;
        struct queue_missed_stat missed_stat;
        char name[PATH_MAX];
        char type[ETH_PCAP_ARG_MAXLEN];

        /* Contains pre-generated packets to be looped through */
        struct rte_ring *pkts;
};

struct pcap_tx_queue {
        uint16_t port_id;
        uint16_t queue_id;
        struct queue_stat tx_stat;
        char name[PATH_MAX];
        char type[ETH_PCAP_ARG_MAXLEN];
};

struct pmd_internals {
        struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        char devargs[ETH_PCAP_ARG_MAXLEN];
        struct rte_ether_addr eth_addr;
        int if_index;
        int single_iface;
        int phy_mac;
        unsigned int infinite_rx;
};

struct pmd_process_private {
        pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
        pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
        pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
};

struct pmd_devargs {
        unsigned int num_of_queue;
        struct devargs_queue {
                pcap_dumper_t *dumper;
                pcap_t *pcap;
                const char *name;
                const char *type;
        } queue[RTE_PMD_PCAP_MAX_QUEUES];
        int phy_mac;
};

struct pmd_devargs_all {
        struct pmd_devargs rx_queues;
        struct pmd_devargs tx_queues;
        int single_iface;
        unsigned int is_tx_pcap;
        unsigned int is_tx_iface;
        unsigned int is_rx_pcap;
        unsigned int is_rx_iface;
        unsigned int infinite_rx;
};

static const char *valid_arguments[] = {
        ETH_PCAP_RX_PCAP_ARG,
        ETH_PCAP_TX_PCAP_ARG,
        ETH_PCAP_RX_IFACE_ARG,
        ETH_PCAP_RX_IFACE_IN_ARG,
        ETH_PCAP_TX_IFACE_ARG,
        ETH_PCAP_IFACE_ARG,
        ETH_PCAP_PHY_MAC_ARG,
        ETH_PCAP_INFINITE_RX_ARG,
        NULL
};

static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_status = ETH_LINK_DOWN,
                .link_autoneg = ETH_LINK_FIXED,
};

RTE_LOG_REGISTER_DEFAULT(eth_pcap_logtype, NOTICE);

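/*
 * Missed-packet accounting: libpcap reports drops via pcap_stats() as an
 * unsigned int that is lost when a handle is closed and that may wrap
 * around. The helpers below keep
 *   imissed = pcap (current ps_drop) + mnemonic (drops accumulated across
 * stops and rollovers) - reset (baseline taken on the last stats reset).
 */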
static struct queue_missed_stat*
queue_missed_stat_update(struct rte_eth_dev *dev, unsigned int qid)
{
        struct pmd_internals *internals = dev->data->dev_private;
        struct queue_missed_stat *missed_stat =
                        &internals->rx_queue[qid].missed_stat;
        const struct pmd_process_private *pp = dev->process_private;
        pcap_t *pcap = pp->rx_pcap[qid];
        struct pcap_stat stat;

        if (!pcap || (pcap_stats(pcap, &stat) != 0))
                return missed_stat;

        /* rollover check - best effort fixup assuming single rollover */
        if (stat.ps_drop < missed_stat->pcap)
                missed_stat->mnemonic += UINT_MAX;
        missed_stat->pcap = stat.ps_drop;

        return missed_stat;
}

static void
queue_missed_stat_on_stop_update(struct rte_eth_dev *dev, unsigned int qid)
{
        struct queue_missed_stat *missed_stat =
                        queue_missed_stat_update(dev, qid);

        missed_stat->mnemonic += missed_stat->pcap;
        missed_stat->pcap = 0;
}

static void
queue_missed_stat_reset(struct rte_eth_dev *dev, unsigned int qid)
{
        struct queue_missed_stat *missed_stat =
                        queue_missed_stat_update(dev, qid);

        missed_stat->reset = missed_stat->pcap;
        missed_stat->mnemonic = 0;
}

static unsigned long
queue_missed_stat_get(struct rte_eth_dev *dev, unsigned int qid)
{
        const struct queue_missed_stat *missed_stat =
                        queue_missed_stat_update(dev, qid);

        return missed_stat->pcap + missed_stat->mnemonic - missed_stat->reset;
}

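/*
 * Copy a captured frame that does not fit in a single mbuf into a chain
 * of mbufs allocated from mb_pool. Returns the resulting number of
 * segments, or -1 on allocation failure.
 */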
static int
eth_pcap_rx_jumbo(struct rte_mempool *mb_pool, struct rte_mbuf *mbuf,
                const u_char *data, uint16_t data_len)
{
        /* Copy the first segment. */
        uint16_t len = rte_pktmbuf_tailroom(mbuf);
        struct rte_mbuf *m = mbuf;

        rte_memcpy(rte_pktmbuf_append(mbuf, len), data, len);
        data_len -= len;
        data += len;

        while (data_len > 0) {
                /* Allocate next mbuf and point to that. */
                m->next = rte_pktmbuf_alloc(mb_pool);

                if (unlikely(!m->next))
                        return -1;

                m = m->next;

                /* Headroom is not needed in chained mbufs. */
                rte_pktmbuf_prepend(m, rte_pktmbuf_headroom(m));
                m->pkt_len = 0;
                m->data_len = 0;

                /* Copy next segment. */
                len = RTE_MIN(rte_pktmbuf_tailroom(m), data_len);
                rte_memcpy(rte_pktmbuf_append(m, len), data, len);

                mbuf->nb_segs++;
                data_len -= len;
                data += len;
        }

        return mbuf->nb_segs;
}

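/*
 * RX path used in infinite_rx mode: packets were pre-loaded into
 * pcap_q->pkts at queue setup time; each burst copies them into freshly
 * allocated mbufs and re-enqueues the originals so the ring never drains.
 */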
static uint16_t
eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        int i;
        struct pcap_rx_queue *pcap_q = queue;
        uint32_t rx_bytes = 0;

        if (unlikely(nb_pkts == 0))
                return 0;

        if (rte_pktmbuf_alloc_bulk(pcap_q->mb_pool, bufs, nb_pkts) != 0)
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                struct rte_mbuf *pcap_buf;
                int err = rte_ring_dequeue(pcap_q->pkts, (void **)&pcap_buf);
                if (err)
                        return i;

                rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
                                rte_pktmbuf_mtod(pcap_buf, void *),
                                pcap_buf->data_len);
                bufs[i]->data_len = pcap_buf->data_len;
                bufs[i]->pkt_len = pcap_buf->pkt_len;
                bufs[i]->port = pcap_q->port_id;
                rx_bytes += pcap_buf->data_len;

                /* Enqueue packet back on ring to allow infinite rx. */
                rte_ring_enqueue(pcap_q->pkts, pcap_buf);
        }

        pcap_q->rx_stat.pkts += i;
        pcap_q->rx_stat.bytes += rx_bytes;

        return i;
}

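/*
 * RX path for a pcap file or a live interface. The capture timestamp is
 * stored in the mbuf dynamic timestamp field, in microseconds (the
 * handles used here are opened with the default microsecond precision).
 */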
static uint16_t
eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        struct pcap_pkthdr header;
        struct pmd_process_private *pp;
        const u_char *packet;
        struct rte_mbuf *mbuf;
        struct pcap_rx_queue *pcap_q = queue;
        uint16_t num_rx = 0;
        uint32_t rx_bytes = 0;
        pcap_t *pcap;

        pp = rte_eth_devices[pcap_q->port_id].process_private;
        pcap = pp->rx_pcap[pcap_q->queue_id];

        if (unlikely(pcap == NULL || nb_pkts == 0))
                return 0;

        /* Reads the given number of packets from the pcap file one by one
         * and copies the packet data into newly allocated mbufs to return.
         */
        for (i = 0; i < nb_pkts; i++) {
                /* Get the next PCAP packet */
                packet = pcap_next(pcap, &header);
                if (unlikely(packet == NULL))
                        break;

                mbuf = rte_pktmbuf_alloc(pcap_q->mb_pool);
                if (unlikely(mbuf == NULL))
                        break;

                if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
                        /* pcap packet will fit in the mbuf, can copy it */
                        rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
                                        header.caplen);
                        mbuf->data_len = (uint16_t)header.caplen;
                } else {
                        /* Try to read the jumbo frame into multiple mbufs. */
                        if (unlikely(eth_pcap_rx_jumbo(pcap_q->mb_pool,
                                                       mbuf,
                                                       packet,
                                                       header.caplen) == -1)) {
                                rte_pktmbuf_free(mbuf);
                                break;
                        }
                }

                mbuf->pkt_len = (uint16_t)header.caplen;
                *RTE_MBUF_DYNFIELD(mbuf, timestamp_dynfield_offset,
                        rte_mbuf_timestamp_t *) =
                                (uint64_t)header.ts.tv_sec * 1000000 +
                                header.ts.tv_usec;
                mbuf->ol_flags |= timestamp_rx_dynflag;
                mbuf->port = pcap_q->port_id;
                bufs[num_rx] = mbuf;
                num_rx++;
                rx_bytes += header.caplen;
        }
        pcap_q->rx_stat.pkts += num_rx;
        pcap_q->rx_stat.bytes += rx_bytes;

        return num_rx;
}

static uint16_t
eth_null_rx(void *queue __rte_unused,
                struct rte_mbuf **bufs __rte_unused,
                uint16_t nb_pkts __rte_unused)
{
        return 0;
}

#define NSEC_PER_SEC    1000000000L

/*
 * This function stores nanoseconds in the `tv_usec` field of
 * `struct timeval`, because `ts` goes directly to a nanosecond-precision
 * dump.
 */
static inline void
calculate_timestamp(struct timeval *ts) {
        uint64_t cycles;
        struct timespec cur_time;

        cycles = rte_get_timer_cycles() - start_cycles;
        cur_time.tv_sec = cycles / hz;
        cur_time.tv_nsec = (cycles % hz) * NSEC_PER_SEC / hz;

        ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
        ts->tv_usec = start_time.tv_nsec + cur_time.tv_nsec;
        if (ts->tv_usec >= NSEC_PER_SEC) {
                ts->tv_usec -= NSEC_PER_SEC;
                ts->tv_sec += 1;
        }
}

/*
 * Callback to handle writing packets to a pcap file.
 */
static uint16_t
eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        struct rte_mbuf *mbuf;
        struct pmd_process_private *pp;
        struct pcap_tx_queue *dumper_q = queue;
        uint16_t num_tx = 0;
        uint32_t tx_bytes = 0;
        struct pcap_pkthdr header;
        pcap_dumper_t *dumper;
        unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
        size_t len, caplen;

        pp = rte_eth_devices[dumper_q->port_id].process_private;
        dumper = pp->tx_dumper[dumper_q->queue_id];

        if (dumper == NULL || nb_pkts == 0)
                return 0;

        /* Write the nb_pkts packets to the previously opened pcap file
         * dumper. */
        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];
                len = caplen = rte_pktmbuf_pkt_len(mbuf);
                if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
                                len > sizeof(temp_data))) {
                        caplen = sizeof(temp_data);
                }

                calculate_timestamp(&header.ts);
                header.len = len;
                header.caplen = caplen;
                /* rte_pktmbuf_read() returns a pointer to the data directly
                 * in the mbuf (when the mbuf is contiguous) or, otherwise,
                 * a pointer to temp_data after copying into it.
                 */
                pcap_dump((u_char *)dumper, &header,
                        rte_pktmbuf_read(mbuf, 0, caplen, temp_data));

                num_tx++;
                tx_bytes += caplen;
                rte_pktmbuf_free(mbuf);
        }

        /*
         * Since there's no place to hook a callback when the forwarding
         * process stops and to make sure the pcap file is actually written,
         * we flush the pcap dumper within each burst.
         */
        pcap_dump_flush(dumper);
        dumper_q->tx_stat.pkts += num_tx;
        dumper_q->tx_stat.bytes += tx_bytes;
        dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;

        return nb_pkts;
}

/*
 * Callback to handle dropping packets in the infinite rx case.
 */
static uint16_t
eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        uint32_t tx_bytes = 0;
        struct pcap_tx_queue *tx_queue = queue;

        if (unlikely(nb_pkts == 0))
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                tx_bytes += bufs[i]->pkt_len;
                rte_pktmbuf_free(bufs[i]);
        }

        tx_queue->tx_stat.pkts += nb_pkts;
        tx_queue->tx_stat.bytes += tx_bytes;

        return i;
}

/*
 * Callback to handle sending packets through a real NIC.
 */
static uint16_t
eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
        unsigned int i;
        int ret;
        struct rte_mbuf *mbuf;
        struct pmd_process_private *pp;
        struct pcap_tx_queue *tx_queue = queue;
        uint16_t num_tx = 0;
        uint32_t tx_bytes = 0;
        pcap_t *pcap;
        unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
        size_t len;

        pp = rte_eth_devices[tx_queue->port_id].process_private;
        pcap = pp->tx_pcap[tx_queue->queue_id];

        if (unlikely(nb_pkts == 0 || pcap == NULL))
                return 0;

        for (i = 0; i < nb_pkts; i++) {
                mbuf = bufs[i];
                len = rte_pktmbuf_pkt_len(mbuf);
                if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
                                len > sizeof(temp_data))) {
                        PMD_LOG(ERR,
                                "Dropping multi segment PCAP packet. Size (%zd) > max size (%zd).",
                                len, sizeof(temp_data));
                        rte_pktmbuf_free(mbuf);
                        continue;
                }

                /* rte_pktmbuf_read() returns a pointer to the data directly
                 * in the mbuf (when the mbuf is contiguous) or, otherwise,
                 * a pointer to temp_data after copying into it.
                 */
                ret = pcap_sendpacket(pcap,
                        rte_pktmbuf_read(mbuf, 0, len, temp_data), len);
                if (unlikely(ret != 0))
                        break;
                num_tx++;
                tx_bytes += len;
                rte_pktmbuf_free(mbuf);
        }

        tx_queue->tx_stat.pkts += num_tx;
        tx_queue->tx_stat.bytes += tx_bytes;
        tx_queue->tx_stat.err_pkts += i - num_tx;

        return i;
}

/*
 * pcap_open_live wrapper function
 */
static inline int
open_iface_live(const char *iface, pcap_t **pcap) {
        *pcap = pcap_open_live(iface, RTE_ETH_PCAP_SNAPLEN,
                        RTE_ETH_PCAP_PROMISC, RTE_ETH_PCAP_TIMEOUT, errbuf);

        if (*pcap == NULL) {
                PMD_LOG(ERR, "Couldn't open %s: %s", iface, errbuf);
                return -1;
        }

        return 0;
}

static int
open_single_iface(const char *iface, pcap_t **pcap)
{
        if (open_iface_live(iface, pcap) < 0) {
                PMD_LOG(ERR, "Couldn't open interface %s", iface);
                return -1;
        }

        return 0;
}

static int
open_single_tx_pcap(const char *pcap_filename, pcap_dumper_t **dumper)
{
        pcap_t *tx_pcap;

        /*
         * We need a dummy empty pcap_t to use with pcap_dump_open(), so we
         * create an Ethernet capture handle with a large enough snapshot
         * length.
         */
        tx_pcap = pcap_open_dead_with_tstamp_precision(DLT_EN10MB,
                        RTE_ETH_PCAP_SNAPSHOT_LEN, PCAP_TSTAMP_PRECISION_NANO);
        if (tx_pcap == NULL) {
                PMD_LOG(ERR, "Couldn't create dead pcap");
                return -1;
        }

        /* The dumper is created using the previous pcap_t reference */
        *dumper = pcap_dump_open(tx_pcap, pcap_filename);
        if (*dumper == NULL) {
                pcap_close(tx_pcap);
                PMD_LOG(ERR, "Couldn't open %s for writing.",
                        pcap_filename);
                return -1;
        }

        pcap_close(tx_pcap);
        return 0;
}

static int
open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
{
        *pcap = pcap_open_offline(pcap_filename, errbuf);
        if (*pcap == NULL) {
                PMD_LOG(ERR, "Couldn't open %s: %s", pcap_filename,
                        errbuf);
                return -1;
        }

        return 0;
}

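/*
 * Walk the whole file to count its packets. libpcap offline handles
 * cannot rewind, so the file is closed and reopened to leave the handle
 * usable for normal reads afterwards.
 */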
static uint64_t
count_packets_in_pcap(pcap_t **pcap, struct pcap_rx_queue *pcap_q)
{
        const u_char *packet;
        struct pcap_pkthdr header;
        uint64_t pcap_pkt_count = 0;

        while ((packet = pcap_next(*pcap, &header)))
                pcap_pkt_count++;

        /* The pcap is reopened so it can be used as normal later. */
        pcap_close(*pcap);
        *pcap = NULL;
        open_single_rx_pcap(pcap_q->name, pcap);

        return pcap_pkt_count;
}

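/*
 * Open (or reopen) the pcap handles/dumpers for every configured queue
 * and mark the queues and the link as up. In the single "iface" case one
 * handle is shared by rx and tx.
 */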
static int
eth_dev_start(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *pp = dev->process_private;
        struct pcap_tx_queue *tx;
        struct pcap_rx_queue *rx;

        /* Special iface case. A single pcap handle is opened and shared
         * between tx and rx. */
        if (internals->single_iface) {
                tx = &internals->tx_queue[0];
                rx = &internals->rx_queue[0];

                if (!pp->tx_pcap[0] &&
                        strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
                        if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
                                return -1;
                        pp->rx_pcap[0] = pp->tx_pcap[0];
                }

                goto status_up;
        }

        /* If not open already, open tx pcaps/dumpers */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                tx = &internals->tx_queue[i];

                if (!pp->tx_dumper[i] &&
                                strcmp(tx->type, ETH_PCAP_TX_PCAP_ARG) == 0) {
                        if (open_single_tx_pcap(tx->name,
                                &pp->tx_dumper[i]) < 0)
                                return -1;
                } else if (!pp->tx_pcap[i] &&
                                strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
                        if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
                                return -1;
                }
        }

        /* If not open already, open rx pcaps */
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rx = &internals->rx_queue[i];

                if (pp->rx_pcap[i] != NULL)
                        continue;

                if (strcmp(rx->type, ETH_PCAP_RX_PCAP_ARG) == 0) {
                        if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
                                return -1;
                } else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
                        if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
                                return -1;
                }
        }

status_up:
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;

        dev->data->dev_link.link_status = ETH_LINK_UP;

        return 0;
}

/*
 * This function gets called when the current port gets stopped. It is the
 * only place for us to close all the tx stream dumpers. If it is not
 * called, the dumpers are still flushed within each tx burst.
 */
static int
eth_dev_stop(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;
        struct pmd_process_private *pp = dev->process_private;

        /* Special iface case. A single pcap handle is opened and shared
         * between tx and rx. */
        if (internals->single_iface) {
                queue_missed_stat_on_stop_update(dev, 0);
                if (pp->tx_pcap[0] != NULL) {
                        pcap_close(pp->tx_pcap[0]);
                        pp->tx_pcap[0] = NULL;
                        pp->rx_pcap[0] = NULL;
                }
                goto status_down;
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                if (pp->tx_dumper[i] != NULL) {
                        pcap_dump_close(pp->tx_dumper[i]);
                        pp->tx_dumper[i] = NULL;
                }

                if (pp->tx_pcap[i] != NULL) {
                        pcap_close(pp->tx_pcap[i]);
                        pp->tx_pcap[i] = NULL;
                }
        }

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                if (pp->rx_pcap[i] != NULL) {
                        queue_missed_stat_on_stop_update(dev, i);
                        pcap_close(pp->rx_pcap[i]);
                        pp->rx_pcap[i] = NULL;
                }
        }

status_down:
        for (i = 0; i < dev->data->nb_rx_queues; i++)
                dev->data->rx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        for (i = 0; i < dev->data->nb_tx_queues; i++)
                dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;

        dev->data->dev_link.link_status = ETH_LINK_DOWN;

        return 0;
}

static int
eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
{
        return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
                struct rte_eth_dev_info *dev_info)
{
        struct pmd_internals *internals = dev->data->dev_private;

        dev_info->if_index = internals->if_index;
        dev_info->max_mac_addrs = 1;
        dev_info->max_rx_pktlen = (uint32_t) -1;
        dev_info->max_rx_queues = dev->data->nb_rx_queues;
        dev_info->max_tx_queues = dev->data->nb_tx_queues;
        dev_info->min_rx_bufsize = 0;

        return 0;
}

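/*
 * Aggregate the per-queue counters into the ethdev stats; imissed comes
 * from the pcap drop counters maintained by queue_missed_stat_*().
 */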
static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
        unsigned int i;
        unsigned long rx_packets_total = 0, rx_bytes_total = 0;
        unsigned long rx_missed_total = 0;
        unsigned long tx_packets_total = 0, tx_bytes_total = 0;
        unsigned long tx_packets_err_total = 0;
        const struct pmd_internals *internal = dev->data->dev_private;

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_rx_queues; i++) {
                stats->q_ipackets[i] = internal->rx_queue[i].rx_stat.pkts;
                stats->q_ibytes[i] = internal->rx_queue[i].rx_stat.bytes;
                rx_packets_total += stats->q_ipackets[i];
                rx_bytes_total += stats->q_ibytes[i];
                rx_missed_total += queue_missed_stat_get(dev, i);
        }

        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
                        i < dev->data->nb_tx_queues; i++) {
                stats->q_opackets[i] = internal->tx_queue[i].tx_stat.pkts;
                stats->q_obytes[i] = internal->tx_queue[i].tx_stat.bytes;
                tx_packets_total += stats->q_opackets[i];
                tx_bytes_total += stats->q_obytes[i];
                tx_packets_err_total += internal->tx_queue[i].tx_stat.err_pkts;
        }

        stats->ipackets = rx_packets_total;
        stats->ibytes = rx_bytes_total;
        stats->imissed = rx_missed_total;
        stats->opackets = tx_packets_total;
        stats->obytes = tx_bytes_total;
        stats->oerrors = tx_packets_err_total;

        return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internal = dev->data->dev_private;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                internal->rx_queue[i].rx_stat.pkts = 0;
                internal->rx_queue[i].rx_stat.bytes = 0;
                queue_missed_stat_reset(dev, i);
        }

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                internal->tx_queue[i].tx_stat.pkts = 0;
                internal->tx_queue[i].tx_stat.bytes = 0;
                internal->tx_queue[i].tx_stat.err_pkts = 0;
        }

        return 0;
}

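/* Drain and free the ring of pre-loaded packets used by infinite_rx. */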
static inline void
infinite_rx_ring_free(struct rte_ring *pkts)
{
        struct rte_mbuf *bufs;

        while (!rte_ring_dequeue(pkts, (void **)&bufs))
                rte_pktmbuf_free(bufs);

        rte_ring_free(pkts);
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
        unsigned int i;
        struct pmd_internals *internals = dev->data->dev_private;

        PMD_LOG(INFO, "Closing pcap ethdev on NUMA socket %d",
                        rte_socket_id());

        eth_dev_stop(dev);

        rte_free(dev->process_private);

        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;

        /* Device wide flag, but cleanup must be performed per queue. */
        if (internals->infinite_rx) {
                for (i = 0; i < dev->data->nb_rx_queues; i++) {
                        struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];

                        /*
                         * 'pcap_q->pkts' can be NULL if 'eth_dev_close()' is
                         * called before 'eth_rx_queue_setup()' has been called.
                         */
                        if (pcap_q->pkts == NULL)
                                continue;

                        infinite_rx_ring_free(pcap_q->pkts);
                }
        }

        if (internals->phy_mac == 0)
                /* not dynamically allocated, must not be freed */
                dev->data->mac_addrs = NULL;

        return 0;
}

static void
eth_queue_release(void *q __rte_unused)
{
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
                int wait_to_complete __rte_unused)
{
        return 0;
}

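/*
 * RX queue setup. In infinite_rx mode the whole file is read once here:
 * the packets are counted, a single-producer/single-consumer ring sized
 * to the next power of two is created, and every packet is copied into an
 * mbuf and enqueued; multi-segment mbufs are rejected.
 */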
static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
                uint16_t rx_queue_id,
                uint16_t nb_rx_desc __rte_unused,
                unsigned int socket_id __rte_unused,
                const struct rte_eth_rxconf *rx_conf __rte_unused,
                struct rte_mempool *mb_pool)
{
        struct pmd_internals *internals = dev->data->dev_private;
        struct pcap_rx_queue *pcap_q = &internals->rx_queue[rx_queue_id];

        pcap_q->mb_pool = mb_pool;
        pcap_q->port_id = dev->data->port_id;
        pcap_q->queue_id = rx_queue_id;
        dev->data->rx_queues[rx_queue_id] = pcap_q;

        if (internals->infinite_rx) {
                struct pmd_process_private *pp;
                char ring_name[RTE_RING_NAMESIZE];
                static uint32_t ring_number;
                uint64_t pcap_pkt_count = 0;
                struct rte_mbuf *bufs[1];
                pcap_t **pcap;

                pp = rte_eth_devices[pcap_q->port_id].process_private;
                pcap = &pp->rx_pcap[pcap_q->queue_id];

                if (unlikely(*pcap == NULL))
                        return -ENOENT;

                pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q);

                snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu32,
                                ring_number);

                pcap_q->pkts = rte_ring_create(ring_name,
                                rte_align64pow2(pcap_pkt_count + 1), 0,
                                RING_F_SP_ENQ | RING_F_SC_DEQ);
                ring_number++;
                if (!pcap_q->pkts)
                        return -ENOENT;

                /* Fill the ring with packets from the PCAP file, one by one. */
                while (eth_pcap_rx(pcap_q, bufs, 1)) {
                        /* Check for multiseg mbufs. */
                        if (bufs[0]->nb_segs != 1) {
                                infinite_rx_ring_free(pcap_q->pkts);
                                PMD_LOG(ERR,
                                        "Multiseg mbufs are not supported in infinite_rx mode.");
                                return -EINVAL;
                        }

                        rte_ring_enqueue_bulk(pcap_q->pkts,
                                        (void * const *)bufs, 1, NULL);
                }

                if (rte_ring_count(pcap_q->pkts) < pcap_pkt_count) {
                        infinite_rx_ring_free(pcap_q->pkts);
                        PMD_LOG(ERR,
                                "Not enough mbufs to accommodate packets in pcap file. "
                                "At least %" PRIu64 " mbufs per queue are required.",
                                pcap_pkt_count);
                        return -EINVAL;
                }

                /*
                 * Reset the stats for this queue since the eth_pcap_rx calls
                 * above didn't result in the application receiving packets.
                 */
                pcap_q->rx_stat.pkts = 0;
                pcap_q->rx_stat.bytes = 0;
        }

        return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev,
                uint16_t tx_queue_id,
                uint16_t nb_tx_desc __rte_unused,
                unsigned int socket_id __rte_unused,
                const struct rte_eth_txconf *tx_conf __rte_unused)
{
        struct pmd_internals *internals = dev->data->dev_private;
        struct pcap_tx_queue *pcap_q = &internals->tx_queue[tx_queue_id];

        pcap_q->port_id = dev->data->port_id;
        pcap_q->queue_id = tx_queue_id;
        dev->data->tx_queues[tx_queue_id] = pcap_q;

        return 0;
}

static int
eth_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;

        return 0;
}

static int
eth_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}

static int
eth_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

        return 0;
}

static const struct eth_dev_ops ops = {
        .dev_start = eth_dev_start,
        .dev_stop = eth_dev_stop,
        .dev_close = eth_dev_close,
        .dev_configure = eth_dev_configure,
        .dev_infos_get = eth_dev_info,
        .rx_queue_setup = eth_rx_queue_setup,
        .tx_queue_setup = eth_tx_queue_setup,
        .rx_queue_start = eth_rx_queue_start,
        .tx_queue_start = eth_tx_queue_start,
        .rx_queue_stop = eth_rx_queue_stop,
        .tx_queue_stop = eth_tx_queue_stop,
        .rx_queue_release = eth_queue_release,
        .tx_queue_release = eth_queue_release,
        .link_update = eth_link_update,
        .stats_get = eth_stats_get,
        .stats_reset = eth_stats_reset,
};

static int
add_queue(struct pmd_devargs *pmd, const char *name, const char *type,
                pcap_t *pcap, pcap_dumper_t *dumper)
{
        if (pmd->num_of_queue >= RTE_PMD_PCAP_MAX_QUEUES)
                return -1;
        if (pcap)
                pmd->queue[pmd->num_of_queue].pcap = pcap;
        if (dumper)
                pmd->queue[pmd->num_of_queue].dumper = dumper;
        pmd->queue[pmd->num_of_queue].name = name;
        pmd->queue[pmd->num_of_queue].type = type;
        pmd->num_of_queue++;
        return 0;
}

/*
 * Function handler that opens the pcap file for reading and stores a
 * reference to it for later use.
 */
static int
open_rx_pcap(const char *key, const char *value, void *extra_args)
{
        const char *pcap_filename = value;
        struct pmd_devargs *rx = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_rx_pcap(pcap_filename, &pcap) < 0)
                return -1;

        if (add_queue(rx, pcap_filename, key, pcap, NULL) < 0) {
                pcap_close(pcap);
                return -1;
        }

        return 0;
}

/*
 * Opens a pcap file for writing and stores a reference to it
 * for later use.
 */
static int
open_tx_pcap(const char *key, const char *value, void *extra_args)
{
        const char *pcap_filename = value;
        struct pmd_devargs *dumpers = extra_args;
        pcap_dumper_t *dumper;

        if (open_single_tx_pcap(pcap_filename, &dumper) < 0)
                return -1;

        if (add_queue(dumpers, pcap_filename, key, NULL, dumper) < 0) {
                pcap_dump_close(dumper);
                return -1;
        }

        return 0;
}

/*
 * Opens an interface for reading and writing
 */
static inline int
open_rx_tx_iface(const char *key, const char *value, void *extra_args)
{
        const char *iface = value;
        struct pmd_devargs *tx = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_iface(iface, &pcap) < 0)
                return -1;

        tx->queue[0].pcap = pcap;
        tx->queue[0].name = iface;
        tx->queue[0].type = key;

        return 0;
}

static inline int
set_iface_direction(const char *iface, pcap_t *pcap,
                pcap_direction_t direction)
{
        const char *direction_str = (direction == PCAP_D_IN) ? "IN" : "OUT";
        if (pcap_setdirection(pcap, direction) < 0) {
                PMD_LOG(ERR, "Setting %s pcap direction %s failed - %s\n",
                                iface, direction_str, pcap_geterr(pcap));
                return -1;
        }
        PMD_LOG(INFO, "Setting %s pcap direction %s\n",
                        iface, direction_str);
        return 0;
}

static inline int
open_iface(const char *key, const char *value, void *extra_args)
{
        const char *iface = value;
        struct pmd_devargs *pmd = extra_args;
        pcap_t *pcap = NULL;

        if (open_single_iface(iface, &pcap) < 0)
                return -1;
        if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
                pcap_close(pcap);
                return -1;
        }

        return 0;
}

/*
 * Opens a NIC for reading packets from it
 */
static inline int
open_rx_iface(const char *key, const char *value, void *extra_args)
{
        int ret = open_iface(key, value, extra_args);
        if (ret < 0)
                return ret;
        if (strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0) {
                struct pmd_devargs *pmd = extra_args;
                unsigned int qid = pmd->num_of_queue - 1;

                set_iface_direction(pmd->queue[qid].name,
                                pmd->queue[qid].pcap,
                                PCAP_D_IN);
        }

        return 0;
}

static inline int
rx_iface_args_process(const char *key, const char *value, void *extra_args)
{
        if (strcmp(key, ETH_PCAP_RX_IFACE_ARG) == 0 ||
                        strcmp(key, ETH_PCAP_RX_IFACE_IN_ARG) == 0)
                return open_rx_iface(key, value, extra_args);

        return 0;
}

/*
 * Opens a NIC for writing packets to it
 */
static int
open_tx_iface(const char *key, const char *value, void *extra_args)
{
        return open_iface(key, value, extra_args);
}

static int
select_phy_mac(const char *key __rte_unused, const char *value,
                void *extra_args)
{
        if (extra_args) {
                const int phy_mac = atoi(value);
                int *enable_phy_mac = extra_args;

                if (phy_mac)
                        *enable_phy_mac = 1;
        }
        return 0;
}

static int
get_infinite_rx_arg(const char *key __rte_unused,
                const char *value, void *extra_args)
{
        if (extra_args) {
                const int infinite_rx = atoi(value);
                int *enable_infinite_rx = extra_args;

                if (infinite_rx > 0)
                        *enable_infinite_rx = 1;
        }
        return 0;
}

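/*
 * Allocate the ethdev and its process-private area, assign the locally
 * administered MAC 02:70:63:61:70:<idx> ("pcap" in hex), and wire up the
 * default link state and the dev_ops table.
 */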
static int
pmd_init_internals(struct rte_vdev_device *vdev,
                const unsigned int nb_rx_queues,
                const unsigned int nb_tx_queues,
                struct pmd_internals **internals,
                struct rte_eth_dev **eth_dev)
{
        struct rte_eth_dev_data *data;
        struct pmd_process_private *pp;
        unsigned int numa_node = vdev->device.numa_node;

        PMD_LOG(INFO, "Creating pcap-backed ethdev on numa socket %d",
                numa_node);

        pp = (struct pmd_process_private *)
                rte_zmalloc(NULL, sizeof(struct pmd_process_private),
                                RTE_CACHE_LINE_SIZE);

        if (pp == NULL) {
                PMD_LOG(ERR,
                        "Failed to allocate memory for process private");
                return -1;
        }

        /* reserve an ethdev entry */
        *eth_dev = rte_eth_vdev_allocate(vdev, sizeof(**internals));
        if (!(*eth_dev)) {
                rte_free(pp);
                return -1;
        }
        (*eth_dev)->process_private = pp;
        /* now put it all together
         * - store queue data in internals,
         * - store numa_node info in eth_dev
         * - point eth_dev_data to internals
         * - and point eth_dev structure to new eth_dev_data structure
         */
        *internals = (*eth_dev)->data->dev_private;
        /*
         * Interface MAC = 02:70:63:61:70:<iface_idx>
         * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
         * where the middle 4 characters are converted to hex.
         */
        (*internals)->eth_addr = (struct rte_ether_addr) {
                .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
        };
        (*internals)->phy_mac = 0;
        data = (*eth_dev)->data;
        data->nb_rx_queues = (uint16_t)nb_rx_queues;
        data->nb_tx_queues = (uint16_t)nb_tx_queues;
        data->dev_link = pmd_link;
        data->mac_addrs = &(*internals)->eth_addr;
        data->promiscuous = 1;
        data->all_multicast = 1;
        data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;

        /*
         * NOTE: we'll replace the data element of the originally allocated
         * eth_dev so the rings are local per-process
         */
        (*eth_dev)->dev_ops = &ops;

        strlcpy((*internals)->devargs, rte_vdev_device_args(vdev),
                        ETH_PCAP_ARG_MAXLEN);

        return 0;
}

static int
eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
                const unsigned int numa_node)
{
        void *mac_addrs;
        struct rte_ether_addr mac;

        if (osdep_iface_mac_get(if_name, &mac) < 0)
                return -1;

        mac_addrs = rte_zmalloc_socket(NULL, RTE_ETHER_ADDR_LEN, 0, numa_node);
        if (mac_addrs == NULL)
                return -1;

        PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
        rte_memcpy(mac_addrs, mac.addr_bytes, RTE_ETHER_ADDR_LEN);
        eth_dev->data->mac_addrs = mac_addrs;
        return 0;
}

static int
eth_from_pcaps_common(struct rte_vdev_device *vdev,
                struct pmd_devargs_all *devargs_all,
                struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
{
        struct pmd_process_private *pp;
        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
        struct pmd_devargs *tx_queues = &devargs_all->tx_queues;
        const unsigned int nb_rx_queues = rx_queues->num_of_queue;
        const unsigned int nb_tx_queues = tx_queues->num_of_queue;
        unsigned int i;

        if (pmd_init_internals(vdev, nb_rx_queues, nb_tx_queues, internals,
                        eth_dev) < 0)
                return -1;

        pp = (*eth_dev)->process_private;
        for (i = 0; i < nb_rx_queues; i++) {
                struct pcap_rx_queue *rx = &(*internals)->rx_queue[i];
                struct devargs_queue *queue = &rx_queues->queue[i];

                pp->rx_pcap[i] = queue->pcap;
                strlcpy(rx->name, queue->name, sizeof(rx->name));
                strlcpy(rx->type, queue->type, sizeof(rx->type));
        }

        for (i = 0; i < nb_tx_queues; i++) {
                struct pcap_tx_queue *tx = &(*internals)->tx_queue[i];
                struct devargs_queue *queue = &tx_queues->queue[i];

                pp->tx_dumper[i] = queue->dumper;
                pp->tx_pcap[i] = queue->pcap;
                strlcpy(tx->name, queue->name, sizeof(tx->name));
                strlcpy(tx->type, queue->type, sizeof(tx->type));
        }

        return 0;
}

static int
eth_from_pcaps(struct rte_vdev_device *vdev,
                struct pmd_devargs_all *devargs_all)
{
        struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_devargs *rx_queues = &devargs_all->rx_queues;
        int single_iface = devargs_all->single_iface;
        unsigned int infinite_rx = devargs_all->infinite_rx;
        int ret;

        ret = eth_from_pcaps_common(vdev, devargs_all, &internals, &eth_dev);

        if (ret < 0)
                return ret;

        /* store whether we are using a single interface for rx/tx or not */
        internals->single_iface = single_iface;

        if (single_iface) {
                internals->if_index =
                        osdep_iface_index_get(rx_queues->queue[0].name);

                /* phy_mac arg is applied only if the "iface" devarg is provided */
                if (rx_queues->phy_mac) {
                        if (eth_pcap_update_mac(rx_queues->queue[0].name,
                                        eth_dev, vdev->device.numa_node) == 0)
                                internals->phy_mac = 1;
                }
        }

        internals->infinite_rx = infinite_rx;
        /* Assign rx ops. */
        if (infinite_rx)
                eth_dev->rx_pkt_burst = eth_pcap_rx_infinite;
        else if (devargs_all->is_rx_pcap || devargs_all->is_rx_iface ||
                        single_iface)
                eth_dev->rx_pkt_burst = eth_pcap_rx;
        else
                eth_dev->rx_pkt_burst = eth_null_rx;

        /* Assign tx ops. */
        if (devargs_all->is_tx_pcap)
                eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
        else if (devargs_all->is_tx_iface || single_iface)
                eth_dev->tx_pkt_burst = eth_pcap_tx;
        else
                eth_dev->tx_pkt_burst = eth_tx_drop;

        rte_eth_dev_probing_finish(eth_dev);
        return 0;
}

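/*
 * vdev probe entry point: parse the devargs, open the requested pcaps
 * and/or interfaces, then create the ethdev. Secondary processes re-parse
 * the devargs stored by the primary and only rebuild their own
 * process-private pcap handles.
 */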
static int
pmd_pcap_probe(struct rte_vdev_device *dev)
{
        const char *name;
        struct rte_kvargs *kvlist;
        struct pmd_devargs pcaps = {0};
        struct pmd_devargs dumpers = {0};
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internal;
        int ret = 0;

        struct pmd_devargs_all devargs_all = {
                .single_iface = 0,
                .is_tx_pcap = 0,
                .is_tx_iface = 0,
                .infinite_rx = 0,
        };

        name = rte_vdev_device_name(dev);
        PMD_LOG(INFO, "Initializing pmd_pcap for %s", name);

        timespec_get(&start_time, TIME_UTC);
        start_cycles = rte_get_timer_cycles();
        hz = rte_get_timer_hz();

        ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
                        &timestamp_rx_dynflag);
        if (ret != 0) {
                PMD_LOG(ERR, "Failed to register Rx timestamp field/flag");
                return -1;
        }

        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
                        return -1;
                }

                internal = eth_dev->data->dev_private;

                kvlist = rte_kvargs_parse(internal->devargs, valid_arguments);
                if (kvlist == NULL)
                        return -1;
        } else {
                kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
                                valid_arguments);
                if (kvlist == NULL)
                        return -1;
        }

        /*
         * If the iface argument is passed, we open the NIC and use it for
         * both reading and writing.
         */
        if (rte_kvargs_count(kvlist, ETH_PCAP_IFACE_ARG) == 1) {

                ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
                                &open_rx_tx_iface, &pcaps);
                if (ret < 0)
                        goto free_kvlist;

                dumpers.queue[0] = pcaps.queue[0];

                ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
                                &select_phy_mac, &pcaps.phy_mac);
                if (ret < 0)
                        goto free_kvlist;

                dumpers.phy_mac = pcaps.phy_mac;

                devargs_all.single_iface = 1;
                pcaps.num_of_queue = 1;
                dumpers.num_of_queue = 1;

                goto create_eth;
        }

        /*
         * We check whether we want to open an RX stream from a real NIC,
         * a pcap file, or a dummy RX stream.
         */
        devargs_all.is_rx_pcap =
                rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
        devargs_all.is_rx_iface =
                (rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_ARG) +
                 rte_kvargs_count(kvlist, ETH_PCAP_RX_IFACE_IN_ARG)) ? 1 : 0;
        pcaps.num_of_queue = 0;

        devargs_all.is_tx_pcap =
                rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
        devargs_all.is_tx_iface =
                rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
        dumpers.num_of_queue = 0;

        if (devargs_all.is_rx_pcap) {
                /*
                 * We check whether the pcap file should be received in an
                 * infinite loop.
                 */
                unsigned int infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
                                ETH_PCAP_INFINITE_RX_ARG);

                if (infinite_rx_arg_cnt == 1) {
                        ret = rte_kvargs_process(kvlist,
                                        ETH_PCAP_INFINITE_RX_ARG,
                                        &get_infinite_rx_arg,
                                        &devargs_all.infinite_rx);
                        if (ret < 0)
                                goto free_kvlist;
                        PMD_LOG(INFO, "infinite_rx has been %s for %s",
                                        devargs_all.infinite_rx ? "enabled" : "disabled",
                                        name);

                } else if (infinite_rx_arg_cnt > 1) {
                        PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
                                        "argument has been provided more than once "
                                        "for %s", name);
                }

                ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
                                &open_rx_pcap, &pcaps);
        } else if (devargs_all.is_rx_iface) {
                ret = rte_kvargs_process(kvlist, NULL,
                                &rx_iface_args_process, &pcaps);
        } else if (devargs_all.is_tx_iface || devargs_all.is_tx_pcap) {
                unsigned int i;

                /* Count the number of tx queue args passed before dummy rx
                 * queue creation so a dummy rx queue can be created for each
                 * tx queue.
                 */
                unsigned int num_tx_queues =
                        (rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) +
                        rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG));

                PMD_LOG(INFO, "Creating null rx queue since no rx queues were provided.");

                /* Creating a dummy rx queue for each tx queue passed */
                for (i = 0; i < num_tx_queues; i++)
                        ret = add_queue(&pcaps, "dummy_rx", "rx_null", NULL,
                                        NULL);
        } else {
                PMD_LOG(ERR, "Error - No rx or tx queues provided");
                ret = -ENOENT;
        }
        if (ret < 0)
                goto free_kvlist;

        /*
         * We check whether we want to open a TX stream to a real NIC,
         * a pcap file, or drop packets on tx.
         */
        if (devargs_all.is_tx_pcap) {
                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
                                &open_tx_pcap, &dumpers);
        } else if (devargs_all.is_tx_iface) {
                ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
                                &open_tx_iface, &dumpers);
        } else {
                unsigned int i;

                PMD_LOG(INFO, "Dropping packets on tx since no tx queues were provided.");

                /* Add 1 dummy queue per rxq which counts and drops packets. */
                for (i = 0; i < pcaps.num_of_queue; i++)
                        ret = add_queue(&dumpers, "dummy_tx", "tx_drop", NULL,
                                        NULL);
        }

        if (ret < 0)
                goto free_kvlist;

create_eth:
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                struct pmd_process_private *pp;
                unsigned int i;

                internal = eth_dev->data->dev_private;
                pp = (struct pmd_process_private *)
                        rte_zmalloc(NULL,
                                sizeof(struct pmd_process_private),
                                RTE_CACHE_LINE_SIZE);

                if (pp == NULL) {
                        PMD_LOG(ERR,
                                "Failed to allocate memory for process private");
                        ret = -1;
                        goto free_kvlist;
                }

                eth_dev->dev_ops = &ops;
                eth_dev->device = &dev->device;

                /* setup process private */
                for (i = 0; i < pcaps.num_of_queue; i++)
                        pp->rx_pcap[i] = pcaps.queue[i].pcap;

                for (i = 0; i < dumpers.num_of_queue; i++) {
                        pp->tx_dumper[i] = dumpers.queue[i].dumper;
                        pp->tx_pcap[i] = dumpers.queue[i].pcap;
                }

                eth_dev->process_private = pp;
                eth_dev->rx_pkt_burst = eth_pcap_rx;
                if (devargs_all.is_tx_pcap)
                        eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
                else
                        eth_dev->tx_pkt_burst = eth_pcap_tx;

                rte_eth_dev_probing_finish(eth_dev);
                goto free_kvlist;
        }

        devargs_all.rx_queues = pcaps;
        devargs_all.tx_queues = dumpers;

        ret = eth_from_pcaps(dev, &devargs_all);

free_kvlist:
        rte_kvargs_free(kvlist);

        return ret;
}

static int
pmd_pcap_remove(struct rte_vdev_device *dev)
{
        struct rte_eth_dev *eth_dev = NULL;

        if (!dev)
                return -1;

        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (eth_dev == NULL)
                return 0; /* port already released */

        eth_dev_close(eth_dev);
        rte_eth_dev_release_port(eth_dev);

        return 0;
}

static struct rte_vdev_driver pmd_pcap_drv = {
        .probe = pmd_pcap_probe,
        .remove = pmd_pcap_remove,
};

RTE_PMD_REGISTER_VDEV(net_pcap, pmd_pcap_drv);
RTE_PMD_REGISTER_ALIAS(net_pcap, eth_pcap);
RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
        ETH_PCAP_RX_PCAP_ARG "=<string> "
        ETH_PCAP_TX_PCAP_ARG "=<string> "
        ETH_PCAP_RX_IFACE_ARG "=<ifc> "
        ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
        ETH_PCAP_TX_IFACE_ARG "=<ifc> "
        ETH_PCAP_IFACE_ARG "=<ifc> "
        ETH_PCAP_PHY_MAC_ARG "=<int> "
        ETH_PCAP_INFINITE_RX_ARG "=<0|1>");