dpdk/examples/l3fwd-power/main.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2018 Intel Corporation
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <unistd.h>
#include <signal.h>
#include <math.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_timer.h>
#include <rte_power.h>
#include <rte_spinlock.h>
#include <rte_power_empty_poll.h>
#include <rte_metrics.h>
#include <rte_telemetry.h>
#include <rte_power_pmd_mgmt.h>

#include "perf_core.h"
#include "main.h"

#define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1

#define MAX_PKT_BURST 32

#define MIN_ZERO_POLL_COUNT 10

/* 100 ms interval */
#define TIMER_NUMBER_PER_SECOND           10
/* (10ms) */
#define INTERVALS_PER_SECOND             100
/* 100000 us */
#define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
#define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25

#define APP_LOOKUP_EXACT_MATCH          0
#define APP_LOOKUP_LPM                  1
#define DO_RFC_1812_CHECKS

#ifndef APP_LOOKUP_METHOD
#define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
#include <rte_hash.h>
#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
#include <rte_lpm.h>
#else
#error "APP_LOOKUP_METHOD set to incorrect value"
#endif

#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
                       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
        addr[0],  addr[1], addr[2],  addr[3], \
        addr[4],  addr[5], addr[6],  addr[7], \
        addr[8],  addr[9], addr[10], addr[11],\
        addr[12], addr[13], addr[14], addr[15]
#endif

#define MAX_JUMBO_PKT_LEN  9600

#define IPV6_ADDR_LEN 16

#define MEMPOOL_CACHE_SIZE 256

/*
 * This expression is used to calculate the number of mbufs needed depending on
 * user input, taking into account memory for rx and tx hardware rings, cache
 * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
 * NB_MBUF never goes below a minimum value of 8192.
 */

#define NB_MBUF RTE_MAX ( \
        (nb_ports*nb_rx_queue*nb_rxd + \
        nb_ports*nb_lcores*MAX_PKT_BURST + \
        nb_ports*n_tx_queue*nb_txd + \
        nb_lcores*MEMPOOL_CACHE_SIZE), \
        (unsigned)8192)

#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch when reading packets */
#define PREFETCH_OFFSET 3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/*
 * These two thresholds were decided on by running the training algorithm on
 * a 2.5GHz Xeon. These defaults can be overridden by supplying non-zero values
 * for the med_threshold and high_threshold parameters on the command line.
 */
#define EMPTY_POLL_MED_THRESHOLD 350000UL
#define EMPTY_POLL_HGH_THRESHOLD 580000UL

#define NUM_TELSTATS RTE_DIM(telstats_strings)

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* spinlocks for per-port Rx interrupt enable/disable */
static rte_spinlock_t locks[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
/* Promiscuous mode is disabled on the ports by default. */
static int promiscuous_on = 0;
/* NUMA is enabled by default. */
static int numa_on = 1;
static bool empty_poll_stop;
static bool empty_poll_train;
volatile bool quit_signal;
static struct ep_params *ep_params;
static struct ep_policy policy;
static long ep_med_edpi, ep_hgh_edpi;
/* timer to update telemetry every 500ms */
static struct rte_timer telemetry_timer;

/* stats index returned by metrics lib */
int telstats_index;

struct telstats_name {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
};

/* telemetry stats to be reported */
const struct telstats_name telstats_strings[] = {
        {"empty_poll"},
        {"full_poll"},
        {"busy_percent"}
};

/* core busyness in percentage */
enum busy_rate {
        ZERO = 0,
        PARTIAL = 50,
        FULL = 100
};

/* reference poll count to measure core busyness */
#define DEFAULT_COUNT 10000
/*
 * reference CYCLES to be used to
 * measure core busyness based on poll count
 */
#define MIN_CYCLES  1500000ULL
#define MAX_CYCLES 22000000ULL

/* (500ms) */
#define TELEMETRY_INTERVALS_PER_SEC 2

static int parse_ptype; /**< Parse packet type via Rx callback; disabled by default. */

enum appmode {
        APP_MODE_DEFAULT = 0,
        APP_MODE_LEGACY,
        APP_MODE_EMPTY_POLL,
        APP_MODE_TELEMETRY,
        APP_MODE_INTERRUPT,
        APP_MODE_PMD_MGMT
};

enum appmode app_mode;

static enum rte_power_pmd_mgmt_type pmgmt_type;
bool baseline_enabled;

enum freq_scale_hint_t
{
        FREQ_LOWER    =      -1,
        FREQ_CURRENT  =       0,
        FREQ_HIGHER   =       1,
        FREQ_HIGHEST  =       2
};

struct lcore_rx_queue {
        uint16_t port_id;
        uint8_t queue_id;
        enum freq_scale_hint_t freq_up_hint;
        uint32_t zero_rx_packet_count;
        uint32_t idle_hint;
} __rte_cache_aligned;

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128

#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16


struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
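/* Each entry below maps {port_id, queue_id, lcore_id}; see struct lcore_params in main.h. */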
static struct lcore_params lcore_params_array_default[] = {
        {0, 0, 2},
        {0, 1, 2},
        {0, 2, 2},
        {1, 0, 2},
        {1, 1, 2},
        {1, 2, 2},
        {2, 0, 2},
        {3, 0, 3},
        {3, 1, 3},
};

struct lcore_params *lcore_params = lcore_params_array_default;
uint16_t nb_lcore_params = RTE_DIM(lcore_params_array_default);

static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = RTE_ETH_MQ_RX_RSS,
                .split_hdr_size = 0,
                .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,
                        .rss_hf = RTE_ETH_RSS_UDP,
                },
        },
        .txmode = {
                .mq_mode = RTE_ETH_MQ_TX_NONE,
        }
};

static uint32_t max_pkt_len;

static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];


#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)

#ifdef RTE_ARCH_X86
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC       rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC       rte_jhash
#endif

struct ipv4_5tuple {
        uint32_t ip_dst;
        uint32_t ip_src;
        uint16_t port_dst;
        uint16_t port_src;
        uint8_t  proto;
} __rte_packed;

struct ipv6_5tuple {
        uint8_t  ip_dst[IPV6_ADDR_LEN];
        uint8_t  ip_src[IPV6_ADDR_LEN];
        uint16_t port_dst;
        uint16_t port_src;
        uint8_t  proto;
} __rte_packed;

struct ipv4_l3fwd_route {
        struct ipv4_5tuple key;
        uint8_t if_out;
};

struct ipv6_l3fwd_route {
        struct ipv6_5tuple key;
        uint8_t if_out;
};

static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
        {{RTE_IPV4(100,10,0,1), RTE_IPV4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
        {{RTE_IPV4(100,20,0,2), RTE_IPV4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
        {{RTE_IPV4(100,30,0,3), RTE_IPV4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
        {{RTE_IPV4(100,40,0,4), RTE_IPV4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
};

static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
        {
                {
                        {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                         0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
                        {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                         0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
                         1, 10, IPPROTO_UDP
                }, 4
        },
};

typedef struct rte_hash lookup_struct_t;
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];

#define L3FWD_HASH_ENTRIES      1024

static uint16_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
static uint16_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
struct ipv4_l3fwd_route {
        uint32_t ip;
        uint8_t  depth;
        uint8_t  if_out;
};

static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
        {RTE_IPV4(1,1,1,0), 24, 0},
        {RTE_IPV4(2,1,1,0), 24, 1},
        {RTE_IPV4(3,1,1,0), 24, 2},
        {RTE_IPV4(4,1,1,0), 24, 3},
        {RTE_IPV4(5,1,1,0), 24, 4},
        {RTE_IPV4(6,1,1,0), 24, 5},
        {RTE_IPV4(7,1,1,0), 24, 6},
        {RTE_IPV4(8,1,1,0), 24, 7},
};

#define IPV4_L3FWD_LPM_MAX_RULES     1024

typedef struct rte_lpm lookup_struct_t;
static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
#endif

struct lcore_conf {
        uint16_t n_rx_queue;
        struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
        uint16_t n_tx_port;
        uint16_t tx_port_id[RTE_MAX_ETHPORTS];
        uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
        struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
        lookup_struct_t *ipv4_lookup_struct;
        lookup_struct_t *ipv6_lookup_struct;
} __rte_cache_aligned;

struct lcore_stats {
        /* total sleep time in ms since last frequency scaling down */
        uint32_t sleep_time;
        /* number of long sleep recently */
        uint32_t nb_long_sleep;
        /* freq. scaling up trend */
        uint32_t trend;
        /* total packet processed recently */
        uint64_t nb_rx_processed;
        /* total iterations looped recently */
        uint64_t nb_iteration_looped;
        /*
         * Represents empty and non empty polls
         * of rte_eth_rx_burst();
         * ep_nep[0] holds non empty polls
         * i.e. 0 < nb_rx <= MAX_BURST
         * ep_nep[1] holds empty polls.
         * i.e. nb_rx == 0
         */
        uint64_t ep_nep[2];
        /*
         * Represents full and empty+partial
         * polls of rte_eth_rx_burst();
         * fp_nfp[0] holds empty+partial polls,
         * i.e. 0 <= nb_rx < MAX_BURST;
         * fp_nfp[1] holds full polls,
         * i.e. nb_rx == MAX_BURST.
         */
        uint64_t fp_nfp[2];
        enum busy_rate br;
        rte_spinlock_t telemetry_lock;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
static struct rte_timer power_timers[RTE_MAX_LCORE];

static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
static inline enum freq_scale_hint_t power_freq_scaleup_heuristic(
                unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);


/*
 * These defaults use a typical low frequency index (14), a medium index (9)
 * and the max frequency index (1), matching the array order below. Different
 * indexes can be selected with the relevant command line parameters.
 */
static uint8_t freq_tlb[] = {14, 9, 1};

static int is_done(void)
{
        return quit_signal;
}

/* exit signal handler */
static void
signal_exit_now(int sigtype)
{
        if (sigtype == SIGINT)
                quit_signal = true;
}

/* Frequency scale-down timer callback */
static void
power_timer_cb(__rte_unused struct rte_timer *tim,
                          __rte_unused void *arg)
{
        uint64_t hz;
        float sleep_time_ratio;
        unsigned lcore_id = rte_lcore_id();

        /* fraction of the last scaling period this lcore spent sleeping */
        sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
                                        (float)SCALING_PERIOD;
        /**
         * Scale down the frequency a step if the lcore slept a lot.
         */
        if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
                if (rte_power_freq_down)
                        rte_power_freq_down(lcore_id);
        }
        else if ((unsigned)(stats[lcore_id].nb_rx_processed /
                stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
                /**
                 * Scale down a step if the average number of packets per
                 * iteration is less than expected.
                 */
                if (rte_power_freq_down)
                        rte_power_freq_down(lcore_id);
        }

        /**
         * Re-arm the timer according to the current frequency so that the
         * timer interval stays relatively fixed.
         */
        hz = rte_get_timer_hz();
        rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
                                SINGLE, lcore_id, power_timer_cb, NULL);

        stats[lcore_id].nb_rx_processed = 0;
        stats[lcore_id].nb_iteration_looped = 0;

        stats[lcore_id].sleep_time = 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
        uint32_t lcore_id;
        struct lcore_conf *qconf;

        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];

        rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
                        qconf->tx_buffer[port], m);

        return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len)
{
        /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
        /*
         * 1. The packet length reported by the Link Layer must be large
         * enough to hold the minimum length legal IP datagram (20 bytes).
         */
        if (link_len < sizeof(struct rte_ipv4_hdr))
                return -1;

        /* 2. The IP checksum must be correct. */
        /* if this is not checked in H/W, check it. */
        if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) {
                uint16_t actual_cksum, expected_cksum;
                actual_cksum = pkt->hdr_checksum;
                pkt->hdr_checksum = 0;
                expected_cksum = rte_ipv4_cksum(pkt);
                if (actual_cksum != expected_cksum)
                        return -2;
        }

        /*
         * 3. The IP version number must be 4. If the version number is not 4
         * then the packet may be another version of IP, such as IPng or
         * ST-II.
         */
        if (((pkt->version_ihl) >> 4) != 4)
                return -3;
        /*
         * 4. The IP header length field must be large enough to hold the
         * minimum length legal IP datagram (20 bytes = 5 words).
         */
        if ((pkt->version_ihl & 0xf) < 5)
                return -4;

        /*
         * 5. The IP total length field must be large enough to hold the IP
         * datagram header, whose length is specified in the IP header length
         * field.
         */
        if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct rte_ipv4_hdr))
                return -5;

        return 0;
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
print_ipv4_key(struct ipv4_5tuple key)
{
        printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
                "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
                                key.port_dst, key.port_src, key.proto);
}
static void
print_ipv6_key(struct ipv6_5tuple key)
{
        printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
                "port dst = %d, port src = %d, proto = %d\n",
                IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
                key.port_dst, key.port_src, key.proto);
}

static inline uint16_t
get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid,
                lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
        struct ipv4_5tuple key;
        struct rte_tcp_hdr *tcp;
        struct rte_udp_hdr *udp;
        int ret = 0;

        key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
        key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
        key.proto = ipv4_hdr->next_proto_id;

        switch (ipv4_hdr->next_proto_id) {
        case IPPROTO_TCP:
                tcp = (struct rte_tcp_hdr *)((unsigned char *)ipv4_hdr +
                                        sizeof(struct rte_ipv4_hdr));
                key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
                key.port_src = rte_be_to_cpu_16(tcp->src_port);
                break;

        case IPPROTO_UDP:
                udp = (struct rte_udp_hdr *)((unsigned char *)ipv4_hdr +
                                        sizeof(struct rte_ipv4_hdr));
                key.port_dst = rte_be_to_cpu_16(udp->dst_port);
                key.port_src = rte_be_to_cpu_16(udp->src_port);
                break;

        default:
                key.port_dst = 0;
                key.port_src = 0;
                break;
        }

        /* Find destination port */
        ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
        return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
}

static inline uint16_t
get_ipv6_dst_port(struct rte_ipv6_hdr *ipv6_hdr, uint16_t portid,
                        lookup_struct_t *ipv6_l3fwd_lookup_struct)
{
        struct ipv6_5tuple key;
        struct rte_tcp_hdr *tcp;
        struct rte_udp_hdr *udp;
        int ret = 0;

        memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
        memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);

        key.proto = ipv6_hdr->proto;

        switch (ipv6_hdr->proto) {
        case IPPROTO_TCP:
                tcp = (struct rte_tcp_hdr *)((unsigned char *) ipv6_hdr +
                                        sizeof(struct rte_ipv6_hdr));
                key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
                key.port_src = rte_be_to_cpu_16(tcp->src_port);
                break;

        case IPPROTO_UDP:
                udp = (struct rte_udp_hdr *)((unsigned char *) ipv6_hdr +
                                        sizeof(struct rte_ipv6_hdr));
                key.port_dst = rte_be_to_cpu_16(udp->dst_port);
                key.port_src = rte_be_to_cpu_16(udp->src_port);
                break;

        default:
                key.port_dst = 0;
                key.port_src = 0;
                break;
        }

        /* Find destination port */
        ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
        return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static inline uint16_t
get_ipv4_dst_port(struct rte_ipv4_hdr *ipv4_hdr, uint16_t portid,
                lookup_struct_t *ipv4_l3fwd_lookup_struct)
{
        uint32_t next_hop;

        return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
                        rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
                        next_hop : portid);
}
#endif

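/*
 * Software fallback for packet type parsing: classify the mbuf as IPv4 or
 * IPv6 from the Ethernet type field when the NIC does not report the ptype.
 */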
static inline void
parse_ptype_one(struct rte_mbuf *m)
{
        struct rte_ether_hdr *eth_hdr;
        uint32_t packet_type = RTE_PTYPE_UNKNOWN;
        uint16_t ether_type;

        eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
        ether_type = eth_hdr->ether_type;
        if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
                packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
        else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6))
                packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

        m->packet_type = packet_type;
}

static uint16_t
cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
               struct rte_mbuf *pkts[], uint16_t nb_pkts,
               uint16_t max_pkts __rte_unused,
               void *user_param __rte_unused)
{
        unsigned int i;

        for (i = 0; i < nb_pkts; ++i)
                parse_ptype_one(pkts[i]);

        return nb_pkts;
}

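/*
 * Register cb_parse_ptype() as an Rx callback on the given queue so that
 * packet types are filled in software right after rte_eth_rx_burst().
 */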
static int
add_cb_parse_ptype(uint16_t portid, uint16_t queueid)
{
        printf("Port %d: parsing packet type in software\n", portid);
        if (rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL))
                return 0;

        printf("Failed to add rx callback: port=%d\n", portid);
        return -1;
}

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid,
                                struct lcore_conf *qconf)
{
        struct rte_ether_hdr *eth_hdr;
        struct rte_ipv4_hdr *ipv4_hdr;
        void *d_addr_bytes;
        uint16_t dst_port;

        eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

        if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
                /* Handle IPv4 headers.*/
                ipv4_hdr =
                        rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
                                                sizeof(struct rte_ether_hdr));

#ifdef DO_RFC_1812_CHECKS
                /* Check to make sure the packet is valid (RFC1812) */
                if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
                        rte_pktmbuf_free(m);
                        return;
                }
#endif

                dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
                                        qconf->ipv4_lookup_struct);
                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx */
                d_addr_bytes = &eth_hdr->dst_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

#ifdef DO_RFC_1812_CHECKS
                /* Update time to live and header checksum */
                --(ipv4_hdr->time_to_live);
                ++(ipv4_hdr->hdr_checksum);
#endif

                /* src addr */
                rte_ether_addr_copy(&ports_eth_addr[dst_port],
                                &eth_hdr->src_addr);

                send_single_packet(m, dst_port);
        } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
                /* Handle IPv6 headers.*/
#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
                struct rte_ipv6_hdr *ipv6_hdr;

                ipv6_hdr =
                        rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
                                                sizeof(struct rte_ether_hdr));

                dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
                                        qconf->ipv6_lookup_struct);

                if (dst_port >= RTE_MAX_ETHPORTS ||
                                (enabled_port_mask & 1 << dst_port) == 0)
                        dst_port = portid;

                /* 02:00:00:00:00:xx */
                d_addr_bytes = &eth_hdr->dst_addr.addr_bytes[0];
                *((uint64_t *)d_addr_bytes) =
                        0x000000000002 + ((uint64_t)dst_port << 40);

                /* src addr */
                rte_ether_addr_copy(&ports_eth_addr[dst_port],
                                &eth_hdr->src_addr);

                send_single_packet(m, dst_port);
#else
                /* We don't currently handle IPv6 packets in LPM mode. */
                rte_pktmbuf_free(m);
#endif
        } else
                rte_pktmbuf_free(m);

}

#define MINIMUM_SLEEP_TIME         1
#define SUSPEND_THRESHOLD          300

static inline uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
        /* Sleep MINIMUM_SLEEP_TIME (1 us) while the zero-poll count is
         * still below SUSPEND_THRESHOLD.
         */
        if (zero_rx_packet_count < SUSPEND_THRESHOLD)
                return MINIMUM_SLEEP_TIME;
        /* Otherwise sleep SUSPEND_THRESHOLD us, long enough to cover the
         * latency of switching from C3/C6 back to C0.
         */
        else
                return SUSPEND_THRESHOLD;
}

static inline enum freq_scale_hint_t
power_freq_scaleup_heuristic(unsigned lcore_id,
                             uint16_t port_id,
                             uint16_t queue_id)
{
        uint32_t rxq_count = rte_eth_rx_queue_count(port_id, queue_id);
/**
 * HW Rx queue size is 128 by default; each Rx burst reads at most 32
 * entries per iteration.
 */
#define FREQ_GEAR1_RX_PACKET_THRESHOLD             MAX_PKT_BURST
#define FREQ_GEAR2_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*2)
#define FREQ_GEAR3_RX_PACKET_THRESHOLD             (MAX_PKT_BURST*3)
#define FREQ_UP_TREND1_ACC   1
#define FREQ_UP_TREND2_ACC   100
#define FREQ_UP_THRESHOLD    10000

        if (likely(rxq_count > FREQ_GEAR3_RX_PACKET_THRESHOLD)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHEST;
        } else if (likely(rxq_count > FREQ_GEAR2_RX_PACKET_THRESHOLD))
                stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
        else if (likely(rxq_count > FREQ_GEAR1_RX_PACKET_THRESHOLD))
                stats[lcore_id].trend += FREQ_UP_TREND1_ACC;

        if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
                stats[lcore_id].trend = 0;
                return FREQ_HIGHER;
        }

        return FREQ_CURRENT;
}

/**
 * Force the polling thread to sleep until a one-shot Rx interrupt triggers.
 * @param num
 *  Number of Rx queues the lcore is waiting on.
 * @param lcore
 *  Lcore id.
 * @return
 *  0 on success
 */
static int
sleep_until_rx_interrupt(int num, int lcore)
{
        /*
         * we want to track when we are woken up by traffic so that we can go
         * back to sleep again without log spamming. Avoid cache line sharing
         * to prevent threads stepping on each other's toes.
         */
        static struct {
                bool wakeup;
        } __rte_cache_aligned status[RTE_MAX_LCORE];
        struct rte_epoll_event event[num];
        int n, i;
        uint16_t port_id;
        uint8_t queue_id;
        void *data;

        if (status[lcore].wakeup) {
                RTE_LOG(INFO, L3FWD_POWER,
                                "lcore %u sleeps until interrupt triggers\n",
                                rte_lcore_id());
        }

        n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, 10);
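        /* each event's cookie encodes (port_id << CHAR_BIT | queue_id),
         * as registered by event_register() */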
        for (i = 0; i < n; i++) {
                data = event[i].epdata.data;
                port_id = ((uintptr_t)data) >> CHAR_BIT;
                queue_id = ((uintptr_t)data) &
                        RTE_LEN2MASK(CHAR_BIT, uint8_t);
                RTE_LOG(INFO, L3FWD_POWER,
                        "lcore %u is woken up from rx interrupt on"
                        " port %d queue %d\n",
                        rte_lcore_id(), port_id, queue_id);
        }
        status[lcore].wakeup = n != 0;

        return 0;
}

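/*
 * Enable or disable the Rx interrupt of every queue owned by this lcore.
 * The per-port spinlock serializes enable/disable against other lcores
 * that share the same port.
 */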
static void turn_on_off_intr(struct lcore_conf *qconf, bool on)
{
        int i;
        struct lcore_rx_queue *rx_queue;
        uint8_t queue_id;
        uint16_t port_id;

        for (i = 0; i < qconf->n_rx_queue; ++i) {
                rx_queue = &(qconf->rx_queue_list[i]);
                port_id = rx_queue->port_id;
                queue_id = rx_queue->queue_id;

                rte_spinlock_lock(&(locks[port_id]));
                if (on)
                        rte_eth_dev_rx_intr_enable(port_id, queue_id);
                else
                        rte_eth_dev_rx_intr_disable(port_id, queue_id);
                rte_spinlock_unlock(&(locks[port_id]));
        }
}

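/*
 * Add every Rx queue of this lcore to the per-thread epoll instance,
 * encoding (port_id << CHAR_BIT | queue_id) as the event cookie.
 */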
static int event_register(struct lcore_conf *qconf)
{
        struct lcore_rx_queue *rx_queue;
        uint8_t queueid;
        uint16_t portid;
        uint32_t data;
        int ret;
        int i;

        for (i = 0; i < qconf->n_rx_queue; ++i) {
                rx_queue = &(qconf->rx_queue_list[i]);
                portid = rx_queue->port_id;
                queueid = rx_queue->queue_id;
                data = portid << CHAR_BIT | queueid;

                ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
                                                RTE_EPOLL_PER_THREAD,
                                                RTE_INTR_EVENT_ADD,
                                                (void *)((uintptr_t)data));
                if (ret)
                        return ret;
        }

        return 0;
}

/* Main processing loop. 8< */
static int main_intr_loop(__rte_unused void *dummy)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        unsigned int lcore_id;
        uint64_t prev_tsc, diff_tsc, cur_tsc;
        int i, j, nb_rx;
        uint8_t queueid;
        uint16_t portid;
        struct lcore_conf *qconf;
        struct lcore_rx_queue *rx_queue;
        uint32_t lcore_rx_idle_count = 0;
        uint32_t lcore_idle_hint = 0;
        int intr_en = 0;

        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
                                   US_PER_S * BURST_TX_DRAIN_US;

        prev_tsc = 0;

        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];

        if (qconf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
                                lcore_id);
                return 0;
        }

        RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n",
                        lcore_id);

        for (i = 0; i < qconf->n_rx_queue; i++) {
                portid = qconf->rx_queue_list[i].port_id;
                queueid = qconf->rx_queue_list[i].queue_id;
                RTE_LOG(INFO, L3FWD_POWER,
                                " -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
                                lcore_id, portid, queueid);
        }

        /* add into event wait list */
        if (event_register(qconf) == 0)
                intr_en = 1;
        else
                RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't be enabled.\n");

        while (!is_done()) {
                stats[lcore_id].nb_iteration_looped++;

                cur_tsc = rte_rdtsc();

                /*
                 * TX burst queue drain
                 */
                diff_tsc = cur_tsc - prev_tsc;
                if (unlikely(diff_tsc > drain_tsc)) {
                        for (i = 0; i < qconf->n_tx_port; ++i) {
                                portid = qconf->tx_port_id[i];
                                rte_eth_tx_buffer_flush(portid,
                                                qconf->tx_queue_id[portid],
                                                qconf->tx_buffer[portid]);
                        }
                        prev_tsc = cur_tsc;
                }

start_rx:
                /*
                 * Read packets from Rx queues
                 */
                lcore_rx_idle_count = 0;
                for (i = 0; i < qconf->n_rx_queue; ++i) {
                        rx_queue = &(qconf->rx_queue_list[i]);
                        rx_queue->idle_hint = 0;
                        portid = rx_queue->port_id;
                        queueid = rx_queue->queue_id;

                        nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
                                        MAX_PKT_BURST);

                        stats[lcore_id].nb_rx_processed += nb_rx;
                        if (unlikely(nb_rx == 0)) {
                                /**
                                 * No packets received from this Rx queue;
                                 * sleep for a while to let the CPU enter
                                 * deeper C-states.
                                 */
                                rx_queue->zero_rx_packet_count++;

                                if (rx_queue->zero_rx_packet_count <=
                                                MIN_ZERO_POLL_COUNT)
                                        continue;

                                rx_queue->idle_hint = power_idle_heuristic(
                                                rx_queue->zero_rx_packet_count);
                                lcore_rx_idle_count++;
                        } else {
                                rx_queue->zero_rx_packet_count = 0;
                        }

                        /* Prefetch first packets */
                        for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(
                                                pkts_burst[j], void *));
                        }

                        /* Prefetch and forward already prefetched packets */
                        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(
                                                pkts_burst[j + PREFETCH_OFFSET],
                                                void *));
                                l3fwd_simple_forward(
                                                pkts_burst[j], portid, qconf);
                        }

                        /* Forward remaining prefetched packets */
                        for (; j < nb_rx; j++) {
                                l3fwd_simple_forward(
                                                pkts_burst[j], portid, qconf);
                        }
                }

                if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) {
                        /**
                         * All Rx queues were empty in recent consecutive
                         * polls; sleep conservatively, i.e. sleep as
                         * little as possible.
                         */
                        for (i = 1,
                            lcore_idle_hint = qconf->rx_queue_list[0].idle_hint;
                                        i < qconf->n_rx_queue; ++i) {
                                rx_queue = &(qconf->rx_queue_list[i]);
                                if (rx_queue->idle_hint < lcore_idle_hint)
                                        lcore_idle_hint = rx_queue->idle_hint;
                        }

                        if (lcore_idle_hint < SUSPEND_THRESHOLD)
                                /**
                                 * Execute the "pause" instruction to avoid a
                                 * context switch, which generally takes
                                 * hundreds of microseconds, for short sleeps.
                                 */
                                rte_delay_us(lcore_idle_hint);
                        else {
                                /* suspend until rx interrupt triggers */
                                if (intr_en) {
                                        turn_on_off_intr(qconf, 1);
                                        sleep_until_rx_interrupt(
                                                        qconf->n_rx_queue,
                                                        lcore_id);
                                        turn_on_off_intr(qconf, 0);
                                        /**
                                         * start receiving packets immediately
                                         */
                                        if (likely(!is_done()))
                                                goto start_rx;
                                }
                        }
                        stats[lcore_id].sleep_time += lcore_idle_hint;
                }
        }

        return 0;
}
/* >8 End of main processing loop. */

/* main processing loop */
static int
main_telemetry_loop(__rte_unused void *dummy)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        unsigned int lcore_id;
        uint64_t prev_tsc, diff_tsc, cur_tsc, prev_tel_tsc;
        int i, j, nb_rx;
        uint8_t queueid;
        uint16_t portid;
        struct lcore_conf *qconf;
        struct lcore_rx_queue *rx_queue;
        uint64_t ep_nep[2] = {0}, fp_nfp[2] = {0};
        uint64_t poll_count;
        enum busy_rate br;

        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
                                        US_PER_S * BURST_TX_DRAIN_US;

        poll_count = 0;
        prev_tsc = 0;
        prev_tel_tsc = 0;

        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];

        if (qconf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
                        lcore_id);
                return 0;
        }

        RTE_LOG(INFO, L3FWD_POWER, "entering main telemetry loop on lcore %u\n",
                lcore_id);

        for (i = 0; i < qconf->n_rx_queue; i++) {
                portid = qconf->rx_queue_list[i].port_id;
                queueid = qconf->rx_queue_list[i].queue_id;
                RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
                        "rxqueueid=%hhu\n", lcore_id, portid, queueid);
        }

        while (!is_done()) {

                cur_tsc = rte_rdtsc();
                /*
                 * TX burst queue drain
                 */
                diff_tsc = cur_tsc - prev_tsc;
                if (unlikely(diff_tsc > drain_tsc)) {
                        for (i = 0; i < qconf->n_tx_port; ++i) {
                                portid = qconf->tx_port_id[i];
                                rte_eth_tx_buffer_flush(portid,
                                                qconf->tx_queue_id[portid],
                                                qconf->tx_buffer[portid]);
                        }
                        prev_tsc = cur_tsc;
                }

                /*
                 * Read packets from Rx queues
                 */
                for (i = 0; i < qconf->n_rx_queue; ++i) {
                        rx_queue = &(qconf->rx_queue_list[i]);
                        portid = rx_queue->port_id;
                        queueid = rx_queue->queue_id;

                        nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
                                                                MAX_PKT_BURST);
                        ep_nep[nb_rx == 0]++;
                        fp_nfp[nb_rx == MAX_PKT_BURST]++;
                        poll_count++;
                        if (unlikely(nb_rx == 0))
                                continue;

                        /* Prefetch first packets */
                        for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(
                                                pkts_burst[j], void *));
                        }

                        /* Prefetch and forward already prefetched packets */
                        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                                                j + PREFETCH_OFFSET], void *));
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                                qconf);
                        }

                        /* Forward remaining prefetched packets */
                        for (; j < nb_rx; j++) {
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                                qconf);
                        }
                }
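                /*
                 * Every DEFAULT_COUNT polls, derive a busy percentage from
                 * the cycles elapsed since the last update: >= MAX_CYCLES
                 * maps to FULL (100%), <= MIN_CYCLES to ZERO, and anything
                 * in between is scaled linearly against MAX_CYCLES.
                 */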
                if (unlikely(poll_count >= DEFAULT_COUNT)) {
                        diff_tsc = cur_tsc - prev_tel_tsc;
                        if (diff_tsc >= MAX_CYCLES) {
                                br = FULL;
                        } else if (diff_tsc > MIN_CYCLES &&
                                        diff_tsc < MAX_CYCLES) {
                                br = (diff_tsc * 100) / MAX_CYCLES;
                        } else {
                                br = ZERO;
                        }
                        poll_count = 0;
                        prev_tel_tsc = cur_tsc;
                        /* update stats for telemetry */
                        rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
                        stats[lcore_id].ep_nep[0] = ep_nep[0];
                        stats[lcore_id].ep_nep[1] = ep_nep[1];
                        stats[lcore_id].fp_nfp[0] = fp_nfp[0];
                        stats[lcore_id].fp_nfp[1] = fp_nfp[1];
                        stats[lcore_id].br = br;
                        rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
                }
        }

        return 0;
}
/* main processing loop */
static int
main_empty_poll_loop(__rte_unused void *dummy)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        unsigned int lcore_id;
        uint64_t prev_tsc, diff_tsc, cur_tsc;
        int i, j, nb_rx;
        uint8_t queueid;
        uint16_t portid;
        struct lcore_conf *qconf;
        struct lcore_rx_queue *rx_queue;

        const uint64_t drain_tsc =
                (rte_get_tsc_hz() + US_PER_S - 1) /
                US_PER_S * BURST_TX_DRAIN_US;

        prev_tsc = 0;

        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];

        if (qconf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
                        lcore_id);
                return 0;
        }

        for (i = 0; i < qconf->n_rx_queue; i++) {
                portid = qconf->rx_queue_list[i].port_id;
                queueid = qconf->rx_queue_list[i].queue_id;
                RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
                                "rxqueueid=%hhu\n", lcore_id, portid, queueid);
        }

        while (!is_done()) {
                stats[lcore_id].nb_iteration_looped++;

                cur_tsc = rte_rdtsc();
                /*
                 * TX burst queue drain
                 */
                diff_tsc = cur_tsc - prev_tsc;
                if (unlikely(diff_tsc > drain_tsc)) {
                        for (i = 0; i < qconf->n_tx_port; ++i) {
                                portid = qconf->tx_port_id[i];
                                rte_eth_tx_buffer_flush(portid,
                                                qconf->tx_queue_id[portid],
                                                qconf->tx_buffer[portid]);
                        }
                        prev_tsc = cur_tsc;
                }

                /*
                 * Read packets from Rx queues
                 */
                for (i = 0; i < qconf->n_rx_queue; ++i) {
                        rx_queue = &(qconf->rx_queue_list[i]);
                        rx_queue->idle_hint = 0;
                        portid = rx_queue->port_id;
                        queueid = rx_queue->queue_id;

                        nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
                                        MAX_PKT_BURST);

                        stats[lcore_id].nb_rx_processed += nb_rx;

                        if (nb_rx == 0) {
                                rte_power_empty_poll_stat_update(lcore_id);
                                continue;
                        } else {
                                rte_power_poll_stat_update(lcore_id, nb_rx);
                        }

                        /* Prefetch first packets */
                        for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(
                                                        pkts_burst[j], void *));
                        }

                        /* Prefetch and forward already prefetched packets */
                        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                                                        j + PREFETCH_OFFSET],
                                                        void *));
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                qconf);
                        }

                        /* Forward remaining prefetched packets */
                        for (; j < nb_rx; j++) {
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                qconf);
                        }
                }
        }

        return 0;
}
/* main processing loop */
static int
main_legacy_loop(__rte_unused void *dummy)
{
        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
        unsigned lcore_id;
        uint64_t prev_tsc, diff_tsc, cur_tsc, tim_res_tsc, hz;
        uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power;
        int i, j, nb_rx;
        uint8_t queueid;
        uint16_t portid;
        struct lcore_conf *qconf;
        struct lcore_rx_queue *rx_queue;
        enum freq_scale_hint_t lcore_scaleup_hint;
        uint32_t lcore_rx_idle_count = 0;
        uint32_t lcore_idle_hint = 0;
        int intr_en = 0;

        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
                                   US_PER_S * BURST_TX_DRAIN_US;

        prev_tsc = 0;
        hz = rte_get_timer_hz();
        tim_res_tsc = hz/TIMER_NUMBER_PER_SECOND;

        lcore_id = rte_lcore_id();
        qconf = &lcore_conf[lcore_id];

        if (qconf->n_rx_queue == 0) {
                RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
                        lcore_id);
                return 0;
        }

        RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);

        for (i = 0; i < qconf->n_rx_queue; i++) {
                portid = qconf->rx_queue_list[i].port_id;
                queueid = qconf->rx_queue_list[i].queue_id;
                RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
                        "rxqueueid=%hhu\n", lcore_id, portid, queueid);
        }

        /* add into event wait list */
        if (event_register(qconf) == 0)
                intr_en = 1;
        else
                RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't be enabled.\n");

        while (!is_done()) {
                stats[lcore_id].nb_iteration_looped++;

                cur_tsc = rte_rdtsc();
                cur_tsc_power = cur_tsc;

                /*
                 * TX burst queue drain
                 */
                diff_tsc = cur_tsc - prev_tsc;
                if (unlikely(diff_tsc > drain_tsc)) {
                        for (i = 0; i < qconf->n_tx_port; ++i) {
                                portid = qconf->tx_port_id[i];
                                rte_eth_tx_buffer_flush(portid,
                                                qconf->tx_queue_id[portid],
                                                qconf->tx_buffer[portid]);
                        }
                        prev_tsc = cur_tsc;
                }

                diff_tsc_power = cur_tsc_power - prev_tsc_power;
                if (diff_tsc_power > tim_res_tsc) {
                        rte_timer_manage();
                        prev_tsc_power = cur_tsc_power;
                }

start_rx:
                /*
                 * Read packets from Rx queues
                 */
                lcore_scaleup_hint = FREQ_CURRENT;
                lcore_rx_idle_count = 0;
                for (i = 0; i < qconf->n_rx_queue; ++i) {
                        rx_queue = &(qconf->rx_queue_list[i]);
                        rx_queue->idle_hint = 0;
                        portid = rx_queue->port_id;
                        queueid = rx_queue->queue_id;

                        nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
                                                                MAX_PKT_BURST);

                        stats[lcore_id].nb_rx_processed += nb_rx;
                        if (unlikely(nb_rx == 0)) {
                                /**
                                 * No packets received from this Rx queue;
                                 * sleep for a while to let the CPU enter
                                 * deeper C-states.
                                 */
                                rx_queue->zero_rx_packet_count++;

                                if (rx_queue->zero_rx_packet_count <=
                                                        MIN_ZERO_POLL_COUNT)
                                        continue;

                                rx_queue->idle_hint = power_idle_heuristic(
                                        rx_queue->zero_rx_packet_count);
                                lcore_rx_idle_count++;
                        } else {
                                rx_queue->zero_rx_packet_count = 0;

                                /**
                                 * Do not scale up the frequency immediately:
                                 * user-to-kernel-space communication is
                                 * costly and might hurt packet I/O for the
                                 * packets just received.
                                 */
                                rx_queue->freq_up_hint =
                                        power_freq_scaleup_heuristic(lcore_id,
                                                        portid, queueid);
                        }

                        /* Prefetch first packets */
                        for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(
                                                pkts_burst[j], void *));
                        }

                        /* Prefetch and forward already prefetched packets */
                        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                                                j + PREFETCH_OFFSET], void *));
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                                qconf);
                        }

                        /* Forward remaining prefetched packets */
                        for (; j < nb_rx; j++) {
                                l3fwd_simple_forward(pkts_burst[j], portid,
                                                                qconf);
                        }
                }

                if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
                        for (i = 1, lcore_scaleup_hint =
                                qconf->rx_queue_list[0].freq_up_hint;
                                        i < qconf->n_rx_queue; ++i) {
                                rx_queue = &(qconf->rx_queue_list[i]);
                                if (rx_queue->freq_up_hint >
                                                lcore_scaleup_hint)
                                        lcore_scaleup_hint =
                                                rx_queue->freq_up_hint;
                        }

                        if (lcore_scaleup_hint == FREQ_HIGHEST) {
                                if (rte_power_freq_max)
                                        rte_power_freq_max(lcore_id);
                        } else if (lcore_scaleup_hint == FREQ_HIGHER) {
                                if (rte_power_freq_up)
                                        rte_power_freq_up(lcore_id);
                        }
                } else {
1469                        /**
1470                         * All Rx queues were empty in recent
1471                         * consecutive polls; sleep conservatively,
1472                         * i.e. as little as possible.
1473                         */
1474                        for (i = 1, lcore_idle_hint =
1475                                qconf->rx_queue_list[0].idle_hint;
1476                                        i < qconf->n_rx_queue; ++i) {
1477                                rx_queue = &(qconf->rx_queue_list[i]);
1478                                if (rx_queue->idle_hint < lcore_idle_hint)
1479                                        lcore_idle_hint = rx_queue->idle_hint;
1480                        }
1481
1482                        if (lcore_idle_hint < SUSPEND_THRESHOLD)
1483                                /**
1484                                 * execute the "pause" instruction for short
1485                                 * sleeps, avoiding a context switch, which
1486                                 * generally takes hundreds of microseconds.
1487                                 */
1488                                rte_delay_us(lcore_idle_hint);
1489                        else {
1490                                /* suspend until rx interrupt triggers */
1491                                if (intr_en) {
1492                                        turn_on_off_intr(qconf, 1);
1493                                        sleep_until_rx_interrupt(
1494                                                        qconf->n_rx_queue,
1495                                                        lcore_id);
1496                                        turn_on_off_intr(qconf, 0);
1497                                        /**
1498                                         * start receiving packets immediately
1499                                         */
1500                                        if (likely(!is_done()))
1501                                                goto start_rx;
1502                                }
1503                        }
1504                        stats[lcore_id].sleep_time += lcore_idle_hint;
1505                }
1506        }
1507
1508        return 0;
1509}
1510
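/*
 * Sanity-check the (port, queue, lcore) tuples given via --config: the queue
 * id must fit within the per-port limit, the lcore must be enabled in the EAL
 * core mask, a warning is given for lcores off socket 0 with NUMA disabled,
 * and in telemetry mode the main lcore must not be used for forwarding.
 */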
1511static int
1512check_lcore_params(void)
1513{
1514        uint8_t queue, lcore;
1515        uint16_t i;
1516        int socketid;
1517
1518        for (i = 0; i < nb_lcore_params; ++i) {
1519                queue = lcore_params[i].queue_id;
1520                if (queue >= MAX_RX_QUEUE_PER_PORT) {
1521                        printf("invalid queue number: %hhu\n", queue);
1522                        return -1;
1523                }
1524                lcore = lcore_params[i].lcore_id;
1525                if (!rte_lcore_is_enabled(lcore)) {
1526                        printf("error: lcore %hhu is not enabled in lcore "
1527                                                        "mask\n", lcore);
1528                        return -1;
1529                }
1530                if ((socketid = rte_lcore_to_socket_id(lcore)) != 0 &&
1531                                                        (numa_on == 0)) {
1532                        printf("warning: lcore %hhu is on socket %d with numa "
1533                                                "off\n", lcore, socketid);
1534                }
1535                if (app_mode == APP_MODE_TELEMETRY && lcore == rte_lcore_id()) {
1536                        printf("cannot enable main core %u in config for telemetry mode\n",
1537                                rte_lcore_id());
1538                        return -1;
1539                }
1540        }
1541        return 0;
1542}
1543
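/*
 * Verify that every port referenced in --config is enabled in the
 * port mask (-p) and actually present on the system.
 */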
1544static int
1545check_port_config(void)
1546{
1547        unsigned portid;
1548        uint16_t i;
1549
1550        for (i = 0; i < nb_lcore_params; ++i) {
1551                portid = lcore_params[i].port_id;
1552                if ((enabled_port_mask & (1 << portid)) == 0) {
1553                        printf("port %u is not enabled in port mask\n",
1554                                                                portid);
1555                        return -1;
1556                }
1557                if (!rte_eth_dev_is_valid_port(portid)) {
1558                        printf("port %u is not present on the board\n",
1559                                                                portid);
1560                        return -1;
1561                }
1562        }
1563        return 0;
1564}
1565
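/*
 * Number of rx queues to configure on a port: one more than the highest
 * queue id referenced for that port in --config.
 */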
1566static uint8_t
1567get_port_n_rx_queues(const uint16_t port)
1568{
1569        int queue = -1;
1570        uint16_t i;
1571
1572        for (i = 0; i < nb_lcore_params; ++i) {
1573                if (lcore_params[i].port_id == port &&
1574                                lcore_params[i].queue_id > queue)
1575                        queue = lcore_params[i].queue_id;
1576        }
1577        return (uint8_t)(++queue);
1578}
1579
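/*
 * Distribute the (port, queue) pairs from --config into each lcore's
 * rx queue list, enforcing the per-lcore queue limit.
 */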
1580static int
1581init_lcore_rx_queues(void)
1582{
1583        uint16_t i, nb_rx_queue;
1584        uint8_t lcore;
1585
1586        for (i = 0; i < nb_lcore_params; ++i) {
1587                lcore = lcore_params[i].lcore_id;
1588                nb_rx_queue = lcore_conf[lcore].n_rx_queue;
1589                if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
1590                        printf("error: too many queues (%u) for lcore: %u\n",
1591                                (unsigned)nb_rx_queue + 1, (unsigned)lcore);
1592                        return -1;
1593                } else {
1594                        lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
1595                                lcore_params[i].port_id;
1596                        lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
1597                                lcore_params[i].queue_id;
1598                        lcore_conf[lcore].n_rx_queue++;
1599                }
1600        }
1601        return 0;
1602}
1603
1604/* display usage */
1605static void
1606print_usage(const char *prgname)
1607{
1608        printf("%s [EAL options] -- -p PORTMASK -P"
1609                "  [--config (port,queue,lcore)[,(port,queue,lcore)]]"
1610                "  [--high-perf-cores CORELIST]"
1611                "  [--perf-config (port,queue,hi_perf,lcore_index)[,(port,queue,hi_perf,lcore_index)]]"
1612                "  [--max-pkt-len PKTLEN]\n"
1613                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
1614                "  -P: enable promiscuous mode\n"
1615                "  --config (port,queue,lcore): rx queues configuration\n"
1616                "  --high-perf-cores CORELIST: list of high performance cores\n"
1617                "  --perf-config: similar to --config, but cores are specified as"
1618                " indices for bins containing high or regular performance cores\n"
1619                "  --no-numa: optional, disable numa awareness\n"
1620                "  --max-pkt-len PKTLEN: maximum packet length in decimal (64-9600)\n"
1621                "  --parse-ptype: parse packet type by software\n"
1622                "  --legacy: use legacy interrupt-based scaling\n"
1623                "  --empty-poll: enable empty poll detection,"
1624                " followed by (training_flag, med_threshold, high_threshold)\n"
1625                " --telemetry: enable telemetry mode, to update"
1626                " empty polls, full polls, and core busyness to telemetry\n"
1627                " --interrupt-only: enable interrupt-only mode\n"
1628                " --pmd-mgmt MODE: enable PMD power management mode. "
1629                "Currently supported modes: baseline, monitor, pause, scale\n",
1630                prgname);
1631}
1632
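/*
 * Parse a decimal integer argument, e.g. --max-pkt-len 1500; returns the
 * value, or -1 on malformed input. Also reused for the -l/-m/-h values.
 */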
1633static int parse_max_pkt_len(const char *pktlen)
1634{
1635        char *end = NULL;
1636        unsigned long len;
1637
1638        /* parse decimal string */
1639        len = strtoul(pktlen, &end, 10);
1640        if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
1641                return -1;
1642
1643        if (len == 0)
1644                return -1;
1645
1646        return len;
1647}
1648
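/* Parse the hexadecimal port mask given with -p; returns 0 on bad input. */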
1649static int
1650parse_portmask(const char *portmask)
1651{
1652        char *end = NULL;
1653        unsigned long pm;
1654
1655        /* parse hexadecimal string */
1656        pm = strtoul(portmask, &end, 16);
1657        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
1658                return 0;
1659
1660        return pm;
1661}
1662
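/*
 * Parse the --config list of (port,queue,lcore) tuples. For example,
 * --config="(0,0,1),(0,1,2)" polls port 0 queue 0 on lcore 1 and
 * port 0 queue 1 on lcore 2.
 */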
1663static int
1664parse_config(const char *q_arg)
1665{
1666        char s[256];
1667        const char *p, *p0 = q_arg;
1668        char *end;
1669        enum fieldnames {
1670                FLD_PORT = 0,
1671                FLD_QUEUE,
1672                FLD_LCORE,
1673                _NUM_FLD
1674        };
1675        unsigned long int_fld[_NUM_FLD];
1676        char *str_fld[_NUM_FLD];
1677        int i;
1678        unsigned size;
1679
1680        nb_lcore_params = 0;
1681
1682        while ((p = strchr(p0, '(')) != NULL) {
1683                ++p;
1684                if ((p0 = strchr(p, ')')) == NULL)
1685                        return -1;
1686
1687                size = p0 - p;
1688                if (size >= sizeof(s))
1689                        return -1;
1690
1691                snprintf(s, sizeof(s), "%.*s", (int)size, p);
1692                if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
1693                                                                _NUM_FLD)
1694                        return -1;
1695                for (i = 0; i < _NUM_FLD; i++) {
1696                        errno = 0;
1697                        int_fld[i] = strtoul(str_fld[i], &end, 0);
1698                        if (errno != 0 || end == str_fld[i] || int_fld[i] >
1699                                                                        255)
1700                                return -1;
1701                }
1702                if (nb_lcore_params >= MAX_LCORE_PARAMS) {
1703                        printf("exceeded max number of lcore params: %hu\n",
1704                                nb_lcore_params);
1705                        return -1;
1706                }
1707                lcore_params_array[nb_lcore_params].port_id =
1708                                (uint8_t)int_fld[FLD_PORT];
1709                lcore_params_array[nb_lcore_params].queue_id =
1710                                (uint8_t)int_fld[FLD_QUEUE];
1711                lcore_params_array[nb_lcore_params].lcore_id =
1712                                (uint8_t)int_fld[FLD_LCORE];
1713                ++nb_lcore_params;
1714        }
1715        lcore_params = lcore_params_array;
1716
1717        return 0;
1718}
1719
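/* Map the --pmd-mgmt argument onto an rte_power PMD management type. */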
1720static int
1721parse_pmd_mgmt_config(const char *name)
1722{
1723#define PMD_MGMT_MONITOR "monitor"
1724#define PMD_MGMT_PAUSE   "pause"
1725#define PMD_MGMT_SCALE   "scale"
1726#define PMD_MGMT_BASELINE  "baseline"
1727
1728        if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) {
1729                pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR;
1730                return 0;
1731        }
1732
1733        if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) {
1734                pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE;
1735                return 0;
1736        }
1737
1738        if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) {
1739                pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE;
1740                return 0;
1741        }
1742        if (strncmp(PMD_MGMT_BASELINE, name, sizeof(PMD_MGMT_BASELINE)) == 0) {
1743                baseline_enabled = true;
1744                return 0;
1745        }
1746        /* unknown PMD power management mode */
1747        return -1;
1748}
1749
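/*
 * Parse the --empty-poll argument "training_flag,med_threshold,high_threshold".
 * For example, --empty-poll="1,0,0" enables threshold training; a zero
 * threshold keeps the built-in default.
 */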
1750static int
1751parse_ep_config(const char *q_arg)
1752{
1753        char s[256];
1754        const char *p = q_arg;
1755        char *end;
1756        int  num_arg;
1757
1758        char *str_fld[3];
1759
1760        int training_flag;
1761        int med_edpi;
1762        int hgh_edpi;
1763
1764        ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
1765        ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;
1766
1767        strlcpy(s, p, sizeof(s));
1768
1769        num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');
1770
1771        empty_poll_train = false;
1772
1773        if (num_arg == 0)
1774                return 0;
1775
1776        if (num_arg == 3) {
1777
1778                training_flag = strtoul(str_fld[0], &end, 0);
1779                med_edpi = strtoul(str_fld[1], &end, 0);
1780                hgh_edpi = strtoul(str_fld[2], &end, 0);
1781
1782                if (training_flag == 1)
1783                        empty_poll_train = true;
1784
1785                if (med_edpi > 0)
1786                        ep_med_edpi = med_edpi;
1787
1788                if (hgh_edpi > 0)
1789                        ep_hgh_edpi = hgh_edpi;
1790
1791        } else {
1792
1793                return -1;
1794        }
1795
1796        return 0;
1797
1798}
1799#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
1800#define CMD_LINE_OPT_LEGACY "legacy"
1801#define CMD_LINE_OPT_EMPTY_POLL "empty-poll"
1802#define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only"
1803#define CMD_LINE_OPT_TELEMETRY "telemetry"
1804#define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt"
1805#define CMD_LINE_OPT_MAX_PKT_LEN "max-pkt-len"
1806
1807/* Parse the argument given in the command line of the application */
1808static int
1809parse_args(int argc, char **argv)
1810{
1811        int opt, ret;
1812        char **argvopt;
1813        int option_index;
1814        uint32_t limit;
1815        char *prgname = argv[0];
1816        static struct option lgopts[] = {
1817                {"config", 1, 0, 0},
1818                {"perf-config", 1, 0, 0},
1819                {"high-perf-cores", 1, 0, 0},
1820                {"no-numa", 0, 0, 0},
1821                {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0},
1822                {CMD_LINE_OPT_EMPTY_POLL, 1, 0, 0},
1823                {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
1824                {CMD_LINE_OPT_LEGACY, 0, 0, 0},
1825                {CMD_LINE_OPT_TELEMETRY, 0, 0, 0},
1826                {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0},
1827                {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0},
1828                {NULL, 0, 0, 0}
1829        };
1830
1831        argvopt = argv;
1832
1833        while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
1834                                lgopts, &option_index)) != EOF) {
1835
1836                switch (opt) {
1837                /* portmask */
1838                case 'p':
1839                        enabled_port_mask = parse_portmask(optarg);
1840                        if (enabled_port_mask == 0) {
1841                                printf("invalid portmask\n");
1842                                print_usage(prgname);
1843                                return -1;
1844                        }
1845                        break;
1846                case 'P':
1847                        printf("Promiscuous mode selected\n");
1848                        promiscuous_on = 1;
1849                        break;
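                /*
                 * -l/-m/-h fill the LOW/MED/HGH entries of freq_tlb, the
                 * frequency table later handed to the empty-poll library.
                 */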
1850                case 'l':
1851                        limit = parse_max_pkt_len(optarg);
1852                        freq_tlb[LOW] = limit;
1853                        break;
1854                case 'm':
1855                        limit = parse_max_pkt_len(optarg);
1856                        freq_tlb[MED] = limit;
1857                        break;
1858                case 'h':
1859                        limit = parse_max_pkt_len(optarg);
1860                        freq_tlb[HGH] = limit;
1861                        break;
1862                /* long options */
1863                case 0:
1864                        if (!strncmp(lgopts[option_index].name, "config", 6)) {
1865                                ret = parse_config(optarg);
1866                                if (ret) {
1867                                        printf("invalid config\n");
1868                                        print_usage(prgname);
1869                                        return -1;
1870                                }
1871                        }
1872
1873                        if (!strncmp(lgopts[option_index].name,
1874                                        "perf-config", 11)) {
1875                                ret = parse_perf_config(optarg);
1876                                if (ret) {
1877                                        printf("invalid perf-config\n");
1878                                        print_usage(prgname);
1879                                        return -1;
1880                                }
1881                        }
1882
1883                        if (!strncmp(lgopts[option_index].name,
1884                                        "high-perf-cores", 15)) {
1885                                ret = parse_perf_core_list(optarg);
1886                                if (ret) {
1887                                        printf("invalid high-perf-cores\n");
1888                                        print_usage(prgname);
1889                                        return -1;
1890                                }
1891                        }
1892
1893                        if (!strncmp(lgopts[option_index].name,
1894                                                "no-numa", 7)) {
1895                                printf("numa is disabled\n");
1896                                numa_on = 0;
1897                        }
1898
1899                        if (!strncmp(lgopts[option_index].name,
1900                                        CMD_LINE_OPT_LEGACY,
1901                                        sizeof(CMD_LINE_OPT_LEGACY))) {
1902                                if (app_mode != APP_MODE_DEFAULT) {
1903                                        printf(" legacy mode is mutually exclusive with other modes\n");
1904                                        return -1;
1905                                }
1906                                app_mode = APP_MODE_LEGACY;
1907                                printf("legacy mode is enabled\n");
1908                        }
1909
1910                        if (!strncmp(lgopts[option_index].name,
1911                                        CMD_LINE_OPT_EMPTY_POLL, sizeof(CMD_LINE_OPT_EMPTY_POLL))) {
1912                                if (app_mode != APP_MODE_DEFAULT) {
1913                                        printf(" empty-poll mode is mutually exclusive with other modes\n");
1914                                        return -1;
1915                                }
1916                                app_mode = APP_MODE_EMPTY_POLL;
1917                                ret = parse_ep_config(optarg);
1918
1919                                if (ret) {
1920                                        printf("invalid empty poll config\n");
1921                                        print_usage(prgname);
1922                                        return -1;
1923                                }
1924                                printf("empty-poll is enabled\n");
1925                        }
1926
1927                        if (!strncmp(lgopts[option_index].name,
1928                                        CMD_LINE_OPT_TELEMETRY,
1929                                        sizeof(CMD_LINE_OPT_TELEMETRY))) {
1930                                if (app_mode != APP_MODE_DEFAULT) {
1931                                        printf(" telemetry mode is mutually exclusive with other modes\n");
1932                                        return -1;
1933                                }
1934                                app_mode = APP_MODE_TELEMETRY;
1935                                printf("telemetry mode is enabled\n");
1936                        }
1937
1938                        if (!strncmp(lgopts[option_index].name,
1939                                        CMD_LINE_OPT_PMD_MGMT,
1940                                        sizeof(CMD_LINE_OPT_PMD_MGMT))) {
1941                                if (app_mode != APP_MODE_DEFAULT) {
1942                                        printf(" power mgmt mode is mutually exclusive with other modes\n");
1943                                        return -1;
1944                                }
1945                                if (parse_pmd_mgmt_config(optarg) < 0) {
1946                                        printf(" Invalid PMD power management mode: %s\n",
1947                                                        optarg);
1948                                        return -1;
1949                                }
1950                                app_mode = APP_MODE_PMD_MGMT;
1951                                printf("PMD power mgmt mode is enabled\n");
1952                        }
1953                        if (!strncmp(lgopts[option_index].name,
1954                                        CMD_LINE_OPT_INTERRUPT_ONLY,
1955                                        sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) {
1956                                if (app_mode != APP_MODE_DEFAULT) {
1957                                        printf(" interrupt-only mode is mutually exclusive with other modes\n");
1958                                        return -1;
1959                                }
1960                                app_mode = APP_MODE_INTERRUPT;
1961                                printf("interrupt-only mode is enabled\n");
1962                        }
1963
1964                        if (!strncmp(lgopts[option_index].name,
1965                                        CMD_LINE_OPT_MAX_PKT_LEN,
1966                                        sizeof(CMD_LINE_OPT_MAX_PKT_LEN))) {
1967                                printf("Custom frame size is configured\n");
1968                                max_pkt_len = parse_max_pkt_len(optarg);
1969                        }
1970
1971                        if (!strncmp(lgopts[option_index].name,
1972                                     CMD_LINE_OPT_PARSE_PTYPE,
1973                                     sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
1974                                printf("soft parse-ptype is enabled\n");
1975                                parse_ptype = 1;
1976                        }
1977
1978                        break;
1979
1980                default:
1981                        print_usage(prgname);
1982                        return -1;
1983                }
1984        }
1985
1986        if (optind >= 0)
1987                argv[optind-1] = prgname;
1988
1989        ret = optind-1;
1990        optind = 1; /* reset getopt lib */
1991        return ret;
1992}
1993
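/* Print an Ethernet (MAC) address preceded by a label. */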
1994static void
1995print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
1996{
1997        char buf[RTE_ETHER_ADDR_FMT_SIZE];
1998        rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
1999        printf("%s%s", name, buf);
2000}
2001
2002#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
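/* Create and populate the per-socket IPv4/IPv6 exact-match hash tables. */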
2003static void
2004setup_hash(int socketid)
2005{
2006        struct rte_hash_parameters ipv4_l3fwd_hash_params = {
2007                .name = NULL,
2008                .entries = L3FWD_HASH_ENTRIES,
2009                .key_len = sizeof(struct ipv4_5tuple),
2010                .hash_func = DEFAULT_HASH_FUNC,
2011                .hash_func_init_val = 0,
2012        };
2013
2014        struct rte_hash_parameters ipv6_l3fwd_hash_params = {
2015                .name = NULL,
2016                .entries = L3FWD_HASH_ENTRIES,
2017                .key_len = sizeof(struct ipv6_5tuple),
2018                .hash_func = DEFAULT_HASH_FUNC,
2019                .hash_func_init_val = 0,
2020        };
2021
2022        unsigned i;
2023        int ret;
2024        char s[64];
2025
2026        /* create ipv4 hash */
2027        snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
2028        ipv4_l3fwd_hash_params.name = s;
2029        ipv4_l3fwd_hash_params.socket_id = socketid;
2030        ipv4_l3fwd_lookup_struct[socketid] =
2031                rte_hash_create(&ipv4_l3fwd_hash_params);
2032        if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
2033                rte_exit(EXIT_FAILURE, "Unable to create the ipv4 l3fwd "
2034                                "hash on socket %d\n", socketid);
2035
2036        /* create ipv6 hash */
2037        snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
2038        ipv6_l3fwd_hash_params.name = s;
2039        ipv6_l3fwd_hash_params.socket_id = socketid;
2040        ipv6_l3fwd_lookup_struct[socketid] =
2041                rte_hash_create(&ipv6_l3fwd_hash_params);
2042        if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
2043                rte_exit(EXIT_FAILURE, "Unable to create the ipv6 l3fwd "
2044                                "hash on socket %d\n", socketid);
2045
2046
2047        /* populate the ipv4 hash */
2048        for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
2049                ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid],
2050                                (void *) &ipv4_l3fwd_route_array[i].key);
2051                if (ret < 0) {
2052                        rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
2053                                "l3fwd hash on socket %d\n", i, socketid);
2054                }
2055                ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
2056                printf("Hash: Adding key\n");
2057                print_ipv4_key(ipv4_l3fwd_route_array[i].key);
2058        }
2059
2060        /* populate the ipv6 hash */
2061        for (i = 0; i < RTE_DIM(ipv6_l3fwd_route_array); i++) {
2062                ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid],
2063                                (void *) &ipv6_l3fwd_route_array[i].key);
2064                if (ret < 0) {
2065                        rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
2066                                "l3fwd hash on socket %d\n", i, socketid);
2067                }
2068                ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
2069                printf("Hash: Adding key\n");
2070                print_ipv6_key(ipv6_l3fwd_route_array[i].key);
2071        }
2072}
2073#endif
2074
2075#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
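/* Create and populate the per-socket IPv4 LPM table with the static routes. */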
2076static void
2077setup_lpm(int socketid)
2078{
2079        unsigned i;
2080        int ret;
2081        char s[64];
2082
2083        /* create the LPM table */
2084        struct rte_lpm_config lpm_ipv4_config;
2085
2086        lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
2087        lpm_ipv4_config.number_tbl8s = 256;
2088        lpm_ipv4_config.flags = 0;
2089
2090        snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
2091        ipv4_l3fwd_lookup_struct[socketid] =
2092                        rte_lpm_create(s, socketid, &lpm_ipv4_config);
2093        if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
2094                rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
2095                                " on socket %d\n", socketid);
2096
2097        /* populate the LPM table */
2098        for (i = 0; i < RTE_DIM(ipv4_l3fwd_route_array); i++) {
2099                ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
2100                        ipv4_l3fwd_route_array[i].ip,
2101                        ipv4_l3fwd_route_array[i].depth,
2102                        ipv4_l3fwd_route_array[i].if_out);
2103
2104                if (ret < 0) {
2105                        rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
2106                                "l3fwd LPM table on socket %d\n",
2107                                i, socketid);
2108                }
2109
2110                printf("LPM: Adding route 0x%08x / %d (%d)\n",
2111                        (unsigned)ipv4_l3fwd_route_array[i].ip,
2112                        ipv4_l3fwd_route_array[i].depth,
2113                        ipv4_l3fwd_route_array[i].if_out);
2114        }
2115}
2116#endif
2117
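/*
 * Create one mbuf pool per socket in use, set up that socket's lookup
 * table(s), and point each enabled lcore at its local structures.
 */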
2118static int
2119init_mem(unsigned nb_mbuf)
2120{
2121        struct lcore_conf *qconf;
2122        int socketid;
2123        unsigned lcore_id;
2124        char s[64];
2125
2126        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
2127                if (rte_lcore_is_enabled(lcore_id) == 0)
2128                        continue;
2129
2130                if (numa_on)
2131                        socketid = rte_lcore_to_socket_id(lcore_id);
2132                else
2133                        socketid = 0;
2134
2135                if (socketid >= NB_SOCKETS) {
2136                        rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
2137                                        "out of range %d\n", socketid,
2138                                                lcore_id, NB_SOCKETS);
2139                }
2140                if (pktmbuf_pool[socketid] == NULL) {
2141                        snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
2142                        pktmbuf_pool[socketid] =
2143                                rte_pktmbuf_pool_create(s, nb_mbuf,
2144                                        MEMPOOL_CACHE_SIZE, 0,
2145                                        RTE_MBUF_DEFAULT_BUF_SIZE,
2146                                        socketid);
2147                        if (pktmbuf_pool[socketid] == NULL)
2148                                rte_exit(EXIT_FAILURE,
2149                                        "Cannot init mbuf pool on socket %d\n",
2150                                                                socketid);
2151                        else
2152                                printf("Allocated mbuf pool on socket %d\n",
2153                                                                socketid);
2154
2155#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
2156                        setup_lpm(socketid);
2157#else
2158                        setup_hash(socketid);
2159#endif
2160                }
2161                qconf = &lcore_conf[lcore_id];
2162                qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
2163#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2164                qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
2165#endif
2166        }
2167        return 0;
2168}
2169
2170/* Check the link status of all ports for up to 9 s, printing the final status */
2171static void
2172check_all_ports_link_status(uint32_t port_mask)
2173{
2174#define CHECK_INTERVAL 100 /* 100ms */
2175#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
2176        uint8_t count, all_ports_up, print_flag = 0;
2177        uint16_t portid;
2178        struct rte_eth_link link;
2179        int ret;
2180        char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];
2181
2182        printf("\nChecking link status");
2183        fflush(stdout);
2184        for (count = 0; count <= MAX_CHECK_TIME; count++) {
2185                all_ports_up = 1;
2186                RTE_ETH_FOREACH_DEV(portid) {
2187                        if ((port_mask & (1 << portid)) == 0)
2188                                continue;
2189                        memset(&link, 0, sizeof(link));
2190                        ret = rte_eth_link_get_nowait(portid, &link);
2191                        if (ret < 0) {
2192                                all_ports_up = 0;
2193                                if (print_flag == 1)
2194                                        printf("Port %u link get failed: %s\n",
2195                                                portid, rte_strerror(-ret));
2196                                continue;
2197                        }
2198                        /* print link status if flag set */
2199                        if (print_flag == 1) {
2200                                rte_eth_link_to_str(link_status_text,
2201                                        sizeof(link_status_text), &link);
2202                                printf("Port %d %s\n", portid,
2203                                       link_status_text);
2204                                continue;
2205                        }
2206                        /* clear all_ports_up flag if any link down */
2207                        if (link.link_status == RTE_ETH_LINK_DOWN) {
2208                                all_ports_up = 0;
2209                                break;
2210                        }
2211                }
2212                /* after finally printing all link status, get out */
2213                if (print_flag == 1)
2214                        break;
2215
2216                if (all_ports_up == 0) {
2217                        printf(".");
2218                        fflush(stdout);
2219                        rte_delay_ms(CHECK_INTERVAL);
2220                }
2221
2222                /* set the print_flag if all ports up or timeout */
2223                if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
2224                        print_flag = 1;
2225                        printf("done\n");
2226                }
2227        }
2228}
2229
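/*
 * Query the packet types a port's PMD can classify in hardware and report
 * whether they cover the L3 types the chosen lookup method needs.
 */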
2230static int check_ptype(uint16_t portid)
2231{
2232        int i, ret;
2233        int ptype_l3_ipv4 = 0;
2234#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2235        int ptype_l3_ipv6 = 0;
2236#endif
2237        uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
2238
2239        ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
2240        if (ret <= 0)
2241                return 0;
2242
2243        uint32_t ptypes[ret];
2244
2245        ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
2246        for (i = 0; i < ret; ++i) {
2247                if (ptypes[i] & RTE_PTYPE_L3_IPV4)
2248                        ptype_l3_ipv4 = 1;
2249#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2250                if (ptypes[i] & RTE_PTYPE_L3_IPV6)
2251                        ptype_l3_ipv6 = 1;
2252#endif
2253        }
2254
2255        if (ptype_l3_ipv4 == 0)
2256                printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
2257
2258#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2259        if (ptype_l3_ipv6 == 0)
2260                printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
2261#endif
2262
2263#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
2264        if (ptype_l3_ipv4)
2265#else /* APP_LOOKUP_EXACT_MATCH */
2266        if (ptype_l3_ipv4 && ptype_l3_ipv6)
2267#endif
2268                return 1;
2269
2270        return 0;
2271
2272}
2273
2274static int
2275init_power_library(void)
2276{
2277        enum power_management_env env;
2278        unsigned int lcore_id;
2279        int ret = 0;
2280
2281        RTE_LCORE_FOREACH(lcore_id) {
2282                /* init power management library */
2283                ret = rte_power_init(lcore_id);
2284                if (ret) {
2285                        RTE_LOG(ERR, POWER,
2286                                "Library initialization failed on core %u\n",
2287                                lcore_id);
2288                        return ret;
2289                }
2290                /* we're not supporting the VM channel mode */
2291                env = rte_power_get_env();
2292                if (env != PM_ENV_ACPI_CPUFREQ &&
2293                                env != PM_ENV_PSTATE_CPUFREQ) {
2294                        RTE_LOG(ERR, POWER,
2295                                "Only ACPI and PSTATE modes are supported\n");
2296                        return -1;
2297                }
2298        }
2299        return ret;
2300}
2301
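/* Undo init_power_library(): release per-lcore power management state. */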
2302static int
2303deinit_power_library(void)
2304{
2305        unsigned int lcore_id;
2306        int ret = 0;
2307
2308        RTE_LCORE_FOREACH(lcore_id) {
2309                /* deinit power management library */
2310                ret = rte_power_exit(lcore_id);
2311                if (ret) {
2312                        RTE_LOG(ERR, POWER,
2313                                "Library deinitialization failed on core %u\n",
2314                                lcore_id);
2315                        return ret;
2316                }
2317        }
2318        return ret;
2319}
2320
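/*
 * Aggregate the empty-poll, full-poll and busyness counters of all worker
 * lcores that own rx queues, returning per-lcore averages.
 */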
2321static void
2322get_current_stat_values(uint64_t *values)
2323{
2324        unsigned int lcore_id = rte_lcore_id();
2325        struct lcore_conf *qconf;
2326        uint64_t app_eps = 0, app_fps = 0, app_br = 0;
2327        uint64_t count = 0;
2328
2329        RTE_LCORE_FOREACH_WORKER(lcore_id) {
2330                qconf = &lcore_conf[lcore_id];
2331                if (qconf->n_rx_queue == 0)
2332                        continue;
2333                count++;
2334                rte_spinlock_lock(&stats[lcore_id].telemetry_lock);
2335                app_eps += stats[lcore_id].ep_nep[1];
2336                app_fps += stats[lcore_id].fp_nfp[1];
2337                app_br += stats[lcore_id].br;
2338                rte_spinlock_unlock(&stats[lcore_id].telemetry_lock);
2339        }
2340
2341        if (count > 0) {
2342                values[0] = app_eps/count;
2343                values[1] = app_fps/count;
2344                values[2] = app_br/count;
2345        } else
2346                memset(values, 0, sizeof(uint64_t) * NUM_TELSTATS);
2347
2348}
2349
2350static void
2351update_telemetry(__rte_unused struct rte_timer *tim,
2352                __rte_unused void *arg)
2353{
2354        int ret;
2355        uint64_t values[NUM_TELSTATS] = {0};
2356
2357        get_current_stat_values(values);
2358        ret = rte_metrics_update_values(RTE_METRICS_GLOBAL, telstats_index,
2359                                        values, RTE_DIM(values));
2360        if (ret < 0)
2361                RTE_LOG(WARNING, POWER, "failed to update metrics\n");
2362}
2363
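/* Telemetry callback: report the current stat values as a dictionary. */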
2364static int
2365handle_app_stats(const char *cmd __rte_unused,
2366                const char *params __rte_unused,
2367                struct rte_tel_data *d)
2368{
2369        uint64_t values[NUM_TELSTATS] = {0};
2370        uint32_t i;
2371
2372        rte_tel_data_start_dict(d);
2373        get_current_stat_values(values);
2374        for (i = 0; i < NUM_TELSTATS; i++)
2375                rte_tel_data_add_dict_u64(d, telstats_strings[i].name,
2376                                values[i]);
2377        return 0;
2378}
2379
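/* Arm the periodic timer that pushes the stats into the metrics library. */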
2380static void
2381telemetry_setup_timer(void)
2382{
2383        int lcore_id = rte_lcore_id();
2384        uint64_t hz = rte_get_timer_hz();
2385        uint64_t ticks;
2386
2387        ticks = hz / TELEMETRY_INTERVALS_PER_SEC;
2388        rte_timer_reset_sync(&telemetry_timer,
2389                        ticks,
2390                        PERIODICAL,
2391                        lcore_id,
2392                        update_telemetry,
2393                        NULL);
2394}
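
/* Arm the periodic timer driving the empty-poll state machine. */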
2395static void
2396empty_poll_setup_timer(void)
2397{
2398        int lcore_id = rte_lcore_id();
2399        uint64_t hz = rte_get_timer_hz();
2400
2401        struct ep_params *ep_ptr = ep_params;
2402
2403        ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;
2404
2405        rte_timer_reset_sync(&ep_ptr->timer0,
2406                        ep_ptr->interval_ticks,
2407                        PERIODICAL,
2408                        lcore_id,
2409                        rte_empty_poll_detection,
2410                        (void *)ep_ptr);
2411
2412}
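
/*
 * Main-lcore loop for the empty-poll and telemetry modes: service the
 * timer subsystem roughly every 10 ms until the application exits.
 */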
2413static int
2414launch_timer(unsigned int lcore_id)
2415{
2416        int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;
2417
2421        if (rte_get_main_lcore() != lcore_id) {
2422                rte_panic("timer on lcore:%d which is not main core:%d\n",
2423                                lcore_id,
2424                                rte_get_main_lcore());
2425        }
2426
2427        RTE_LOG(INFO, POWER, "Bring up the Timer\n");
2428
2429        if (app_mode == APP_MODE_EMPTY_POLL)
2430                empty_poll_setup_timer();
2431        else
2432                telemetry_setup_timer();
2433
2434        cycles_10ms = rte_get_timer_hz() / 100;
2435
2436        while (!is_done()) {
2437                cur_tsc = rte_rdtsc();
2438                diff_tsc = cur_tsc - prev_tsc;
2439                if (diff_tsc > cycles_10ms) {
2440                        rte_timer_manage();
2441                        prev_tsc = cur_tsc;
2442                        cycles_10ms = rte_get_timer_hz() / 100;
2443                }
2444        }
2445
2446        RTE_LOG(INFO, POWER, "Timer subsystem is done\n");
2447
2448        return 0;
2449}
2450
2451static int
2452autodetect_mode(void)
2453{
2454        RTE_LOG(NOTICE, L3FWD_POWER, "Operating mode not specified, probing frequency scaling support...\n");
2455
2456        /*
2457         * Empty poll and telemetry modes have to be specifically requested to
2458         * be enabled, but we can auto-detect between interrupt mode with or
2459         * without frequency scaling. Both ACPI and pstate can be used.
2460         */
2461        if (rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ))
2462                return APP_MODE_LEGACY;
2463        if (rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ))
2464                return APP_MODE_LEGACY;
2465
2466        RTE_LOG(NOTICE, L3FWD_POWER, "Frequency scaling not supported, selecting interrupt-only mode\n");
2467
2468        return APP_MODE_INTERRUPT;
2469}
2470
2471static const char *
2472mode_to_str(enum appmode mode)
2473{
2474        switch (mode) {
2475        case APP_MODE_LEGACY:
2476                return "legacy";
2477        case APP_MODE_EMPTY_POLL:
2478                return "empty poll";
2479        case APP_MODE_TELEMETRY:
2480                return "telemetry";
2481        case APP_MODE_INTERRUPT:
2482                return "interrupt-only";
2483        case APP_MODE_PMD_MGMT:
2484                return "pmd mgmt";
2485        default:
2486                return "invalid";
2487        }
2488}
2489
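/*
 * L2 overhead of a port: either the device-reported difference between
 * max rx packet length and max MTU, or Ethernet header plus CRC.
 */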
2490static uint32_t
2491eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
2492{
2493        uint32_t overhead_len;
2494
2495        if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
2496                overhead_len = max_rx_pktlen - max_mtu;
2497        else
2498                overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
2499
2500        return overhead_len;
2501}
2502
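/*
 * Translate --max-pkt-len into a port MTU, enabling the multi-segment
 * tx offload when the resulting MTU exceeds a standard Ethernet frame.
 */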
2503static int
2504config_port_max_pkt_len(struct rte_eth_conf *conf,
2505                struct rte_eth_dev_info *dev_info)
2506{
2507        uint32_t overhead_len;
2508
2509        if (max_pkt_len == 0)
2510                return 0;
2511
2512        if (max_pkt_len < RTE_ETHER_MIN_LEN || max_pkt_len > MAX_JUMBO_PKT_LEN)
2513                return -1;
2514
2515        overhead_len = eth_dev_get_overhead_len(dev_info->max_rx_pktlen,
2516                        dev_info->max_mtu);
2517        conf->rxmode.mtu = max_pkt_len - overhead_len;
2518
2519        if (conf->rxmode.mtu > RTE_ETHER_MTU)
2520                conf->txmode.offloads |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2521
2522        return 0;
2523}
2524
2525/* Power library initialized in the main routine. 8< */
2526int
2527main(int argc, char **argv)
2528{
2529        struct lcore_conf *qconf;
2530        struct rte_eth_dev_info dev_info;
2531        struct rte_eth_txconf *txconf;
2532        int ret;
2533        uint16_t nb_ports;
2534        uint16_t queueid;
2535        unsigned lcore_id;
2536        uint64_t hz;
2537        uint32_t n_tx_queue, nb_lcores;
2538        uint32_t dev_rxq_num, dev_txq_num;
2539        uint8_t nb_rx_queue, queue, socketid;
2540        uint16_t portid;
2541        const char *ptr_strings[NUM_TELSTATS];
2542
2543        /* init EAL */
2544        ret = rte_eal_init(argc, argv);
2545        if (ret < 0)
2546                rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
2547        argc -= ret;
2548        argv += ret;
2549
2550        /* catch SIGINT and restore cpufreq governor to ondemand */
2551        signal(SIGINT, signal_exit_now);
2552
2553        /* init RTE timer library to be used later */
2554        rte_timer_subsystem_init();
2555
2556        /* if we're running pmd-mgmt mode, don't default to baseline mode */
2557        baseline_enabled = false;
2558
2559        /* parse application arguments (after the EAL ones) */
2560        ret = parse_args(argc, argv);
2561        if (ret < 0)
2562                rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
2563
2564        if (app_mode == APP_MODE_DEFAULT)
2565                app_mode = autodetect_mode();
2566
2567        RTE_LOG(INFO, L3FWD_POWER, "Selected operation mode: %s\n",
2568                        mode_to_str(app_mode));
2569
2570        /* only legacy and empty poll mode rely on power library */
2571        if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
2572                        init_power_library())
2573                rte_exit(EXIT_FAILURE, "init_power_library failed\n");
2574
2575        if (update_lcore_params() < 0)
2576                rte_exit(EXIT_FAILURE, "update_lcore_params failed\n");
2577
2578        if (check_lcore_params() < 0)
2579                rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
2580
2581        ret = init_lcore_rx_queues();
2582        if (ret < 0)
2583                rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
2584
2585        nb_ports = rte_eth_dev_count_avail();
2586
2587        if (check_port_config() < 0)
2588                rte_exit(EXIT_FAILURE, "check_port_config failed\n");
2589
2590        nb_lcores = rte_lcore_count();
2591
2592        /* initialize all ports */
2593        RTE_ETH_FOREACH_DEV(portid) {
2594                struct rte_eth_conf local_port_conf = port_conf;
2595                /* not all app modes need interrupts */
2596                bool need_intr = app_mode == APP_MODE_LEGACY ||
2597                                app_mode == APP_MODE_INTERRUPT;
2598
2599                /* skip ports that are not enabled */
2600                if ((enabled_port_mask & (1 << portid)) == 0) {
2601                        printf("\nSkipping disabled port %d\n", portid);
2602                        continue;
2603                }
2604
2605                /* init port */
2606                printf("Initializing port %d ... ", portid);
2607                fflush(stdout);
2608
2609                ret = rte_eth_dev_info_get(portid, &dev_info);
2610                if (ret != 0)
2611                        rte_exit(EXIT_FAILURE,
2612                                "Error during getting device (port %u) info: %s\n",
2613                                portid, strerror(-ret));
2614
2615                dev_rxq_num = dev_info.max_rx_queues;
2616                dev_txq_num = dev_info.max_tx_queues;
2617
2618                nb_rx_queue = get_port_n_rx_queues(portid);
2619                if (nb_rx_queue > dev_rxq_num)
2620                        rte_exit(EXIT_FAILURE,
2621                                "Cannot configure non-existent rxq: "
2622                                "port=%d\n", portid);
2623
2624                n_tx_queue = nb_lcores;
2625                if (n_tx_queue > dev_txq_num)
2626                        n_tx_queue = dev_txq_num;
2627                printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
2628                        nb_rx_queue, (unsigned)n_tx_queue);
2629                /* If number of Rx queue is 0, no need to enable Rx interrupt */
2630                if (nb_rx_queue == 0)
2631                        need_intr = false;
2632
2633                if (need_intr)
2634                        local_port_conf.intr_conf.rxq = 1;
2635
2636                ret = rte_eth_dev_info_get(portid, &dev_info);
2637                if (ret != 0)
2638                        rte_exit(EXIT_FAILURE,
2639                                "Error during getting device (port %u) info: %s\n",
2640                                portid, strerror(-ret));
2641
2642                ret = config_port_max_pkt_len(&local_port_conf, &dev_info);
2643                if (ret != 0)
2644                        rte_exit(EXIT_FAILURE,
2645                                "Invalid max packet length: %u (port %u)\n",
2646                                max_pkt_len, portid);
2647
2648                if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
2649                        local_port_conf.txmode.offloads |=
2650                                RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
2651
2652                local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
2653                        dev_info.flow_type_rss_offloads;
2654                if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
2655                                port_conf.rx_adv_conf.rss_conf.rss_hf) {
2656                        printf("Port %u modified RSS hash function based on hardware support, "
2657                                "requested:%#"PRIx64" configured:%#"PRIx64"\n",
2658                                portid,
2659                                port_conf.rx_adv_conf.rss_conf.rss_hf,
2660                                local_port_conf.rx_adv_conf.rss_conf.rss_hf);
2661                }
2662
2663                if (local_port_conf.rx_adv_conf.rss_conf.rss_hf == 0)
2664                        local_port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
2665                local_port_conf.rxmode.offloads &= dev_info.rx_offload_capa;
2666                port_conf.rxmode.offloads = local_port_conf.rxmode.offloads;
2667
2668                ret = rte_eth_dev_configure(portid, nb_rx_queue,
2669                                        (uint16_t)n_tx_queue, &local_port_conf);
2670                if (ret < 0)
2671                        rte_exit(EXIT_FAILURE, "Cannot configure device: "
2672                                        "err=%d, port=%d\n", ret, portid);
2673
2674                ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
2675                                                       &nb_txd);
2676                if (ret < 0)
2677                        rte_exit(EXIT_FAILURE,
2678                                 "Cannot adjust number of descriptors: err=%d, port=%d\n",
2679                                 ret, portid);
2680
2681                ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
2682                if (ret < 0)
2683                        rte_exit(EXIT_FAILURE,
2684                                 "Cannot get MAC address: err=%d, port=%d\n",
2685                                 ret, portid);
2686
2687                print_ethaddr(" Address:", &ports_eth_addr[portid]);
2688                printf(", ");
2689
2690                /* init memory */
2691                ret = init_mem(NB_MBUF);
2692                if (ret < 0)
2693                        rte_exit(EXIT_FAILURE, "init_mem failed\n");
2694
2695                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
2696                        if (rte_lcore_is_enabled(lcore_id) == 0)
2697                                continue;
2698
2699                        /* Initialize TX buffers */
2700                        qconf = &lcore_conf[lcore_id];
2701                        qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
2702                                RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
2703                                rte_eth_dev_socket_id(portid));
2704                        if (qconf->tx_buffer[portid] == NULL)
2705                                rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
2706                                                 portid);
2707
2708                        rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
2709                }
2710
2711                /* init one TX queue per couple (lcore,port) */
2712                queueid = 0;
2713                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
2714                        if (rte_lcore_is_enabled(lcore_id) == 0)
2715                                continue;
2716
2717                        if (queueid >= dev_txq_num)
2718                                continue;
2719
2720                        if (numa_on)
2721                                socketid =
2722                                (uint8_t)rte_lcore_to_socket_id(lcore_id);
2723                        else
2724                                socketid = 0;
2725
2726                        printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
2727                        fflush(stdout);
2728
2729                        txconf = &dev_info.default_txconf;
2730                        txconf->offloads = local_port_conf.txmode.offloads;
2731                        ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
2732                                                     socketid, txconf);
2733                        if (ret < 0)
2734                                rte_exit(EXIT_FAILURE,
2735                                        "rte_eth_tx_queue_setup: err=%d, "
2736                                                "port=%d\n", ret, portid);
2737
2738                        qconf = &lcore_conf[lcore_id];
2739                        qconf->tx_queue_id[portid] = queueid;
2740                        queueid++;
2741
2742                        qconf->tx_port_id[qconf->n_tx_port] = portid;
2743                        qconf->n_tx_port++;
2744                }
2745                printf("\n");
2746        }
2747
2748        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
2749                if (rte_lcore_is_enabled(lcore_id) == 0)
2750                        continue;
2751
2752                if (app_mode == APP_MODE_LEGACY) {
2753                        /* init timer structures for each enabled lcore */
2754                        rte_timer_init(&power_timers[lcore_id]);
2755                        hz = rte_get_timer_hz();
2756                        rte_timer_reset(&power_timers[lcore_id],
2757                                        hz/TIMER_NUMBER_PER_SECOND,
2758                                        SINGLE, lcore_id,
2759                                        power_timer_cb, NULL);
2760                }
2761                qconf = &lcore_conf[lcore_id];
2762                printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
2763                fflush(stdout);
2764
2765                /* init RX queues */
2766                for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
2767                        struct rte_eth_rxconf rxq_conf;
2768
2769                        portid = qconf->rx_queue_list[queue].port_id;
2770                        queueid = qconf->rx_queue_list[queue].queue_id;
2771
2772                        if (numa_on)
2773                                socketid =
2774                                (uint8_t)rte_lcore_to_socket_id(lcore_id);
2775                        else
2776                                socketid = 0;
2777
2778                        printf("rxq=%d,%d,%d ", portid, queueid, socketid);
2779                        fflush(stdout);
2780
2781                        ret = rte_eth_dev_info_get(portid, &dev_info);
2782                        if (ret != 0)
2783                                rte_exit(EXIT_FAILURE,
2784                                        "Error during getting device (port %u) info: %s\n",
2785                                        portid, strerror(-ret));
2786
2787                        rxq_conf = dev_info.default_rxconf;
2788                        rxq_conf.offloads = port_conf.rxmode.offloads;
2789                        ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
2790                                socketid, &rxq_conf,
2791                                pktmbuf_pool[socketid]);
2792                        if (ret < 0)
2793                                rte_exit(EXIT_FAILURE,
2794                                        "rte_eth_rx_queue_setup: err=%d, "
2795                                                "port=%d\n", ret, portid);
2796
2797                        if (parse_ptype) {
2798                                if (add_cb_parse_ptype(portid, queueid) < 0)
2799                                        rte_exit(EXIT_FAILURE,
2800                                                 "Failed to add ptype cb\n");
2801                        }
2802
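                            /*
                             * Attach the per-queue power-saving callback. The
                             * action taken while the queue is idle depends on
                             * pmgmt_type (see rte_power_pmd_mgmt.h), e.g.:
                             *   RTE_POWER_MGMT_TYPE_MONITOR - sleep on a
                             *     monitor address until traffic arrives
                             *   RTE_POWER_MGMT_TYPE_PAUSE - issue pause
                             *     instructions while polls come back empty
                             *   RTE_POWER_MGMT_TYPE_SCALE - scale the lcore
                             *     frequency down
                             * Baseline mode skips this so unmanaged polling
                             * can be measured for comparison.
                             */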
2803                        if (app_mode == APP_MODE_PMD_MGMT && !baseline_enabled) {
2804                                ret = rte_power_ethdev_pmgmt_queue_enable(
2805                                                lcore_id, portid, queueid,
2806                                                pmgmt_type);
2807                                if (ret < 0)
2808                                        rte_exit(EXIT_FAILURE,
2809                                                "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n",
2810                                                        ret, portid);
2811                        }
2812                }
2813        }
2814        /* >8 End of power library initialization. */
2815
2816        printf("\n");
2817
2818        /* start ports */
2819        RTE_ETH_FOREACH_DEV(portid) {
2820                if ((enabled_port_mask & (1 << portid)) == 0) {
2821                        continue;
2822                }
2823                /* Start device */
2824                ret = rte_eth_dev_start(portid);
2825                if (ret < 0)
2826                        rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
2827                                                "port=%d\n", ret, portid);
2828                /*
2829                 * If enabled, put device in promiscuous mode.
2830                 * This allows the IO forwarding mode to forward packets
2831                 * to itself through two cross-connected ports of the
2832                 * target machine.
2833                 */
2834                if (promiscuous_on) {
2835                        ret = rte_eth_promiscuous_enable(portid);
2836                        if (ret != 0)
2837                                rte_exit(EXIT_FAILURE,
2838                                        "rte_eth_promiscuous_enable: err=%s, port=%u\n",
2839                                        rte_strerror(-ret), portid);
2840                }
2841                /* initialize spinlock for each port */
2842                rte_spinlock_init(&(locks[portid]));
2843
2844                if (!parse_ptype)
2845                        if (!check_ptype(portid))
2846                                rte_exit(EXIT_FAILURE,
2847                                        "PMD cannot provide needed ptypes\n");
2848        }
2849
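            /*
             * Poll the enabled ports until their link status settles (link
             * up, or the check times out) and report it before forwarding
             * starts.
             */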
2850        check_all_ports_link_status(enabled_port_mask);
2851
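            /*
             * Empty-poll mode classifies traffic by how many consecutive
             * empty polls an lcore sees per interval. With training enabled
             * the library measures its own baselines first; otherwise the
             * user-supplied ep_med_edpi/ep_hgh_edpi values seed the
             * medium/high thresholds directly.
             */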
2852        if (app_mode == APP_MODE_EMPTY_POLL) {
2854                if (empty_poll_train) {
2855                        policy.state = TRAINING;
2856                } else {
2857                        policy.state = MED_NORMAL;
2858                        policy.med_base_edpi = ep_med_edpi;
2859                        policy.hgh_base_edpi = ep_hgh_edpi;
2860                }
2861
2862                ret = rte_power_empty_poll_stat_init(&ep_params,
2863                                freq_tlb,
2864                                &policy);
2865                if (ret < 0)
2866                        rte_exit(EXIT_FAILURE, "empty poll init failed\n");
2867        }
2868
2870        /* launch per-lcore init on every lcore */
2871        if (app_mode == APP_MODE_LEGACY) {
2872                rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MAIN);
2873        } else if (app_mode == APP_MODE_EMPTY_POLL) {
2874                empty_poll_stop = false;
2875                rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
2876                                SKIP_MAIN);
2877        } else if (app_mode == APP_MODE_TELEMETRY) {
2878                unsigned int i;
2879
2880                /* Init metrics library */
2881                rte_metrics_init(rte_socket_id());
2882                /* Register stats with metrics library */
2883                for (i = 0; i < NUM_TELSTATS; i++)
2884                        ptr_strings[i] = telstats_strings[i].name;
2885
2886                ret = rte_metrics_reg_names(ptr_strings, NUM_TELSTATS);
2887                if (ret >= 0)
2888                        telstats_index = ret;
2889                else
2890                        rte_exit(EXIT_FAILURE, "failed to register metrics names\n");
2891
2892                RTE_LCORE_FOREACH_WORKER(lcore_id) {
2893                        rte_spinlock_init(&stats[lcore_id].telemetry_lock);
2894                }
2895                rte_timer_init(&telemetry_timer);
2896                rte_telemetry_register_cmd("/l3fwd-power/stats",
2897                                handle_app_stats,
2898                                "Returns global power stats. Parameters: None");
2899                rte_eal_mp_remote_launch(main_telemetry_loop, NULL,
2900                                                SKIP_MAIN);
2901        } else if (app_mode == APP_MODE_INTERRUPT) {
2902                rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN);
2903        } else if (app_mode == APP_MODE_PMD_MGMT) {
2904                /* reuse telemetry loop for PMD power management mode */
2905                rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN);
2906        }
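            /*
             * Modes that drive a periodic timer from the main lcore
             * (empty-poll and telemetry) launched their workers with
             * SKIP_MAIN above; the main lcore is handed to launch_timer()
             * below. The other modes run a forwarding loop on the main
             * lcore as well via CALL_MAIN.
             */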
2907
2908        if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY)
2909                launch_timer(rte_lcore_id());
2910
2911        RTE_LCORE_FOREACH_WORKER(lcore_id) {
2912                if (rte_eal_wait_lcore(lcore_id) < 0)
2913                        return -1;
2914        }
2915
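            /*
             * Tear-down mirrors start-up: in PMD power management mode,
             * remove the per-queue power-saving callbacks before the ports
             * are stopped and closed.
             */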
2916        if (app_mode == APP_MODE_PMD_MGMT) {
2917                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
2918                        if (rte_lcore_is_enabled(lcore_id) == 0)
2919                                continue;
2920                        qconf = &lcore_conf[lcore_id];
2921                        for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
2922                                portid = qconf->rx_queue_list[queue].port_id;
2923                                queueid = qconf->rx_queue_list[queue].queue_id;
2924
2925                                rte_power_ethdev_pmgmt_queue_disable(lcore_id,
2926                                                portid, queueid);
2927                        }
2928                }
2929        }
2930
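            /*
             * Stop and close every enabled port. A failed stop is logged
             * but does not abort the rest of the cleanup.
             */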
2931        RTE_ETH_FOREACH_DEV(portid) {
2933                if ((enabled_port_mask & (1 << portid)) == 0)
2934                        continue;
2935
2936                ret = rte_eth_dev_stop(portid);
2937                if (ret != 0)
2938                        RTE_LOG(ERR, L3FWD_POWER, "rte_eth_dev_stop: err=%d, port=%u\n",
2939                                ret, portid);
2940
2941                rte_eth_dev_close(portid);
2942        }
2943
2944        if (app_mode == APP_MODE_EMPTY_POLL)
2945                rte_power_empty_poll_stat_free();
2946
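            /*
             * deinit_power_library() calls rte_power_exit() on each lcore,
             * which should hand frequency control back to the governor that
             * was active before the application took over.
             */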
2947        if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) &&
2948                        deinit_power_library())
2949                rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");
2950
2951        if (rte_eal_cleanup() < 0)
2952                RTE_LOG(ERR, L3FWD_POWER, "EAL cleanup failed\n");
2953
2954        return 0;
2955}
2956