dpdk/examples/ip_reassembly/main.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2014 Intel Corporation
   3 */
   4
   5#include <stdio.h>
   6#include <stdlib.h>
   7#include <stdint.h>
   8#include <inttypes.h>
   9#include <sys/types.h>
  10#include <string.h>
  11#include <sys/queue.h>
  12#include <stdarg.h>
  13#include <errno.h>
  14#include <getopt.h>
  15#include <signal.h>
  16#include <sys/param.h>
  17
  18#include <rte_common.h>
  19#include <rte_byteorder.h>
  20#include <rte_log.h>
  21#include <rte_memory.h>
  22#include <rte_memcpy.h>
  23#include <rte_eal.h>
  24#include <rte_launch.h>
  25#include <rte_cycles.h>
  26#include <rte_prefetch.h>
  27#include <rte_lcore.h>
  28#include <rte_per_lcore.h>
  29#include <rte_branch_prediction.h>
  30#include <rte_interrupts.h>
  31#include <rte_random.h>
  32#include <rte_debug.h>
  33#include <rte_ether.h>
  34#include <rte_ethdev.h>
  35#include <rte_mempool.h>
  36#include <rte_mbuf.h>
  37#include <rte_malloc.h>
  38#include <rte_ip.h>
  39#include <rte_tcp.h>
  40#include <rte_udp.h>
  41#include <rte_string_fns.h>
  42#include <rte_lpm.h>
  43#include <rte_lpm6.h>
  44
  45#include <rte_ip_frag.h>
  46
/* Largest number of packets requested from rte_eth_rx_burst() in one call. */
#define MAX_PKT_BURST 32


/* Log type selected by the IP_RSMBL tag in this file's RTE_LOG() calls. */
#define RTE_LOGTYPE_IP_RSMBL RTE_LOGTYPE_USER1

#define MAX_JUMBO_PKT_LEN  9600

/*
 * BUF_SIZE is the per-mbuf data room used to estimate how many segments a
 * maximum-size frame needs; MBUF_DATA_SIZE is the data room size passed to
 * rte_pktmbuf_pool_create().
 */
#define BUF_SIZE        RTE_MBUF_DEFAULT_DATAROOM
#define MBUF_DATA_SIZE  RTE_MBUF_DEFAULT_BUF_SIZE

/*
 * NB_MBUF is the lower bound on the number of mbufs in each RX queue pool;
 * MEMPOOL_CACHE_SIZE is the cache size passed to rte_pktmbuf_pool_create().
 */
#define NB_MBUF 8192
#define MEMPOOL_CACHE_SIZE 256

/* allow max jumbo frame 9.5 KB */
#define JUMBO_FRAME_MAX_SIZE    0x2600

/* Accepted range and default for the --maxflows option. */
#define MAX_FLOW_NUM    UINT16_MAX
#define MIN_FLOW_NUM    1
#define DEF_FLOW_NUM    0x1000

/* TTL numbers are in ms. */
/* Accepted range and default for the --flowttl option. */
#define MAX_FLOW_TTL    (3600 * MS_PER_S)
#define MIN_FLOW_TTL    1
#define DEF_FLOW_TTL    MS_PER_S

/* Fragments per flow assumed when sizing the mbuf pools. */
#define MAX_FRAG_NUM RTE_LIBRTE_IP_FRAG_MAX_FRAG

/* Should be power of two. */
#define IP_FRAG_TBL_BUCKET_ENTRIES      16

/* Run-time values of --maxflows and --flowttl (the latter in milliseconds). */
static uint32_t max_flow_num = DEF_FLOW_NUM;
static uint32_t max_flow_ttl = DEF_FLOW_TTL;

#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/* Descriptor counts; also added to the mbuf pool size estimate. */
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
  95
  96/* ethernet addresses of ports */
  97static struct rte_ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
  98
  99#ifndef IPv4_BYTES
 100#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
 101#define IPv4_BYTES(addr) \
 102                (uint8_t) (((addr) >> 24) & 0xFF),\
 103                (uint8_t) (((addr) >> 16) & 0xFF),\
 104                (uint8_t) (((addr) >> 8) & 0xFF),\
 105                (uint8_t) ((addr) & 0xFF)
 106#endif
 107
 108#ifndef IPv6_BYTES
 109#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
 110                       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
 111#define IPv6_BYTES(addr) \
 112        addr[0],  addr[1], addr[2],  addr[3], \
 113        addr[4],  addr[5], addr[6],  addr[7], \
 114        addr[8],  addr[9], addr[10], addr[11],\
 115        addr[12], addr[13],addr[14], addr[15]
 116#endif
 117
/* Size in bytes of the IPv6 prefix stored in struct l3fwd_ipv6_route. */
#define IPV6_ADDR_LEN 16

/* mask of enabled ports */
/* Bit N set means port N is enabled; filled in from the -p option. */
static uint32_t enabled_port_mask = 0;

/* Value of the -q option (RX queues per lcore); defaults to 1. */
static int rx_queue_per_lcore = 1;
 124
 125struct mbuf_table {
 126        uint32_t len;
 127        uint32_t head;
 128        uint32_t tail;
 129        struct rte_mbuf *m_table[0];
 130};
 131
/* State attached to one RX queue polled by an lcore. */
struct rx_queue {
        struct rte_ip_frag_tbl *frag_tbl; /* fragment table used for reassembly */
        struct rte_mempool *pool;         /* mbuf pool created for this queue */
        struct rte_lpm *lpm;              /* IPv4 table used to pick the output port */
        struct rte_lpm6 *lpm6;            /* IPv6 table used to pick the output port */
        uint16_t portid;                  /* port this queue receives from */
};
 139
/*
 * Per-lcore TX counters. They are only updated when
 * RTE_LIBRTE_IP_FRAG_TBL_STAT is defined (see TX_LCORE_STAT_UPDATE).
 */
struct tx_lcore_stat {
        uint64_t call;  /* number of rte_eth_tx_burst() calls */
        uint64_t drop;  /* packets freed because the staging ring was full */
        uint64_t queue; /* packets placed on a staging ring */
        uint64_t send;  /* packets accepted by rte_eth_tx_burst() */
};
 146
/* Upper limits used to size the per-lcore and per-port queue arrays. */
#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
#define MAX_RX_QUEUE_PER_PORT 128

/* Everything one lcore needs to run the main loop. */
struct lcore_queue_conf {
        uint16_t n_rx_queue;                    /* valid entries in rx_queue_list */
        struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
        uint16_t tx_queue_id[RTE_MAX_ETHPORTS]; /* TX queue this lcore uses on each port */
        struct rte_ip_frag_death_row death_row; /* mbufs released by the reassembly code */
        struct mbuf_table *tx_mbufs[RTE_MAX_ETHPORTS]; /* TX staging ring per port */
        struct tx_lcore_stat tx_stat;
} __rte_cache_aligned;
/* One configuration slot per lcore, indexed by lcore id. */
static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
 160
/*
 * Ethernet device configuration template: RSS on IP headers for RX with
 * checksum offload, an MTU derived from JUMBO_FRAME_MAX_SIZE, and TX with
 * IPv4 checksum and multi-segment offloads.
 */
static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = RTE_ETH_MQ_RX_RSS,
                /* frame size minus Ethernet header and CRC */
                .mtu = JUMBO_FRAME_MAX_SIZE - RTE_ETHER_HDR_LEN -
                        RTE_ETHER_CRC_LEN,
                .split_hdr_size = 0,
                .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                        .rss_conf = {
                                .rss_key = NULL,
                                .rss_hf = RTE_ETH_RSS_IP,
                },
        },
        .txmode = {
                .mq_mode = RTE_ETH_MQ_TX_NONE,
                /* reassembled packets are mbuf chains, hence MULTI_SEGS */
                .offloads = (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
                             RTE_ETH_TX_OFFLOAD_MULTI_SEGS),
        },
};
 181
/*
 * IPv4 forwarding table
 */
/* One static route: prefix, prefix length and the port to forward to. */
struct l3fwd_ipv4_route {
        uint32_t ip;     /* prefix, built with RTE_IPV4() */
        uint8_t  depth;  /* prefix length passed to rte_lpm_add() */
        uint8_t  if_out; /* next hop stored in the LPM table; used as output port */
};

/* Default l3fwd_ipv4_route_array table. 8< */
struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
                {RTE_IPV4(100,10,0,0), 16, 0},
                {RTE_IPV4(100,20,0,0), 16, 1},
                {RTE_IPV4(100,30,0,0), 16, 2},
                {RTE_IPV4(100,40,0,0), 16, 3},
                {RTE_IPV4(100,50,0,0), 16, 4},
                {RTE_IPV4(100,60,0,0), 16, 5},
                {RTE_IPV4(100,70,0,0), 16, 6},
                {RTE_IPV4(100,80,0,0), 16, 7},
};
/* >8 End of default l3fwd_ipv4_route_array table. */
 203
/*
 * IPv6 forwarding table
 */

/* One static route: prefix, prefix length and the port to forward to. */
struct l3fwd_ipv6_route {
        uint8_t ip[IPV6_ADDR_LEN]; /* prefix bytes passed to rte_lpm6_add() */
        uint8_t depth;             /* prefix length */
        uint8_t if_out;            /* next hop stored in the LPM6 table; used as output port */
};

/* Default l3fwd_ipv6_route_array table. 8< */
static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
        {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
        {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
        {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
        {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
        {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
        {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
        {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
        {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};
/* >8 End of default l3fwd_ipv6_route_array table. */
 226
/* Capacity limits for the LPM (IPv4) and LPM6 (IPv6) tables. */
#define LPM_MAX_RULES         1024
#define LPM6_MAX_RULES         1024
#define LPM6_NUMBER_TBL8S (1 << 16)

/* Creation parameters for the IPv6 LPM tables. */
struct rte_lpm6_config lpm6_config = {
                .max_rules = LPM6_MAX_RULES,
                .number_tbl8s = LPM6_NUMBER_TBL8S,
                .flags = 0
};

/*
 * Routing tables indexed by NUMA socket; a NULL entry is skipped by
 * init_routing_table().
 */
static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];
 239
/*
 * TX_LCORE_STAT_UPDATE(s, f, v): add v to field f of the statistics
 * structure pointed to by s. Expands to an empty statement, evaluating none
 * of its arguments, unless RTE_LIBRTE_IP_FRAG_TBL_STAT is defined.
 */
#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
#define TX_LCORE_STAT_UPDATE(s, f, v)   ((s)->f += (v))
#else
#define TX_LCORE_STAT_UPDATE(s, f, v)   do {} while (0)
#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */
 245
 246/*
 247 * If number of queued packets reached given threshold, then
 248 * send burst of packets on an output interface.
 249 */
 250static inline uint32_t
 251send_burst(struct lcore_queue_conf *qconf, uint32_t thresh, uint16_t port)
 252{
 253        uint32_t fill, len, k, n;
 254        struct mbuf_table *txmb;
 255
 256        txmb = qconf->tx_mbufs[port];
 257        len = txmb->len;
 258
 259        if ((int32_t)(fill = txmb->head - txmb->tail) < 0)
 260                fill += len;
 261
 262        if (fill >= thresh) {
 263                n = RTE_MIN(len - txmb->tail, fill);
 264
 265                k = rte_eth_tx_burst(port, qconf->tx_queue_id[port],
 266                        txmb->m_table + txmb->tail, (uint16_t)n);
 267
 268                TX_LCORE_STAT_UPDATE(&qconf->tx_stat, call, 1);
 269                TX_LCORE_STAT_UPDATE(&qconf->tx_stat, send, k);
 270
 271                fill -= k;
 272                if ((txmb->tail += k) == len)
 273                        txmb->tail = 0;
 274        }
 275
 276        return fill;
 277}
 278
 279/* Enqueue a single packet, and send burst if queue is filled */
 280static inline int
 281send_single_packet(struct rte_mbuf *m, uint16_t port)
 282{
 283        uint32_t fill, lcore_id, len;
 284        struct lcore_queue_conf *qconf;
 285        struct mbuf_table *txmb;
 286
 287        lcore_id = rte_lcore_id();
 288        qconf = &lcore_queue_conf[lcore_id];
 289
 290        txmb = qconf->tx_mbufs[port];
 291        len = txmb->len;
 292
 293        fill = send_burst(qconf, MAX_PKT_BURST, port);
 294
 295        if (fill == len - 1) {
 296                TX_LCORE_STAT_UPDATE(&qconf->tx_stat, drop, 1);
 297                rte_pktmbuf_free(txmb->m_table[txmb->tail]);
 298                if (++txmb->tail == len)
 299                        txmb->tail = 0;
 300        }
 301
 302        TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1);
 303        txmb->m_table[txmb->head] = m;
 304        if(++txmb->head == len)
 305                txmb->head = 0;
 306
 307        return 0;
 308}
 309
 310static inline void
 311reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
 312        struct lcore_queue_conf *qconf, uint64_t tms)
 313{
 314        struct rte_ether_hdr *eth_hdr;
 315        struct rte_ip_frag_tbl *tbl;
 316        struct rte_ip_frag_death_row *dr;
 317        struct rx_queue *rxq;
 318        void *d_addr_bytes;
 319        uint32_t next_hop;
 320        uint16_t dst_port;
 321
 322        rxq = &qconf->rx_queue_list[queue];
 323
 324        eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
 325
 326        dst_port = portid;
 327
 328        /* if packet is IPv4 */
 329        if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
 330                struct rte_ipv4_hdr *ip_hdr;
 331                uint32_t ip_dst;
 332
 333                ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
 334
 335                 /* if it is a fragmented packet, then try to reassemble. */
 336                if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) {
 337                        struct rte_mbuf *mo;
 338
 339                        tbl = rxq->frag_tbl;
 340                        dr = &qconf->death_row;
 341
 342                        /* prepare mbuf: setup l2_len/l3_len. */
 343                        m->l2_len = sizeof(*eth_hdr);
 344                        m->l3_len = sizeof(*ip_hdr);
 345
 346                        /* process this fragment. */
 347                        mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr);
 348                        if (mo == NULL)
 349                                /* no packet to send out. */
 350                                return;
 351
 352                        /* we have our packet reassembled. */
 353                        if (mo != m) {
 354                                m = mo;
 355                                eth_hdr = rte_pktmbuf_mtod(m,
 356                                        struct rte_ether_hdr *);
 357                                ip_hdr = (struct rte_ipv4_hdr *)(eth_hdr + 1);
 358                        }
 359
 360                        /* update offloading flags */
 361                        m->ol_flags |= (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM);
 362                }
 363                ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
 364
 365                /* Find destination port */
 366                if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
 367                                (enabled_port_mask & 1 << next_hop) != 0) {
 368                        dst_port = next_hop;
 369                }
 370
 371                eth_hdr->ether_type = rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV4);
 372        } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
 373                /* if packet is IPv6 */
 374                struct rte_ipv6_fragment_ext *frag_hdr;
 375                struct rte_ipv6_hdr *ip_hdr;
 376
 377                ip_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1);
 378
 379                frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
 380
 381                if (frag_hdr != NULL) {
 382                        struct rte_mbuf *mo;
 383
 384                        tbl = rxq->frag_tbl;
 385                        dr  = &qconf->death_row;
 386
 387                        /* prepare mbuf: setup l2_len/l3_len. */
 388                        m->l2_len = sizeof(*eth_hdr);
 389                        m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);
 390
 391                        mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);
 392                        if (mo == NULL)
 393                                return;
 394
 395                        if (mo != m) {
 396                                m = mo;
 397                                eth_hdr = rte_pktmbuf_mtod(m,
 398                                                        struct rte_ether_hdr *);
 399                                ip_hdr = (struct rte_ipv6_hdr *)(eth_hdr + 1);
 400                        }
 401                }
 402
 403                /* Find destination port */
 404                if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
 405                                                &next_hop) == 0 &&
 406                                (enabled_port_mask & 1 << next_hop) != 0) {
 407                        dst_port = next_hop;
 408                }
 409
 410                eth_hdr->ether_type = rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV6);
 411        }
 412        /* if packet wasn't IPv4 or IPv6, it's forwarded to the port it came from */
 413
 414        /* 02:00:00:00:00:xx */
 415        d_addr_bytes = &eth_hdr->dst_addr.addr_bytes[0];
 416        *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40);
 417
 418        /* src addr */
 419        rte_ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->src_addr);
 420
 421        send_single_packet(m, dst_port);
 422}
 423
 424/* main processing loop */
 425static int
 426main_loop(__rte_unused void *dummy)
 427{
 428        struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
 429        unsigned lcore_id;
 430        uint64_t diff_tsc, cur_tsc, prev_tsc;
 431        int i, j, nb_rx;
 432        uint16_t portid;
 433        struct lcore_queue_conf *qconf;
 434        const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
 435
 436        prev_tsc = 0;
 437
 438        lcore_id = rte_lcore_id();
 439        qconf = &lcore_queue_conf[lcore_id];
 440
 441        if (qconf->n_rx_queue == 0) {
 442                RTE_LOG(INFO, IP_RSMBL, "lcore %u has nothing to do\n", lcore_id);
 443                return 0;
 444        }
 445
 446        RTE_LOG(INFO, IP_RSMBL, "entering main loop on lcore %u\n", lcore_id);
 447
 448        for (i = 0; i < qconf->n_rx_queue; i++) {
 449
 450                portid = qconf->rx_queue_list[i].portid;
 451                RTE_LOG(INFO, IP_RSMBL, " -- lcoreid=%u portid=%u\n", lcore_id,
 452                        portid);
 453        }
 454
 455        while (1) {
 456
 457                cur_tsc = rte_rdtsc();
 458
 459                /*
 460                 * TX burst queue drain
 461                 */
 462                diff_tsc = cur_tsc - prev_tsc;
 463                if (unlikely(diff_tsc > drain_tsc)) {
 464
 465                        /*
 466                         * This could be optimized (use queueid instead of
 467                         * portid), but it is not called so often
 468                         */
 469                        for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
 470                                if ((enabled_port_mask & (1 << portid)) != 0)
 471                                        send_burst(qconf, 1, portid);
 472                        }
 473
 474                        prev_tsc = cur_tsc;
 475                }
 476
 477                /*
 478                 * Read packet from RX queues
 479                 */
 480                for (i = 0; i < qconf->n_rx_queue; ++i) {
 481
 482                        portid = qconf->rx_queue_list[i].portid;
 483
 484                        nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
 485                                MAX_PKT_BURST);
 486
 487                        /* Prefetch first packets */
 488                        for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
 489                                rte_prefetch0(rte_pktmbuf_mtod(
 490                                                pkts_burst[j], void *));
 491                        }
 492
 493                        /* Prefetch and forward already prefetched packets */
 494                        for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
 495                                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
 496                                        j + PREFETCH_OFFSET], void *));
 497                                reassemble(pkts_burst[j], portid,
 498                                        i, qconf, cur_tsc);
 499                        }
 500
 501                        /* Forward remaining prefetched packets */
 502                        for (; j < nb_rx; j++) {
 503                                reassemble(pkts_burst[j], portid,
 504                                        i, qconf, cur_tsc);
 505                        }
 506
 507                        rte_ip_frag_free_death_row(&qconf->death_row,
 508                                PREFETCH_OFFSET);
 509                }
 510        }
 511}
 512
 513/* display usage */
 514static void
 515print_usage(const char *prgname)
 516{
 517        printf("%s [EAL options] -- -p PORTMASK [-q NQ]"
 518                "  [--maxflows=<flows>]  [--flowttl=<ttl>[(s|ms)]]\n"
 519                "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
 520                "  -q NQ: number of RX queues per lcore\n"
 521                "  --maxflows=<flows>: optional, maximum number of flows "
 522                "supported\n"
 523                "  --flowttl=<ttl>[(s|ms)]: optional, maximum TTL for each "
 524                "flow\n",
 525                prgname);
 526}
 527
 528static uint32_t
 529parse_flow_num(const char *str, uint32_t min, uint32_t max, uint32_t *val)
 530{
 531        char *end;
 532        uint64_t v;
 533
 534        /* parse decimal string */
 535        errno = 0;
 536        v = strtoul(str, &end, 10);
 537        if (errno != 0 || *end != '\0')
 538                return -EINVAL;
 539
 540        if (v < min || v > max)
 541                return -EINVAL;
 542
 543        *val = (uint32_t)v;
 544        return 0;
 545}
 546
 547static int
 548parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val)
 549{
 550        char *end;
 551        uint64_t v;
 552
 553        static const char frmt_sec[] = "s";
 554        static const char frmt_msec[] = "ms";
 555
 556        /* parse decimal string */
 557        errno = 0;
 558        v = strtoul(str, &end, 10);
 559        if (errno != 0)
 560                return -EINVAL;
 561
 562        if (*end != '\0') {
 563                if (strncmp(frmt_sec, end, sizeof(frmt_sec)) == 0)
 564                        v *= MS_PER_S;
 565                else if (strncmp(frmt_msec, end, sizeof (frmt_msec)) != 0)
 566                        return -EINVAL;
 567        }
 568
 569        if (v < min || v > max)
 570                return -EINVAL;
 571
 572        *val = (uint32_t)v;
 573        return 0;
 574}
 575
 576static int
 577parse_portmask(const char *portmask)
 578{
 579        char *end = NULL;
 580        unsigned long pm;
 581
 582        /* parse hexadecimal string */
 583        pm = strtoul(portmask, &end, 16);
 584        if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
 585                return 0;
 586
 587        return pm;
 588}
 589
 590static int
 591parse_nqueue(const char *q_arg)
 592{
 593        char *end = NULL;
 594        unsigned long n;
 595
 596        printf("%p\n", q_arg);
 597
 598        /* parse hexadecimal string */
 599        n = strtoul(q_arg, &end, 10);
 600        if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
 601                return -1;
 602        if (n == 0)
 603                return -1;
 604        if (n >= MAX_RX_QUEUE_PER_LCORE)
 605                return -1;
 606
 607        return n;
 608}
 609
 610/* Parse the argument given in the command line of the application */
 611static int
 612parse_args(int argc, char **argv)
 613{
 614        int opt, ret;
 615        char **argvopt;
 616        int option_index;
 617        char *prgname = argv[0];
 618        static struct option lgopts[] = {
 619                {"maxflows", 1, 0, 0},
 620                {"flowttl", 1, 0, 0},
 621                {NULL, 0, 0, 0}
 622        };
 623
 624        argvopt = argv;
 625
 626        while ((opt = getopt_long(argc, argvopt, "p:q:",
 627                                lgopts, &option_index)) != EOF) {
 628
 629                switch (opt) {
 630                /* portmask */
 631                case 'p':
 632                        enabled_port_mask = parse_portmask(optarg);
 633                        if (enabled_port_mask == 0) {
 634                                printf("invalid portmask\n");
 635                                print_usage(prgname);
 636                                return -1;
 637                        }
 638                        break;
 639
 640                /* nqueue */
 641                case 'q':
 642                        rx_queue_per_lcore = parse_nqueue(optarg);
 643                        if (rx_queue_per_lcore < 0) {
 644                                printf("invalid queue number\n");
 645                                print_usage(prgname);
 646                                return -1;
 647                        }
 648                        break;
 649
 650                /* long options */
 651                case 0:
 652                        if (!strncmp(lgopts[option_index].name,
 653                                        "maxflows", 8)) {
 654                                if ((ret = parse_flow_num(optarg, MIN_FLOW_NUM,
 655                                                MAX_FLOW_NUM,
 656                                                &max_flow_num)) != 0) {
 657                                        printf("invalid value: \"%s\" for "
 658                                                "parameter %s\n",
 659                                                optarg,
 660                                                lgopts[option_index].name);
 661                                        print_usage(prgname);
 662                                        return ret;
 663                                }
 664                        }
 665
 666                        if (!strncmp(lgopts[option_index].name, "flowttl", 7)) {
 667                                if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL,
 668                                                MAX_FLOW_TTL,
 669                                                &max_flow_ttl)) != 0) {
 670                                        printf("invalid value: \"%s\" for "
 671                                                "parameter %s\n",
 672                                                optarg,
 673                                                lgopts[option_index].name);
 674                                        print_usage(prgname);
 675                                        return ret;
 676                                }
 677                        }
 678
 679                        break;
 680
 681                default:
 682                        print_usage(prgname);
 683                        return -1;
 684                }
 685        }
 686
 687        if (optind >= 0)
 688                argv[optind-1] = prgname;
 689
 690        ret = optind-1;
 691        optind = 1; /* reset getopt lib */
 692        return ret;
 693}
 694
 695static void
 696print_ethaddr(const char *name, const struct rte_ether_addr *eth_addr)
 697{
 698        char buf[RTE_ETHER_ADDR_FMT_SIZE];
 699        rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, eth_addr);
 700        printf("%s%s", name, buf);
 701}
 702
 703/* Check the link status of all ports in up to 9s, and print them finally */
 704static void
 705check_all_ports_link_status(uint32_t port_mask)
 706{
 707#define CHECK_INTERVAL 100 /* 100ms */
 708#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
 709        uint16_t portid;
 710        uint8_t count, all_ports_up, print_flag = 0;
 711        struct rte_eth_link link;
 712        int ret;
 713        char link_status_text[RTE_ETH_LINK_MAX_STR_LEN];
 714
 715        printf("\nChecking link status");
 716        fflush(stdout);
 717        for (count = 0; count <= MAX_CHECK_TIME; count++) {
 718                all_ports_up = 1;
 719                RTE_ETH_FOREACH_DEV(portid) {
 720                        if ((port_mask & (1 << portid)) == 0)
 721                                continue;
 722                        memset(&link, 0, sizeof(link));
 723                        ret = rte_eth_link_get_nowait(portid, &link);
 724                        if (ret < 0) {
 725                                all_ports_up = 0;
 726                                if (print_flag == 1)
 727                                        printf("Port %u link get failed: %s\n",
 728                                                portid, rte_strerror(-ret));
 729                                continue;
 730                        }
 731                        /* print link status if flag set */
 732                        if (print_flag == 1) {
 733                                rte_eth_link_to_str(link_status_text,
 734                                        sizeof(link_status_text), &link);
 735                                printf("Port %d %s\n", portid,
 736                                       link_status_text);
 737                                continue;
 738                        }
 739                        /* clear all_ports_up flag if any link down */
 740                        if (link.link_status == RTE_ETH_LINK_DOWN) {
 741                                all_ports_up = 0;
 742                                break;
 743                        }
 744                }
 745                /* after finally printing all link status, get out */
 746                if (print_flag == 1)
 747                        break;
 748
 749                if (all_ports_up == 0) {
 750                        printf(".");
 751                        fflush(stdout);
 752                        rte_delay_ms(CHECK_INTERVAL);
 753                }
 754
 755                /* set the print_flag if all ports up or timeout */
 756                if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
 757                        print_flag = 1;
 758                        printf("\ndone\n");
 759                }
 760        }
 761}
 762
 763static int
 764init_routing_table(void)
 765{
 766        struct rte_lpm *lpm;
 767        struct rte_lpm6 *lpm6;
 768        int socket, ret;
 769        unsigned i;
 770
 771        for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
 772                if (socket_lpm[socket]) {
 773                        lpm = socket_lpm[socket];
 774                        /* populate the LPM table */
 775                        for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
 776                                ret = rte_lpm_add(lpm,
 777                                        l3fwd_ipv4_route_array[i].ip,
 778                                        l3fwd_ipv4_route_array[i].depth,
 779                                        l3fwd_ipv4_route_array[i].if_out);
 780
 781                                if (ret < 0) {
 782                                        RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd "
 783                                                "LPM table\n", i);
 784                                        return -1;
 785                                }
 786
 787                                RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv4_BYTES_FMT
 788                                                "/%d (port %d)\n",
 789                                        socket,
 790                                        IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
 791                                        l3fwd_ipv4_route_array[i].depth,
 792                                        l3fwd_ipv4_route_array[i].if_out);
 793                        }
 794                }
 795
 796                if (socket_lpm6[socket]) {
 797                        lpm6 = socket_lpm6[socket];
 798                        /* populate the LPM6 table */
 799                        for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
 800                                ret = rte_lpm6_add(lpm6,
 801                                        l3fwd_ipv6_route_array[i].ip,
 802                                        l3fwd_ipv6_route_array[i].depth,
 803                                        l3fwd_ipv6_route_array[i].if_out);
 804
 805                                if (ret < 0) {
 806                                        RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd "
 807                                                "LPM6 table\n", i);
 808                                        return -1;
 809                                }
 810
 811                                RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv6_BYTES_FMT
 812                                                "/%d (port %d)\n",
 813                                        socket,
 814                                        IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
 815                                        l3fwd_ipv6_route_array[i].depth,
 816                                        l3fwd_ipv6_route_array[i].if_out);
 817                        }
 818                }
 819        }
 820        return 0;
 821}
 822
 823static int
 824setup_port_tbl(struct lcore_queue_conf *qconf, uint32_t lcore, int socket,
 825        uint32_t port)
 826{
 827        struct mbuf_table *mtb;
 828        uint32_t n;
 829        size_t sz;
 830
 831        n = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST);
 832        sz = sizeof (*mtb) + sizeof (mtb->m_table[0]) *  n;
 833
 834        if ((mtb = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE,
 835                        socket)) == NULL) {
 836                RTE_LOG(ERR, IP_RSMBL, "%s() for lcore: %u, port: %u "
 837                        "failed to allocate %zu bytes\n",
 838                        __func__, lcore, port, sz);
 839                return -1;
 840        }
 841
 842        mtb->len = n;
 843        qconf->tx_mbufs[port] = mtb;
 844
 845        return 0;
 846}
 847
 848static int
 849setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue)
 850{
 851        int socket;
 852        uint32_t nb_mbuf;
 853        uint64_t frag_cycles;
 854        char buf[RTE_MEMPOOL_NAMESIZE];
 855
 856        socket = rte_lcore_to_socket_id(lcore);
 857        if (socket == SOCKET_ID_ANY)
 858                socket = 0;
 859
 860        /* Each table entry holds information about packet fragmentation. 8< */
 861        frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S *
 862                max_flow_ttl;
 863
 864        if ((rxq->frag_tbl = rte_ip_frag_table_create(max_flow_num,
 865                        IP_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles,
 866                        socket)) == NULL) {
 867                RTE_LOG(ERR, IP_RSMBL, "ip_frag_tbl_create(%u) on "
 868                        "lcore: %u for queue: %u failed\n",
 869                        max_flow_num, lcore, queue);
 870                return -1;
 871        }
 872        /* >8 End of holding packet fragmentation. */
 873
 874        /*
 875         * At any given moment up to <max_flow_num * (MAX_FRAG_NUM)>
 876         * mbufs could be stored in the fragment table.
 877         * Plus, each TX queue can hold up to <max_flow_num> packets.
 878         */
 879
 880        /* mbufs stored in the fragment table. 8< */
 881        nb_mbuf = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM;
 882        nb_mbuf *= (port_conf.rxmode.mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN
 883                        + BUF_SIZE - 1) / BUF_SIZE;
 884        nb_mbuf *= 2; /* ipv4 and ipv6 */
 885        nb_mbuf += nb_rxd + nb_txd;
 886
 887        nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF);
 888
 889        snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue);
 890
 891        rxq->pool = rte_pktmbuf_pool_create(buf, nb_mbuf, MEMPOOL_CACHE_SIZE, 0,
 892                                            MBUF_DATA_SIZE, socket);
 893        if (rxq->pool == NULL) {
 894                RTE_LOG(ERR, IP_RSMBL,
 895                        "rte_pktmbuf_pool_create(%s) failed", buf);
 896                return -1;
 897        }
 898        /* >8 End of mbufs stored in the fragmentation table. */
 899
 900        return 0;
 901}
 902
 903static int
 904init_mem(void)
 905{
 906        char buf[PATH_MAX];
 907        struct rte_lpm *lpm;
 908        struct rte_lpm6 *lpm6;
 909        struct rte_lpm_config lpm_config;
 910        int socket;
 911        unsigned lcore_id;
 912
 913        /* traverse through lcores and initialize structures on each socket */
 914
 915        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 916
 917                if (rte_lcore_is_enabled(lcore_id) == 0)
 918                        continue;
 919
 920                socket = rte_lcore_to_socket_id(lcore_id);
 921
 922                if (socket == SOCKET_ID_ANY)
 923                        socket = 0;
 924
 925                if (socket_lpm[socket] == NULL) {
 926                        RTE_LOG(INFO, IP_RSMBL, "Creating LPM table on socket %i\n", socket);
 927                        snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket);
 928
 929                        lpm_config.max_rules = LPM_MAX_RULES;
 930                        lpm_config.number_tbl8s = 256;
 931                        lpm_config.flags = 0;
 932
 933                        lpm = rte_lpm_create(buf, socket, &lpm_config);
 934                        if (lpm == NULL) {
 935                                RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n");
 936                                return -1;
 937                        }
 938                        socket_lpm[socket] = lpm;
 939                }
 940
 941                if (socket_lpm6[socket] == NULL) {
 942                        RTE_LOG(INFO, IP_RSMBL, "Creating LPM6 table on socket %i\n", socket);
 943                        snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket);
 944
 945                        lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
 946                        if (lpm6 == NULL) {
 947                                RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n");
 948                                return -1;
 949                        }
 950                        socket_lpm6[socket] = lpm6;
 951                }
 952        }
 953
 954        return 0;
 955}
 956
 957static void
 958queue_dump_stat(void)
 959{
 960        uint32_t i, lcore;
 961        const struct lcore_queue_conf *qconf;
 962
 963        for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
 964                if (rte_lcore_is_enabled(lcore) == 0)
 965                        continue;
 966
 967                qconf = &lcore_queue_conf[lcore];
 968                for (i = 0; i < qconf->n_rx_queue; i++) {
 969
 970                        fprintf(stdout, " -- lcoreid=%u portid=%u "
 971                                "frag tbl stat:\n",
 972                                lcore,  qconf->rx_queue_list[i].portid);
 973                        rte_ip_frag_table_statistics_dump(stdout,
 974                                        qconf->rx_queue_list[i].frag_tbl);
 975                        fprintf(stdout, "TX bursts:\t%" PRIu64 "\n"
 976                                "TX packets _queued:\t%" PRIu64 "\n"
 977                                "TX packets dropped:\t%" PRIu64 "\n"
 978                                "TX packets send:\t%" PRIu64 "\n",
 979                                qconf->tx_stat.call,
 980                                qconf->tx_stat.queue,
 981                                qconf->tx_stat.drop,
 982                                qconf->tx_stat.send);
 983                }
 984        }
 985}
 986
 987static void
 988signal_handler(int signum)
 989{
 990        queue_dump_stat();
 991        if (signum != SIGUSR1)
 992                rte_exit(0, "received signal: %d, exiting\n", signum);
 993}
 994
 995int
 996main(int argc, char **argv)
 997{
 998        struct lcore_queue_conf *qconf;
 999        struct rte_eth_dev_info dev_info;
1000        struct rte_eth_txconf *txconf;
1001        struct rx_queue *rxq;
1002        int ret, socket;
1003        unsigned nb_ports;
1004        uint16_t queueid;
1005        unsigned lcore_id = 0, rx_lcore_id = 0;
1006        uint32_t n_tx_queue, nb_lcores;
1007        uint16_t portid;
1008
1009        /* init EAL */
1010        ret = rte_eal_init(argc, argv);
1011        if (ret < 0)
1012                rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
1013        argc -= ret;
1014        argv += ret;
1015
1016        /* parse application arguments (after the EAL ones) */
1017        ret = parse_args(argc, argv);
1018        if (ret < 0)
1019                rte_exit(EXIT_FAILURE, "Invalid IP reassembly parameters\n");
1020
1021        nb_ports = rte_eth_dev_count_avail();
1022        if (nb_ports == 0)
1023                rte_exit(EXIT_FAILURE, "No ports found!\n");
1024
1025        nb_lcores = rte_lcore_count();
1026
1027        /* initialize structures (mempools, lpm etc.) */
1028        if (init_mem() < 0)
1029                rte_panic("Cannot initialize memory structures!\n");
1030
1031        /* check if portmask has non-existent ports */
1032        if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
1033                rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");
1034
1035        /* initialize all ports */
1036        RTE_ETH_FOREACH_DEV(portid) {
1037                struct rte_eth_rxconf rxq_conf;
1038                struct rte_eth_conf local_port_conf = port_conf;
1039
1040                /* skip ports that are not enabled */
1041                if ((enabled_port_mask & (1 << portid)) == 0) {
1042                        printf("\nSkipping disabled port %d\n", portid);
1043                        continue;
1044                }
1045
1046                qconf = &lcore_queue_conf[rx_lcore_id];
1047
1048                /* limit the frame size to the maximum supported by NIC */
1049                ret = rte_eth_dev_info_get(portid, &dev_info);
1050                if (ret != 0)
1051                        rte_exit(EXIT_FAILURE,
1052                                "Error during getting device (port %u) info: %s\n",
1053                                portid, strerror(-ret));
1054
1055                local_port_conf.rxmode.mtu = RTE_MIN(
1056                    dev_info.max_mtu,
1057                    local_port_conf.rxmode.mtu);
1058
1059                /* get the lcore_id for this port */
1060                while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
1061                           qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
1062
1063                        rx_lcore_id++;
1064                        if (rx_lcore_id >= RTE_MAX_LCORE)
1065                                rte_exit(EXIT_FAILURE, "Not enough cores\n");
1066
1067                        qconf = &lcore_queue_conf[rx_lcore_id];
1068                }
1069
1070                socket = rte_lcore_to_socket_id(portid);
1071                if (socket == SOCKET_ID_ANY)
1072                        socket = 0;
1073
1074                queueid = qconf->n_rx_queue;
1075                rxq = &qconf->rx_queue_list[queueid];
1076                rxq->portid = portid;
1077                rxq->lpm = socket_lpm[socket];
1078                rxq->lpm6 = socket_lpm6[socket];
1079
1080                ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
1081                                                       &nb_txd);
1082                if (ret < 0)
1083                        rte_exit(EXIT_FAILURE,
1084                                 "Cannot adjust number of descriptors: err=%d, port=%d\n",
1085                                 ret, portid);
1086
1087                if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0)
1088                        rte_exit(EXIT_FAILURE, "Failed to set up queue table\n");
1089                qconf->n_rx_queue++;
1090
1091                /* init port */
1092                printf("Initializing port %d ... ", portid );
1093                fflush(stdout);
1094
1095                n_tx_queue = nb_lcores;
1096                if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
1097                        n_tx_queue = MAX_TX_QUEUE_PER_PORT;
1098                if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
1099                        local_port_conf.txmode.offloads |=
1100                                RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
1101
1102                local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
1103                        dev_info.flow_type_rss_offloads;
1104                if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
1105                                port_conf.rx_adv_conf.rss_conf.rss_hf) {
1106                        printf("Port %u modified RSS hash function based on hardware support,"
1107                                "requested:%#"PRIx64" configured:%#"PRIx64"\n",
1108                                portid,
1109                                port_conf.rx_adv_conf.rss_conf.rss_hf,
1110                                local_port_conf.rx_adv_conf.rss_conf.rss_hf);
1111                }
1112
1113                ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
1114                                            &local_port_conf);
1115                if (ret < 0) {
1116                        printf("\n");
1117                        rte_exit(EXIT_FAILURE, "Cannot configure device: "
1118                                "err=%d, port=%d\n",
1119                                ret, portid);
1120                }
1121
1122                /* init one RX queue */
1123                rxq_conf = dev_info.default_rxconf;
1124                rxq_conf.offloads = local_port_conf.rxmode.offloads;
1125                ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
1126                                             socket, &rxq_conf,
1127                                             rxq->pool);
1128                if (ret < 0) {
1129                        printf("\n");
1130                        rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
1131                                "err=%d, port=%d\n",
1132                                ret, portid);
1133                }
1134
1135                ret = rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
1136                if (ret < 0) {
1137                        printf("\n");
1138                        rte_exit(EXIT_FAILURE,
1139                                "rte_eth_macaddr_get: err=%d, port=%d\n",
1140                                ret, portid);
1141                }
1142
1143                print_ethaddr(" Address:", &ports_eth_addr[portid]);
1144                printf("\n");
1145
1146                /* init one TX queue per couple (lcore,port) */
1147                queueid = 0;
1148                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1149                        if (rte_lcore_is_enabled(lcore_id) == 0)
1150                                continue;
1151
1152                        socket = (int) rte_lcore_to_socket_id(lcore_id);
1153
1154                        printf("txq=%u,%d,%d ", lcore_id, queueid, socket);
1155                        fflush(stdout);
1156
1157                        txconf = &dev_info.default_txconf;
1158                        txconf->offloads = local_port_conf.txmode.offloads;
1159
1160                        ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
1161                                        socket, txconf);
1162                        if (ret < 0)
1163                                rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
1164                                        "port=%d\n", ret, portid);
1165
1166                        qconf = &lcore_queue_conf[lcore_id];
1167                        qconf->tx_queue_id[portid] = queueid;
1168                        setup_port_tbl(qconf, lcore_id, socket, portid);
1169                        queueid++;
1170                }
1171                printf("\n");
1172        }
1173
1174        printf("\n");
1175
1176        /* start ports */
1177        RTE_ETH_FOREACH_DEV(portid) {
1178                if ((enabled_port_mask & (1 << portid)) == 0) {
1179                        continue;
1180                }
1181                /* Start device */
1182                ret = rte_eth_dev_start(portid);
1183                if (ret < 0)
1184                        rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
1185                                ret, portid);
1186
1187                ret = rte_eth_promiscuous_enable(portid);
1188                if (ret != 0)
1189                        rte_exit(EXIT_FAILURE,
1190                                "rte_eth_promiscuous_enable: err=%s, port=%d\n",
1191                                rte_strerror(-ret), portid);
1192        }
1193
1194        if (init_routing_table() < 0)
1195                rte_exit(EXIT_FAILURE, "Cannot init routing table\n");
1196
1197        check_all_ports_link_status(enabled_port_mask);
1198
1199        signal(SIGUSR1, signal_handler);
1200        signal(SIGTERM, signal_handler);
1201        signal(SIGINT, signal_handler);
1202
1203        /* launch per-lcore init on every lcore */
1204        rte_eal_mp_remote_launch(main_loop, NULL, CALL_MAIN);
1205        RTE_LCORE_FOREACH_WORKER(lcore_id) {
1206                if (rte_eal_wait_lcore(lcore_id) < 0)
1207                        return -1;
1208        }
1209
1210        /* clean up the EAL */
1211        rte_eal_cleanup();
1212
1213        return 0;
1214}
1215