dpdk/app/test/test_distributor_perf.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2017 Intel Corporation
   3 */
   4
   5#include "test.h"
   6
   7#include <unistd.h>
   8#include <string.h>
   9#include <rte_mempool.h>
  10#include <rte_cycles.h>
  11#include <rte_common.h>
  12#include <rte_mbuf.h>
  13#include <rte_distributor.h>
  14#include <rte_pause.h>
  15
  16#define ITER_POWER_CL 25 /* log 2 of how many iterations  for Cache Line test */
  17#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
  18#define BURST 64
  19#define BIG_BATCH 1024
  20
  21/* static vars - zero initialized by default */
  22static volatile int quit;
  23static volatile unsigned worker_idx;
  24
  25struct worker_stats {
  26        volatile unsigned handled_packets;
  27} __rte_cache_aligned;
  28static struct worker_stats worker_stats[RTE_MAX_LCORE];
  29
  30/*
  31 * worker thread used for testing the time to do a round-trip of a cache
  32 * line between two cores and back again
  33 */
  34static int
  35flip_bit(volatile uint64_t *arg)
  36{
  37        uint64_t old_val = 0;
  38        while (old_val != 2) {
  39                while (!*arg)
  40                        rte_pause();
  41                old_val = *arg;
  42                *arg = 0;
  43        }
  44        return 0;
  45}
  46
  47/*
  48 * test case to time the number of cycles to round-trip a cache line between
  49 * two cores and back again.
  50 */
  51static void
  52time_cache_line_switch(void)
  53{
  54        /* allocate a full cache line for data, we use only first byte of it */
  55        uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)];
  56
  57        unsigned int i, workerid = rte_get_next_lcore(rte_lcore_id(), 0, 0);
  58        volatile uint64_t *pdata = &data[0];
  59        *pdata = 1;
  60        rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], workerid);
  61        while (*pdata)
  62                rte_pause();
  63
  64        const uint64_t start_time = rte_rdtsc();
  65        for (i = 0; i < (1 << ITER_POWER_CL); i++) {
  66                while (*pdata)
  67                        rte_pause();
  68                *pdata = 1;
  69        }
  70        const uint64_t end_time = rte_rdtsc();
  71
  72        while (*pdata)
  73                rte_pause();
  74        *pdata = 2;
  75        rte_eal_wait_lcore(workerid);
  76        printf("==== Cache line switch test ===\n");
  77        printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
  78                        end_time-start_time);
  79        printf("Ticks per iteration = %"PRIu64"\n\n",
  80                        (end_time-start_time) >> ITER_POWER_CL);
  81}
  82
  83/*
  84 * returns the total count of the number of packets handled by the worker
  85 * functions given below.
  86 */
  87static unsigned
  88total_packet_count(void)
  89{
  90        unsigned i, count = 0;
  91        for (i = 0; i < worker_idx; i++)
  92                count += worker_stats[i].handled_packets;
  93        return count;
  94}
  95
  96/* resets the packet counts for a new test */
  97static void
  98clear_packet_count(void)
  99{
 100        memset(&worker_stats, 0, sizeof(worker_stats));
 101}
 102
 103/*
 104 * This is the basic worker function for performance tests.
 105 * it does nothing but return packets and count them.
 106 */
 107static int
 108handle_work(void *arg)
 109{
 110        struct rte_distributor *d = arg;
 111        unsigned int num = 0;
 112        int i;
 113        unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
 114        struct rte_mbuf *buf[8] __rte_cache_aligned;
 115
 116        for (i = 0; i < 8; i++)
 117                buf[i] = NULL;
 118
 119        num = rte_distributor_get_pkt(d, id, buf, buf, num);
 120        while (!quit) {
 121                worker_stats[id].handled_packets += num;
 122                num = rte_distributor_get_pkt(d, id, buf, buf, num);
 123        }
 124        worker_stats[id].handled_packets += num;
 125        rte_distributor_return_pkt(d, id, buf, num);
 126        return 0;
 127}
 128
 129/*
 130 * This basic performance test just repeatedly sends in 32 packets at a time
 131 * to the distributor and verifies at the end that we got them all in the worker
 132 * threads and finally how long per packet the processing took.
 133 */
 134static inline int
 135perf_test(struct rte_distributor *d, struct rte_mempool *p)
 136{
 137        unsigned int i;
 138        uint64_t start, end;
 139        struct rte_mbuf *bufs[BURST];
 140
 141        clear_packet_count();
 142        if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
 143                printf("Error getting mbufs from pool\n");
 144                return -1;
 145        }
 146        /* ensure we have different hash value for each pkt */
 147        for (i = 0; i < BURST; i++)
 148                bufs[i]->hash.usr = i;
 149
 150        start = rte_rdtsc();
 151        for (i = 0; i < (1<<ITER_POWER); i++)
 152                rte_distributor_process(d, bufs, BURST);
 153        end = rte_rdtsc();
 154
 155        do {
 156                usleep(100);
 157                rte_distributor_process(d, NULL, 0);
 158        } while (total_packet_count() < (BURST << ITER_POWER));
 159
 160        rte_distributor_clear_returns(d);
 161
 162        printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
 163        printf("Time per packet: %"PRIu64"\n\n",
 164                        ((end - start) >> ITER_POWER)/BURST);
 165        rte_mempool_put_bulk(p, (void *)bufs, BURST);
 166
 167        for (i = 0; i < rte_lcore_count() - 1; i++)
 168                printf("Worker %u handled %u packets\n", i,
 169                                worker_stats[i].handled_packets);
 170        printf("Total packets: %u (%x)\n", total_packet_count(),
 171                        total_packet_count());
 172        printf("=== Perf test done ===\n\n");
 173
 174        return 0;
 175}
 176
 177/* Useful function which ensures that all worker functions terminate */
 178static void
 179quit_workers(struct rte_distributor *d, struct rte_mempool *p)
 180{
 181        const unsigned int num_workers = rte_lcore_count() - 1;
 182        unsigned int i;
 183        struct rte_mbuf *bufs[RTE_MAX_LCORE];
 184
 185        rte_mempool_get_bulk(p, (void *)bufs, num_workers);
 186
 187        quit = 1;
 188        for (i = 0; i < num_workers; i++) {
 189                bufs[i]->hash.usr = i << 1;
 190                rte_distributor_process(d, &bufs[i], 1);
 191        }
 192
 193        rte_mempool_put_bulk(p, (void *)bufs, num_workers);
 194
 195        rte_distributor_process(d, NULL, 0);
 196        rte_distributor_flush(d);
 197        rte_eal_mp_wait_lcore();
 198        quit = 0;
 199        worker_idx = 0;
 200}
 201
 202static int
 203test_distributor_perf(void)
 204{
 205        static struct rte_distributor *ds;
 206        static struct rte_distributor *db;
 207        static struct rte_mempool *p;
 208
 209        if (rte_lcore_count() < 2) {
 210                printf("Not enough cores for distributor_perf_autotest, expecting at least 2\n");
 211                return TEST_SKIPPED;
 212        }
 213
 214        /* first time how long it takes to round-trip a cache line */
 215        time_cache_line_switch();
 216
 217        if (ds == NULL) {
 218                ds = rte_distributor_create("Test_perf", rte_socket_id(),
 219                                rte_lcore_count() - 1,
 220                                RTE_DIST_ALG_SINGLE);
 221                if (ds == NULL) {
 222                        printf("Error creating distributor\n");
 223                        return -1;
 224                }
 225        } else {
 226                rte_distributor_clear_returns(ds);
 227        }
 228
 229        if (db == NULL) {
 230                db = rte_distributor_create("Test_burst", rte_socket_id(),
 231                                rte_lcore_count() - 1,
 232                                RTE_DIST_ALG_BURST);
 233                if (db == NULL) {
 234                        printf("Error creating burst distributor\n");
 235                        return -1;
 236                }
 237        } else {
 238                rte_distributor_clear_returns(db);
 239        }
 240
 241        const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
 242                        (BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
 243        if (p == NULL) {
 244                p = rte_pktmbuf_pool_create("DPT_MBUF_POOL", nb_bufs, BURST,
 245                        0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 246                if (p == NULL) {
 247                        printf("Error creating mempool\n");
 248                        return -1;
 249                }
 250        }
 251
 252        printf("=== Performance test of distributor (single mode) ===\n");
 253        rte_eal_mp_remote_launch(handle_work, ds, SKIP_MAIN);
 254        if (perf_test(ds, p) < 0)
 255                return -1;
 256        quit_workers(ds, p);
 257
 258        printf("=== Performance test of distributor (burst mode) ===\n");
 259        rte_eal_mp_remote_launch(handle_work, db, SKIP_MAIN);
 260        if (perf_test(db, p) < 0)
 261                return -1;
 262        quit_workers(db, p);
 263
 264        return 0;
 265}
 266
 267REGISTER_TEST_COMMAND(distributor_perf_autotest, test_distributor_perf);
 268