dpdk/app/test/test_lpm_perf.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 * Copyright(c) 2020 Arm Limited
 */

#include "test.h"

#ifdef RTE_EXEC_ENV_WINDOWS
static int
test_lpm_perf(void)
{
        printf("lpm_perf not supported on Windows, skipping test\n");
        return TEST_SKIPPED;
}

#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <pthread.h>

#include <rte_cycles.h>
#include <rte_random.h>
#include <rte_branch_prediction.h>
#include <rte_malloc.h>
#include <rte_ip.h>
#include <rte_lpm.h>

#include "test_xmmt_ops.h"

static struct rte_lpm *lpm;
static struct rte_rcu_qsbr *rv;
static volatile uint8_t writer_done;
static volatile uint32_t thr_id;
static uint64_t gwrite_cycles;
static uint32_t num_writers;
/* LPM APIs are not thread safe, use a mutex to provide thread safety */
static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Report quiescent state every 1024 lookups. Larger critical sections
 * in the reader will result in the writer polling multiple times.
 */
#define QSBR_REPORTING_INTERVAL 1024

#define TEST_LPM_ASSERT(cond) do {                                            \
        if (!(cond)) {                                                        \
                printf("Error at line %d:\n", __LINE__);                      \
                return -1;                                                    \
        }                                                                     \
} while (0)

#define ITERATIONS (1 << 10)
#define RCU_ITERATIONS 10
#define BATCH_SIZE (1 << 12)
#define BULK_SIZE 32

#define MAX_RULE_NUM (1200000)

struct route_rule {
        uint32_t ip;
        uint8_t depth;
};

static struct route_rule large_route_table[MAX_RULE_NUM];
/* Route table for routes with depth > 24 */
static struct route_rule large_ldepth_route_table[MAX_RULE_NUM];

static uint32_t num_route_entries;
static uint32_t num_ldepth_route_entries;
#define NUM_ROUTE_ENTRIES num_route_entries
#define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries

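/* Total adds (and deletes) performed across all writers combined: each
 * RCU perf iteration adds and then deletes every ldepth table entry once.
 */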
#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)

enum {
        IP_CLASS_A,
        IP_CLASS_B,
        IP_CLASS_C
};

/* struct route_rule_count defines the total number of rules per class.
 * Each item in a[]/b[]/c[] is the number of rules of common IP address
 * class A/B/C at that depth, not including the ones for private local
 * networks.
 */
struct route_rule_count {
        uint32_t a[RTE_LPM_MAX_DEPTH];
        uint32_t b[RTE_LPM_MAX_DEPTH];
        uint32_t c[RTE_LPM_MAX_DEPTH];
};

/* The following per-depth counts for each common IP class were taken
 * from the previous large constant table in app/test/test_lpm_routes.h.
 * In order to achieve comparable performance, they keep the same depth
 * and IP address coverage as that constant table. These numbers don't
 * include any private local IP addresses. As the previous large const
 * rule table was dumped from a real router, there are no addresses in
 * class D or E.
 */
static struct route_rule_count rule_count = {
        .a = { /* IP class A in which the most significant bit is 0 */
                    0, /* depth =  1 */
                    0, /* depth =  2 */
                    1, /* depth =  3 */
                    0, /* depth =  4 */
                    2, /* depth =  5 */
                    1, /* depth =  6 */
                    3, /* depth =  7 */
                  185, /* depth =  8 */
                   26, /* depth =  9 */
                   16, /* depth = 10 */
                   39, /* depth = 11 */
                  144, /* depth = 12 */
                  233, /* depth = 13 */
                  528, /* depth = 14 */
                  866, /* depth = 15 */
                 3856, /* depth = 16 */
                 3268, /* depth = 17 */
                 5662, /* depth = 18 */
                17301, /* depth = 19 */
                22226, /* depth = 20 */
                11147, /* depth = 21 */
                16746, /* depth = 22 */
                17120, /* depth = 23 */
                77578, /* depth = 24 */
                  401, /* depth = 25 */
                  656, /* depth = 26 */
                 1107, /* depth = 27 */
                 1121, /* depth = 28 */
                 2316, /* depth = 29 */
                  717, /* depth = 30 */
                   10, /* depth = 31 */
                   66  /* depth = 32 */
        },
        .b = { /* IP class B in which the most significant 2 bits are 10 */
                    0, /* depth =  1 */
                    0, /* depth =  2 */
                    0, /* depth =  3 */
                    0, /* depth =  4 */
                    1, /* depth =  5 */
                    1, /* depth =  6 */
                    1, /* depth =  7 */
                    3, /* depth =  8 */
                    3, /* depth =  9 */
                   30, /* depth = 10 */
                   25, /* depth = 11 */
                  168, /* depth = 12 */
                  305, /* depth = 13 */
                  569, /* depth = 14 */
                 1129, /* depth = 15 */
                50800, /* depth = 16 */
                 1645, /* depth = 17 */
                 1820, /* depth = 18 */
                 3506, /* depth = 19 */
                 3258, /* depth = 20 */
                 3424, /* depth = 21 */
                 4971, /* depth = 22 */
                 6885, /* depth = 23 */
                39771, /* depth = 24 */
                  424, /* depth = 25 */
                  170, /* depth = 26 */
                  433, /* depth = 27 */
                   92, /* depth = 28 */
                  366, /* depth = 29 */
                  377, /* depth = 30 */
                    2, /* depth = 31 */
                  200  /* depth = 32 */
        },
        .c = { /* IP class C in which the most significant 3 bits are 110 */
                     0, /* depth =  1 */
                     0, /* depth =  2 */
                     0, /* depth =  3 */
                     0, /* depth =  4 */
                     0, /* depth =  5 */
                     0, /* depth =  6 */
                     0, /* depth =  7 */
                    12, /* depth =  8 */
                     8, /* depth =  9 */
                     9, /* depth = 10 */
                    33, /* depth = 11 */
                    69, /* depth = 12 */
                   237, /* depth = 13 */
                  1007, /* depth = 14 */
                  1717, /* depth = 15 */
                 14663, /* depth = 16 */
                  8070, /* depth = 17 */
                 16185, /* depth = 18 */
                 48261, /* depth = 19 */
                 36870, /* depth = 20 */
                 33960, /* depth = 21 */
                 50638, /* depth = 22 */
                 61422, /* depth = 23 */
                466549, /* depth = 24 */
                  1829, /* depth = 25 */
                  4824, /* depth = 26 */
                  4927, /* depth = 27 */
                  5914, /* depth = 28 */
                 10254, /* depth = 29 */
                  4905, /* depth = 30 */
                     1, /* depth = 31 */
                   716  /* depth = 32 */
        }
};

static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth)
{
/* IP address class A, the most significant bit is 0 */
#define IP_HEAD_MASK_A                  0x00000000
#define IP_HEAD_BIT_NUM_A               1

/* IP address class B, the most significant 2 bits are 10 */
#define IP_HEAD_MASK_B                  0x80000000
#define IP_HEAD_BIT_NUM_B               2

/* IP address class C, the most significant 3 bits are 110 */
#define IP_HEAD_MASK_C                  0xC0000000
#define IP_HEAD_BIT_NUM_C               3

        uint32_t class_depth;
        uint32_t range;
        uint32_t mask;
        uint32_t step;
        uint32_t start;
        uint32_t fixed_bit_num;
        uint32_t ip_head_mask;
        uint32_t rule_num;
        uint32_t k;
        struct route_rule *ptr_rule, *ptr_ldepth_rule;

        if (ip_class == IP_CLASS_A) {        /* IP Address class A */
                fixed_bit_num = IP_HEAD_BIT_NUM_A;
                ip_head_mask = IP_HEAD_MASK_A;
                rule_num = rule_count.a[depth - 1];
        } else if (ip_class == IP_CLASS_B) { /* IP Address class B */
                fixed_bit_num = IP_HEAD_BIT_NUM_B;
                ip_head_mask = IP_HEAD_MASK_B;
                rule_num = rule_count.b[depth - 1];
        } else {                             /* IP Address class C */
                fixed_bit_num = IP_HEAD_BIT_NUM_C;
                ip_head_mask = IP_HEAD_MASK_C;
                rule_num = rule_count.c[depth - 1];
        }

        if (rule_num == 0)
                return;

        /* the number of remaining bits, excluding the most significant
         * fixed bits for this IP address class
         */
        class_depth = depth - fixed_bit_num;

        /* range is the maximum number of rules for this depth and
         * this IP address class
         */
        range = 1 << class_depth;

        /* mask covers only this depth's generated bits, excluding the
         * fixed bits for the IP address class
         */
        mask = range - 1;

        /* Widen coverage of IP address in generated rules */
        if (range <= rule_num)
                step = 1;
        else
                step = round((double)range / rule_num);
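        /* Example: for class A at depth 16, fixed_bit_num = 1, so
         * class_depth = 15 and range = 32768; with rule_num = 3856 the
         * step is round(32768 / 3856) = 8, spreading the generated
         * prefixes evenly across the available space.
         */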

        /* Only generate the remaining bits, excluding the most
         * significant fixed bits for the IP address class
         */
        start = lrand48() & mask;
        ptr_rule = &large_route_table[num_route_entries];
        ptr_ldepth_rule = &large_ldepth_route_table[num_ldepth_route_entries];
        for (k = 0; k < rule_num; k++) {
                ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth))
                        | ip_head_mask;
                ptr_rule->depth = depth;
                /* If the depth of the route is more than 24, store it
                 * in another table as well.
                 */
                if (depth > 24) {
                        ptr_ldepth_rule->ip = ptr_rule->ip;
                        ptr_ldepth_rule->depth = ptr_rule->depth;
                        ptr_ldepth_rule++;
                        num_ldepth_route_entries++;
                }
                ptr_rule++;
                start = (start + step) & mask;
        }
        num_route_entries += rule_num;
}

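/* Place a well-known rule at a random position in the table: the rule
 * previously occupying that slot is moved to the end of the table so
 * no generated rule is lost.
 */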
static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth)
{
        uint32_t pos;
        int try_count = 0;
        struct route_rule tmp;

        do {
                pos = lrand48();
                try_count++;
        } while ((try_count < 10) && (pos > num_route_entries));

        if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM))
                pos = num_route_entries >> 1;

        tmp = large_route_table[pos];
        large_route_table[pos].ip = ip;
        large_route_table[pos].depth = depth;
        if (num_route_entries < MAX_RULE_NUM)
                large_route_table[num_route_entries++] = tmp;
}

static void generate_large_route_rule_table(void)
{
        uint32_t ip_class;
        uint8_t  depth;

        num_route_entries = 0;
        num_ldepth_route_entries = 0;
        memset(large_route_table, 0, sizeof(large_route_table));

        for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) {
                for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) {
                        generate_random_rule_prefix(ip_class, depth);
                }
        }

        /* Add the following rules to match the previous large constant
         * table: 4 rules with private local IP addresses and 1 all-zeros
         * prefix with depth = 8.
         */
        insert_rule_in_random_pos(RTE_IPV4(0, 0, 0, 0), 8);
        insert_rule_in_random_pos(RTE_IPV4(10, 2, 23, 147), 32);
        insert_rule_in_random_pos(RTE_IPV4(192, 168, 100, 10), 24);
        insert_rule_in_random_pos(RTE_IPV4(192, 168, 25, 100), 24);
        insert_rule_in_random_pos(RTE_IPV4(192, 168, 129, 124), 32);
}

static void
print_route_distribution(const struct route_rule *table, uint32_t n)
{
        unsigned i, j;

        printf("Route distribution per prefix width:\n");
        printf("DEPTH    QUANTITY (PERCENT)\n");
        printf("---------------------------\n");

        /* Count depths. */
        for (i = 1; i <= 32; i++) {
                unsigned depth_counter = 0;
                double percent_hits;

                for (j = 0; j < n; j++)
                        if (table[j].depth == (uint8_t) i)
                                depth_counter++;

                percent_hits = ((double)depth_counter)/((double)n) * 100;
                printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits);
        }
        printf("\n");
}

/* Worker cores available for the reader/writer threads */
static uint16_t enabled_core_ids[RTE_MAX_LCORE];
static unsigned int num_cores;

/* Simple way to allocate thread ids in 0 to RTE_MAX_LCORE space */
static inline uint32_t
alloc_thread_id(void)
{
        uint32_t tmp_thr_id;

        tmp_thr_id = __atomic_fetch_add(&thr_id, 1, __ATOMIC_RELAXED);
        if (tmp_thr_id >= RTE_MAX_LCORE)
                printf("Invalid thread id %u\n", tmp_thr_id);

        return tmp_thr_id;
}

/*
 * Reader thread using rte_lpm data structure without RCU.
 */
static int
test_lpm_reader(void *arg)
{
        int i;
        uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
        uint32_t next_hop_return = 0;

        RTE_SET_USED(arg);
        do {
                for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
                        ip_batch[i] = rte_rand();

                for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
                        rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

        } while (!writer_done);

        return 0;
}

/*
 * Reader thread using rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_reader(void *arg)
{
        int i;
        uint32_t thread_id = alloc_thread_id();
        uint32_t ip_batch[QSBR_REPORTING_INTERVAL];
        uint32_t next_hop_return = 0;

        RTE_SET_USED(arg);
        /* Register this thread to report quiescent state */
        rte_rcu_qsbr_thread_register(rv, thread_id);
        rte_rcu_qsbr_thread_online(rv, thread_id);

        do {
                for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
                        ip_batch[i] = rte_rand();

                for (i = 0; i < QSBR_REPORTING_INTERVAL; i++)
                        rte_lpm_lookup(lpm, ip_batch[i], &next_hop_return);

                /* Update quiescent state */
                rte_rcu_qsbr_quiescent(rv, thread_id);
        } while (!writer_done);

        rte_rcu_qsbr_thread_offline(rv, thread_id);
        rte_rcu_qsbr_thread_unregister(rv, thread_id);

        return 0;
}

/*
 * Writer thread using rte_lpm data structure with RCU.
 */
static int
test_lpm_rcu_qsbr_writer(void *arg)
{
        unsigned int i, j, si, ei;
        uint64_t begin, total_cycles;
        uint32_t next_hop_add = 0xAA;
        uint8_t pos_core = (uint8_t)((uintptr_t)arg);

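        /* Each writer works on an equal, contiguous slice [si, ei) of
         * the ldepth route table, so together the writers cover every
         * entry exactly once per iteration.
         */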
        si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
        ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;

        /* Measure add/delete. */
        begin = rte_rdtsc_precise();
        for (i = 0; i < RCU_ITERATIONS; i++) {
                /* Add all the entries */
                for (j = si; j < ei; j++) {
                        if (num_writers > 1)
                                pthread_mutex_lock(&lpm_mutex);
                        if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
                                        large_ldepth_route_table[j].depth,
                                        next_hop_add) != 0) {
                                printf("Failed to add iteration %u, route# %u\n",
                                        i, j);
                                goto error;
                        }
                        if (num_writers > 1)
                                pthread_mutex_unlock(&lpm_mutex);
                }

                /* Delete all the entries */
                for (j = si; j < ei; j++) {
                        if (num_writers > 1)
                                pthread_mutex_lock(&lpm_mutex);
                        if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
                                large_ldepth_route_table[j].depth) != 0) {
                                printf("Failed to delete iteration %u, route# %u\n",
                                        i, j);
                                goto error;
                        }
                        if (num_writers > 1)
                                pthread_mutex_unlock(&lpm_mutex);
                }
        }

        total_cycles = rte_rdtsc_precise() - begin;

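        /* Sum this writer's elapsed cycles; the launcher averages the
         * combined total over TOTAL_WRITES add/delete operations.
         */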
        __atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);

        return 0;

error:
        if (num_writers > 1)
                pthread_mutex_unlock(&lpm_mutex);
        return -1;
}

/*
 * Functional test:
 * 1/2 writers, rest are readers
 */
static int
test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
{
        struct rte_lpm_config config;
        size_t sz;
        unsigned int i, j;
        uint16_t core_id;
        struct rte_lpm_rcu_config rcu_cfg = {0};
        int (*reader_f)(void *arg) = NULL;

        if (rte_lcore_count() < 3) {
                printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
                return TEST_SKIPPED;
        }

        num_cores = 0;
        RTE_LCORE_FOREACH_WORKER(core_id) {
                enabled_core_ids[num_cores] = core_id;
                num_cores++;
        }

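        /* Run the measurement twice: first with a single writer, then
         * with two writers; all remaining worker cores run as readers.
         */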
        for (j = 1; j < 3; j++) {
                if (use_rcu)
                        printf("\nPerf test: %u writer(s), %u reader(s),"
                               " RCU integration enabled\n", j, num_cores - j);
                else
                        printf("\nPerf test: %u writer(s), %u reader(s),"
                               " RCU integration disabled\n", j, num_cores - j);

                num_writers = j;

                /* Create LPM table */
                config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
                config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
                config.flags = 0;
                lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
                TEST_LPM_ASSERT(lpm != NULL);

                /* Init RCU variable */
                if (use_rcu) {
                        sz = rte_rcu_qsbr_get_memsize(num_cores);
                        rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
                                                        RTE_CACHE_LINE_SIZE);
                        rte_rcu_qsbr_init(rv, num_cores);

                        rcu_cfg.v = rv;
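                        /* rcu_cfg.mode is left at 0 (RTE_LPM_QSBR_MODE_DQ,
                         * the default): freed tbl8 groups go onto a defer
                         * queue and are reclaimed once readers report
                         * quiescence.
                         */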
                        /* Assign the RCU variable to LPM */
                        if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
                                printf("RCU variable assignment failed\n");
                                goto error;
                        }

                        reader_f = test_lpm_rcu_qsbr_reader;
                } else
                        reader_f = test_lpm_reader;

                writer_done = 0;
                __atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);

                __atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);

                /* Launch reader threads */
                for (i = j; i < num_cores; i++)
                        rte_eal_remote_launch(reader_f, NULL,
                                                enabled_core_ids[i]);

                /* Launch writer threads */
                for (i = 0; i < j; i++)
                        rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
                                                (void *)(uintptr_t)i,
                                                enabled_core_ids[i]);

                /* Wait for writer threads */
                for (i = 0; i < j; i++)
                        if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
                                goto error;

                printf("Total LPM Adds: %u\n", TOTAL_WRITES);
                printf("Total LPM Deletes: %u\n", TOTAL_WRITES);
                printf("Average LPM Add/Del: %"PRIu64" cycles\n",
                        __atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
                        / TOTAL_WRITES);

                writer_done = 1;
                /* Wait until all readers have exited */
                for (i = j; i < num_cores; i++)
                        rte_eal_wait_lcore(enabled_core_ids[i]);

                rte_lpm_free(lpm);
                rte_free(rv);
                lpm = NULL;
                rv = NULL;
        }

        return 0;

error:
        writer_done = 1;
        /* Wait until all readers have exited */
        rte_eal_mp_wait_lcore();

        rte_lpm_free(lpm);
        rte_free(rv);

        return -1;
}

static int
test_lpm_perf(void)
{
        struct rte_lpm_config config;

        config.max_rules = 2000000;
        config.number_tbl8s = 2048;
        config.flags = 0;
        uint64_t begin, total_time, lpm_used_entries = 0;
        unsigned i, j;
        uint32_t next_hop_add = 0xAA, next_hop_return = 0;
        int status = 0;
        uint64_t cache_line_counter = 0;
        int64_t count = 0;

        rte_srand(rte_rdtsc());

        generate_large_route_rule_table();

        printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES);

        print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES);

        lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
        TEST_LPM_ASSERT(lpm != NULL);

        /* Measure add. */
        begin = rte_rdtsc();

        for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
                if (rte_lpm_add(lpm, large_route_table[i].ip,
                                large_route_table[i].depth, next_hop_add) == 0)
                        status++;
        }
        /* End Timer. */
        total_time = rte_rdtsc() - begin;

        printf("Unique added entries = %d\n", status);
        /* Obtain add statistics. */
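        /* Walk tbl24 and count the valid entries; every 32-entry stride
         * in which at least one new valid entry appeared is counted as
         * one touched cache line.
         */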
        for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) {
                if (lpm->tbl24[i].valid)
                        lpm_used_entries++;

                if (i % 32 == 0) {
                        if ((uint64_t)count < lpm_used_entries) {
                                cache_line_counter++;
                                count = lpm_used_entries;
                        }
                }
        }

        printf("Used table 24 entries = %u (%g%%)\n",
                        (unsigned) lpm_used_entries,
                        (lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES);
        printf("64 byte Cache entries used = %u (%u bytes)\n",
                        (unsigned) cache_line_counter, (unsigned) cache_line_counter * 64);

        printf("Average LPM Add: %g cycles\n",
                        (double)total_time / NUM_ROUTE_ENTRIES);

        /* Measure single Lookup */
        total_time = 0;
        count = 0;

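        /* Each iteration fills a fresh batch of random IPv4 addresses
         * and times BATCH_SIZE scalar lookups; random addresses mostly
         * miss the table, and misses are reported as the fail rate.
         */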
        for (i = 0; i < ITERATIONS; i++) {
                static uint32_t ip_batch[BATCH_SIZE];

                for (j = 0; j < BATCH_SIZE; j++)
                        ip_batch[j] = rte_rand();

                /* Lookup per batch */
                begin = rte_rdtsc();

                for (j = 0; j < BATCH_SIZE; j++) {
                        if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0)
                                count++;
                }

                total_time += rte_rdtsc() - begin;

        }
        printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
                        (double)total_time / ((double)ITERATIONS * BATCH_SIZE),
                        (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

        /* Measure bulk Lookup */
        total_time = 0;
        count = 0;
        for (i = 0; i < ITERATIONS; i++) {
                static uint32_t ip_batch[BATCH_SIZE];
                uint32_t next_hops[BULK_SIZE];

                /* Create array of random IP addresses */
                for (j = 0; j < BATCH_SIZE; j++)
                        ip_batch[j] = rte_rand();

                /* Lookup per batch */
                begin = rte_rdtsc();
                for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) {
                        unsigned k;
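                        /* rte_lpm_lookup_bulk() writes one result per IP;
                         * on a hit the RTE_LPM_LOOKUP_SUCCESS flag is set
                         * in next_hops[k], so a clear flag counts as a miss.
                         */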
                        rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE);
                        for (k = 0; k < BULK_SIZE; k++)
                                if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS)))
                                        count++;
                }

                total_time += rte_rdtsc() - begin;
        }
        printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
                        (double)total_time / ((double)ITERATIONS * BATCH_SIZE),
                        (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

        /* Measure LookupX4 */
        total_time = 0;
        count = 0;
        for (i = 0; i < ITERATIONS; i++) {
                static uint32_t ip_batch[BATCH_SIZE];
                uint32_t next_hops[4];

                /* Create array of random IP addresses */
                for (j = 0; j < BATCH_SIZE; j++)
                        ip_batch[j] = rte_rand();

                /* Lookup per batch */
                begin = rte_rdtsc();
                for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) {
                        unsigned k;
                        xmm_t ipx4;

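                        /* Load four IPs with an unaligned vector load; misses
                         * return the default value passed to rte_lpm_lookupx4()
                         * (UINT32_MAX here), which is how failures are counted.
                         */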
                        ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j));
                        rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX);
                        for (k = 0; k < RTE_DIM(next_hops); k++)
                                if (unlikely(next_hops[k] == UINT32_MAX))
                                        count++;
                }

                total_time += rte_rdtsc() - begin;
        }
        printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n",
                        (double)total_time / ((double)ITERATIONS * BATCH_SIZE),
                        (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE));

        /* Measure Delete */
        status = 0;
        begin = rte_rdtsc();

        for (i = 0; i < NUM_ROUTE_ENTRIES; i++) {
                /* rte_lpm_delete(lpm, ip, depth) */
                status += rte_lpm_delete(lpm, large_route_table[i].ip,
                                large_route_table[i].depth);
        }

        total_time = rte_rdtsc() - begin;

        printf("Average LPM Delete: %g cycles\n",
                        (double)total_time / NUM_ROUTE_ENTRIES);

        rte_lpm_delete_all(lpm);
        rte_lpm_free(lpm);

        if (test_lpm_rcu_perf_multi_writer(0) < 0)
                return -1;

        if (test_lpm_rcu_perf_multi_writer(1) < 0)
                return -1;

        return 0;
}

#endif /* !RTE_EXEC_ENV_WINDOWS */

REGISTER_TEST_COMMAND(lpm_perf_autotest, test_lpm_perf);