dpdk/app/test-compress-perf/comp_perf_test_cyclecount.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2019 Intel Corporation
 */

#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_spinlock.h>
#include <rte_compressdev.h>

#include "comp_perf_test_cyclecount.h"

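/*
 * Per-queue-pair context for the cycle-count test. It embeds the verify-test
 * context (options and memory resources) and accumulates retry counters and
 * TSC cycle totals for op setup, enqueue and dequeue.
 */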
struct cperf_cyclecount_ctx {
        struct cperf_verify_ctx ver;

        uint32_t ops_enq_retries;
        uint32_t ops_deq_retries;

        uint64_t duration_op;
        uint64_t duration_enq;
        uint64_t duration_deq;
};

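/* Release the memory allocated by the constructor, then the context itself */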
void
cperf_cyclecount_test_destructor(void *arg)
{
        struct cperf_cyclecount_ctx *ctx = arg;

        if (arg) {
                comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
                rte_free(arg);
        }
}

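/*
 * Allocate and initialize the test context for one device/queue pair.
 * The embedded verification part is marked silent so that only the
 * cycle-count results are printed.
 */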
void *
cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
                struct comp_test_data *options)
{
        struct cperf_cyclecount_ctx *ctx = NULL;

        ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);

        if (ctx == NULL)
                return NULL;

        ctx->ver.mem.dev_id = dev_id;
        ctx->ver.mem.qp_id = qp_id;
        ctx->ver.options = options;
        ctx->ver.silent = 1; /* run the verification part silently */

        if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
                        && !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
                return ctx;

        cperf_cyclecount_test_destructor(ctx);
        return NULL;
}

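/*
 * Measure the pure software cost of preparing operations: ops are taken from
 * the mempool, attached to their source/destination mbufs and fully
 * configured, but enqueue and dequeue are only simulated. main_loop() times
 * this function as a whole to derive the setup cost per op.
 */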
static int
cperf_cyclecount_op_setup(struct rte_comp_op **ops,
                                 struct cperf_cyclecount_ctx *ctx,
                                 struct rte_mbuf **input_bufs,
                                 struct rte_mbuf **output_bufs,
                                 void *priv_xform,
                                 uint32_t out_seg_sz)
{
        struct comp_test_data *test_data = ctx->ver.options;
        struct cperf_mem_resources *mem = &ctx->ver.mem;

        uint32_t i, iter, num_iter;
        int res = 0;
        uint16_t ops_needed;

        num_iter = test_data->num_iter;

        for (iter = 0; iter < num_iter; iter++) {
                uint32_t remaining_ops = mem->total_bufs;
                uint32_t total_deq_ops = 0;
                uint32_t total_enq_ops = 0;
                uint16_t num_enq = 0;
                uint16_t num_deq = 0;

                while (remaining_ops > 0) {
                        uint16_t num_ops = RTE_MIN(remaining_ops,
                                                   test_data->burst_sz);
                        ops_needed = num_ops;

                        /* Allocate compression operations */
                        if (ops_needed && rte_mempool_get_bulk(
                                                mem->op_pool,
                                                (void **)ops,
                                                ops_needed) != 0) {
                                RTE_LOG(ERR, USER1,
                                      "Cyclecount: could not allocate enough operations\n");
                                res = -1;
                                goto end;
                        }

                        for (i = 0; i < ops_needed; i++) {

                                /*
                                 * Calculate next buffer to attach to operation
                                 */
                                uint32_t buf_id = total_enq_ops + i;
                                uint16_t op_id = i;

                                /* Reset all data in output buffers */
                                struct rte_mbuf *m = output_bufs[buf_id];

                                m->pkt_len = out_seg_sz * m->nb_segs;
                                while (m) {
                                        m->data_len = m->buf_len - m->data_off;
                                        m = m->next;
                                }
                                ops[op_id]->m_src = input_bufs[buf_id];
                                ops[op_id]->m_dst = output_bufs[buf_id];
                                ops[op_id]->src.offset = 0;
                                ops[op_id]->src.length =
                                        rte_pktmbuf_pkt_len(input_bufs[buf_id]);
                                ops[op_id]->dst.offset = 0;
                                ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
                                ops[op_id]->input_chksum = buf_id;
                                ops[op_id]->private_xform = priv_xform;
                        }

                        /* E N Q U E U I N G */
                        /*
                         * Assume all ops were enqueued, instead of doing
                         * the real enqueue operation.
                         */
                        num_enq = num_ops;

                        remaining_ops -= num_enq;
                        total_enq_ops += num_enq;

                        /* D E Q U E U I N G */
                        /*
                         * Assume all ops were dequeued, instead of doing
                         * the real dequeue operation.
                         */
                        num_deq = num_ops;

                        total_deq_ops += num_deq;
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)ops, num_deq);
                }
        }
        return res;
end:
        /*
         * The only jump here is from a failed rte_mempool_get_bulk(), which
         * takes nothing from the pool on failure. Ops from earlier bursts
         * were already returned after each simulated dequeue, and the ops
         * array itself is owned (and freed) by the caller, so returning
         * ops_needed ops or freeing the array here would corrupt the pool
         * and cause a double free.
         */
        return res;
}

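/*
 * Timed test body: builds the compress or decompress xform, measures the op
 * setup cost via cperf_cyclecount_op_setup(), then runs the real
 * enqueue/dequeue bursts, accumulating TSC cycles and retry counts in ctx.
 */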
static int
main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
{
        struct comp_test_data *test_data = ctx->ver.options;
        struct cperf_mem_resources *mem = &ctx->ver.mem;
        uint8_t dev_id = mem->dev_id;
        uint32_t i, iter, num_iter;
        struct rte_comp_op **ops, **deq_ops;
        void *priv_xform = NULL;
        struct rte_comp_xform xform;
        struct rte_mbuf **input_bufs, **output_bufs;
        int ret, res = 0;
        int allocated = 0;
        uint32_t out_seg_sz;

        uint64_t tsc_start, tsc_end, tsc_duration;

        if (test_data == NULL || !test_data->burst_sz) {
                RTE_LOG(ERR, USER1, "Unknown burst size\n");
                return -1;
        }
        ctx->duration_enq = 0;
        ctx->duration_deq = 0;
        ctx->ops_enq_retries = 0;
        ctx->ops_deq_retries = 0;

        /* one array for both enqueue and dequeue */
        ops = rte_zmalloc_socket(NULL,
                2 * mem->total_bufs * sizeof(struct rte_comp_op *),
                0, rte_socket_id());

        if (ops == NULL) {
                RTE_LOG(ERR, USER1,
                        "Can't allocate memory for ops structures\n");
                return -1;
        }

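        /* The second half of the array receives the dequeued ops */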
        deq_ops = &ops[mem->total_bufs];

        if (type == RTE_COMP_COMPRESS) {
                xform = (struct rte_comp_xform) {
                        .type = RTE_COMP_COMPRESS,
                        .compress = {
                                .algo = RTE_COMP_ALGO_DEFLATE,
                                .deflate.huffman = test_data->huffman_enc,
                                .level = test_data->level,
                                .window_size = test_data->window_sz,
                                .chksum = RTE_COMP_CHECKSUM_NONE,
                                .hash_algo = RTE_COMP_HASH_ALGO_NONE
                        }
                };
                input_bufs = mem->decomp_bufs;
                output_bufs = mem->comp_bufs;
                out_seg_sz = test_data->out_seg_sz;
        } else {
                xform = (struct rte_comp_xform) {
                        .type = RTE_COMP_DECOMPRESS,
                        .decompress = {
                                .algo = RTE_COMP_ALGO_DEFLATE,
                                .chksum = RTE_COMP_CHECKSUM_NONE,
                                .window_size = test_data->window_sz,
                                .hash_algo = RTE_COMP_HASH_ALGO_NONE
                        }
                };
                input_bufs = mem->comp_bufs;
                output_bufs = mem->decomp_bufs;
                out_seg_sz = test_data->seg_sz;
        }

        /* Create private xform */
        if (rte_compressdev_private_xform_create(dev_id, &xform,
                                                &priv_xform) < 0) {
                RTE_LOG(ERR, USER1, "Private xform could not be created\n");
                res = -1;
                goto end;
        }

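        /*
         * Time the op-setup path in isolation; the result feeds the
         * "setup/op" column of the report.
         */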
        tsc_start = rte_rdtsc_precise();
        ret = cperf_cyclecount_op_setup(ops,
                                ctx,
                                input_bufs,
                                output_bufs,
                                priv_xform,
                                out_seg_sz);

        tsc_end = rte_rdtsc_precise();

        /* ret check is postponed so the extra 'if' does not bias the timing */
        if (ret < 0) {
                RTE_LOG(ERR, USER1, "Setup function failed\n");
                res = -1;
                goto end;
        }

        tsc_duration = tsc_end - tsc_start;
        ctx->duration_op = tsc_duration;

        num_iter = test_data->num_iter;
        for (iter = 0; iter < num_iter; iter++) {
                uint32_t total_ops = mem->total_bufs;
                uint32_t remaining_ops = mem->total_bufs;
                uint32_t total_deq_ops = 0;
                uint32_t total_enq_ops = 0;
                uint16_t ops_unused = 0;
                uint16_t num_enq = 0;
                uint16_t num_deq = 0;

                while (remaining_ops > 0) {
                        uint16_t num_ops = RTE_MIN(remaining_ops,
                                                   test_data->burst_sz);
                        uint16_t ops_needed = num_ops - ops_unused;

                        /*
                         * Move the unused operations from the previous
                         * enqueue_burst call to the front, to maintain order
                         */
                        if ((ops_unused > 0) && (num_enq > 0)) {
                                size_t nb_b_to_mov =
                                      ops_unused * sizeof(struct rte_comp_op *);

                                memmove(ops, &ops[num_enq], nb_b_to_mov);
                        }

                        /* Allocate compression operations */
                        if (ops_needed && rte_mempool_get_bulk(
                                                mem->op_pool,
                                                (void **)ops,
                                                ops_needed) != 0) {
                                RTE_LOG(ERR, USER1,
                                      "Could not allocate enough operations\n");
                                res = -1;
                                goto end;
                        }
                        allocated += ops_needed;

                        for (i = 0; i < ops_needed; i++) {
                                /*
                                 * Calculate next buffer to attach to operation
                                 */
                                uint32_t buf_id = total_enq_ops + i +
                                                ops_unused;
                                uint16_t op_id = ops_unused + i;
                                /* Reset all data in output buffers */
                                struct rte_mbuf *m = output_bufs[buf_id];

                                m->pkt_len = out_seg_sz * m->nb_segs;
                                while (m) {
                                        m->data_len = m->buf_len - m->data_off;
                                        m = m->next;
                                }
                                ops[op_id]->m_src = input_bufs[buf_id];
                                ops[op_id]->m_dst = output_bufs[buf_id];
                                ops[op_id]->src.offset = 0;
                                ops[op_id]->src.length =
                                        rte_pktmbuf_pkt_len(input_bufs[buf_id]);
                                ops[op_id]->dst.offset = 0;
                                ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
                                ops[op_id]->input_chksum = buf_id;
                                ops[op_id]->private_xform = priv_xform;
                        }

                        if (unlikely(test_data->perf_comp_force_stop))
                                goto end;

                        tsc_start = rte_rdtsc_precise();
                        num_enq = rte_compressdev_enqueue_burst(dev_id,
                                                                mem->qp_id, ops,
                                                                num_ops);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_enq += tsc_duration;

                        if (num_enq < num_ops)
                                ctx->ops_enq_retries++;

                        if (test_data->cyclecount_delay)
                                rte_delay_us_block(test_data->cyclecount_delay);

                        if (num_enq == 0) {
                                struct rte_compressdev_stats stats;

                                rte_compressdev_stats_get(dev_id, &stats);
                                if (stats.enqueue_err_count) {
                                        res = -1;
                                        goto end;
                                }
                        }

                        ops_unused = num_ops - num_enq;
                        remaining_ops -= num_enq;
                        total_enq_ops += num_enq;

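                        /*
                         * Poll for however many ops are still in flight
                         * ('allocated'); anything not ready yet is picked up
                         * by a later burst or by the drain loop below.
                         */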
                        tsc_start = rte_rdtsc_precise();
                        num_deq = rte_compressdev_dequeue_burst(dev_id,
                                                           mem->qp_id,
                                                           deq_ops,
                                                           allocated);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_deq += tsc_duration;

                        if (num_deq < allocated)
                                ctx->ops_deq_retries++;

                        total_deq_ops += num_deq;

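                        /*
                         * On the last iteration, trim each destination mbuf
                         * chain to the number of bytes actually produced.
                         */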
                        if (iter == num_iter - 1) {
                                for (i = 0; i < num_deq; i++) {
                                        struct rte_comp_op *op = deq_ops[i];

                                        if (op->status !=
                                                RTE_COMP_OP_STATUS_SUCCESS) {
                                                RTE_LOG(ERR, USER1, "Some operations were not successful\n");
                                                goto end;
                                        }

                                        struct rte_mbuf *m = op->m_dst;

                                        m->pkt_len = op->produced;
                                        uint32_t remaining_data = op->produced;
                                        uint16_t data_to_append;

                                        while (remaining_data > 0) {
                                                data_to_append =
                                                        RTE_MIN(remaining_data,
                                                             out_seg_sz);
                                                m->data_len = data_to_append;
                                                remaining_data -=
                                                                data_to_append;
                                                m = m->next;
                                        }
                                }
                        }
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)deq_ops, num_deq);
                        allocated -= num_deq;
                }

                /* Dequeue the last operations */
                while (total_deq_ops < total_ops) {
                        if (unlikely(test_data->perf_comp_force_stop))
                                goto end;

                        tsc_start = rte_rdtsc_precise();
                        num_deq = rte_compressdev_dequeue_burst(dev_id,
                                                mem->qp_id,
                                                deq_ops,
                                                test_data->burst_sz);
                        tsc_end = rte_rdtsc_precise();
                        tsc_duration = tsc_end - tsc_start;
                        ctx->duration_deq += tsc_duration;
                        ctx->ops_deq_retries++;

                        if (num_deq == 0) {
                                struct rte_compressdev_stats stats;

                                rte_compressdev_stats_get(dev_id, &stats);
                                if (stats.dequeue_err_count) {
                                        res = -1;
                                        goto end;
                                }
                        }
                        total_deq_ops += num_deq;

                        if (iter == num_iter - 1) {
                                for (i = 0; i < num_deq; i++) {
                                        struct rte_comp_op *op = deq_ops[i];

                                        if (op->status !=
                                                RTE_COMP_OP_STATUS_SUCCESS) {
                                                RTE_LOG(ERR, USER1, "Some operations were not successful\n");
                                                goto end;
                                        }

                                        struct rte_mbuf *m = op->m_dst;

                                        m->pkt_len = op->produced;
                                        uint32_t remaining_data = op->produced;
                                        uint16_t data_to_append;

                                        while (remaining_data > 0) {
                                                data_to_append =
                                                RTE_MIN(remaining_data,
                                                        out_seg_sz);
                                                m->data_len = data_to_append;
                                                remaining_data -=
                                                                data_to_append;
                                                m = m->next;
                                        }
                                }
                        }
                        rte_mempool_put_bulk(mem->op_pool,
                                             (void **)deq_ops, num_deq);
                        allocated -= num_deq;
                }
        }
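        /* Every op has been returned to the pool at this point */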
        allocated = 0;

end:
        if (allocated)
                rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
        rte_compressdev_private_xform_free(dev_id, priv_xform);
        rte_free(ops);

        if (test_data->perf_comp_force_stop) {
                RTE_LOG(ERR, USER1,
                      "lcore: %d Perf. test has been aborted by user\n",
                        mem->lcore_id);
                res = -1;
        }
        return res;
}

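/*
 * Per-lcore entry point: runs the verification pass first, then two timed
 * compress and two timed decompress rounds (the first of each pair only warms
 * the cache; main_loop() resets the counters, so the reported numbers come
 * from the second run), and finally prints one row of per-op results.
 */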
int
cperf_cyclecount_test_runner(void *test_ctx)
{
        struct cperf_cyclecount_ctx *ctx = test_ctx;
        struct comp_test_data *test_data = ctx->ver.options;
        uint32_t lcore = rte_lcore_id();
        static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
        static rte_spinlock_t print_spinlock = RTE_SPINLOCK_INITIALIZER;
        int i;

        uint32_t ops_enq_retries_comp;
        uint32_t ops_deq_retries_comp;

        uint32_t ops_enq_retries_decomp;
        uint32_t ops_deq_retries_decomp;

        uint32_t duration_setup_per_op;

        uint32_t duration_enq_per_op_comp;
        uint32_t duration_deq_per_op_comp;

        uint32_t duration_enq_per_op_decomp;
        uint32_t duration_deq_per_op_decomp;

        ctx->ver.mem.lcore_id = lcore;

        /*
         * Print information about the current compression thread
         */
        if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
                printf("    lcore: %u,"
                                " driver name: %s,"
                                " device name: %s,"
                                " device id: %u,"
                                " socket id: %u,"
                                " queue pair id: %u\n",
                        lcore,
                        ctx->ver.options->driver_name,
                        rte_compressdev_name_get(ctx->ver.mem.dev_id),
                        ctx->ver.mem.dev_id,
                        rte_compressdev_socket_id(ctx->ver.mem.dev_id),
                        ctx->ver.mem.qp_id);

        /*
         * The verification part has to run first
         */
        if (cperf_verify_test_runner(&ctx->ver))
                return EXIT_FAILURE;

        /*
         * Run each test twice, discarding the first results,
         * which are measured before the cache is warmed up
         */

        /* C O M P R E S S */
        for (i = 0; i < 2; i++) {
                if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
                        return EXIT_FAILURE;
        }

        ops_enq_retries_comp = ctx->ops_enq_retries;
        ops_deq_retries_comp = ctx->ops_deq_retries;

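        /*
         * Report cycles per op: divide each accumulated TSC total by the
         * number of ops processed, i.e. total_bufs * num_iter. For example
         * (illustrative numbers), 512000 enqueue cycles over 64 buffers and
         * 4 iterations yield 512000 / (64 * 4) = 2000 cycles per op.
         */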
        duration_enq_per_op_comp = ctx->duration_enq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);
        duration_deq_per_op_comp = ctx->duration_deq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        /* D E C O M P R E S S */
        for (i = 0; i < 2; i++) {
                if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
                        return EXIT_FAILURE;
        }

        ops_enq_retries_decomp = ctx->ops_enq_retries;
        ops_deq_retries_decomp = ctx->ops_deq_retries;

        duration_enq_per_op_decomp = ctx->duration_enq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);
        duration_deq_per_op_decomp = ctx->duration_deq /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        duration_setup_per_op = ctx->duration_op /
                        (ctx->ver.mem.total_bufs * test_data->num_iter);

        /* R E P O R T processing */
        if (rte_atomic16_test_and_set(&display_once)) {

                rte_spinlock_lock(&print_spinlock);

                printf("\nLegend for the table\n"
                "  - Retries section: number of retries for the following operations:\n"
                "    [C-e] - compression enqueue\n"
                "    [C-d] - compression dequeue\n"
                "    [D-e] - decompression enqueue\n"
                "    [D-d] - decompression dequeue\n"
                "  - Cycles section: number of cycles per 'op' for the following operations:\n"
                "    setup/op - memory allocation, op configuration and memory deallocation\n"
                "    [C-e] - compression enqueue\n"
                "    [C-d] - compression dequeue\n"
                "    [D-e] - decompression enqueue\n"
                "    [D-d] - decompression dequeue\n\n");

                printf("\n%12s%6s%12s%17s",
                        "lcore id", "Level", "Comp size", "Comp ratio [%]");

                printf("  |%10s %6s %8s %6s %8s",
                        " Retries:",
                        "[C-e]", "[C-d]",
                        "[D-e]", "[D-d]");

                printf("  |%9s %9s %9s %9s %9s %9s\n",
                        " Cycles:",
                        "setup/op",
                        "[C-e]", "[C-d]",
                        "[D-e]", "[D-d]");

                rte_spinlock_unlock(&print_spinlock);
        }

        rte_spinlock_lock(&print_spinlock);

        printf("%12u"
               "%6u"
               "%12zu"
               "%17.2f",
                ctx->ver.mem.lcore_id,
                test_data->level,
                ctx->ver.comp_data_sz,
                ctx->ver.ratio);

        printf("  |%10s %6u %8u %6u %8u",
               " ",
                ops_enq_retries_comp,
                ops_deq_retries_comp,
                ops_enq_retries_decomp,
                ops_deq_retries_decomp);

        printf("  |%9s %9u %9u %9u %9u %9u\n",
               " ",
                duration_setup_per_op,
                duration_enq_per_op_comp,
                duration_deq_per_op_comp,
                duration_enq_per_op_decomp,
                duration_deq_per_op_decomp);

        rte_spinlock_unlock(&print_spinlock);

        return EXIT_SUCCESS;
}