dpdk/app/test-crypto-perf/cperf_test_latency.c
<<
>>
Prefs
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016-2017 Intel Corporation
 */
   4
   5#include <rte_malloc.h>
   6#include <rte_cycles.h>
   7#include <rte_crypto.h>
   8#include <rte_cryptodev.h>
   9
  10#include "cperf_test_latency.h"
  11#include "cperf_ops.h"
  12#include "cperf_test_common.h"
  13
  14struct cperf_op_result {
  15        uint64_t tsc_start;
  16        uint64_t tsc_end;
  17        enum rte_crypto_op_status status;
  18};
  19
  20struct cperf_latency_ctx {
  21        uint8_t dev_id;
  22        uint16_t qp_id;
  23        uint8_t lcore_id;
  24
  25        struct rte_mempool *pool;
  26
  27        struct rte_cryptodev_sym_session *sess;
  28
  29        cperf_populate_ops_t populate_ops;
  30
  31        uint32_t src_buf_offset;
  32        uint32_t dst_buf_offset;
  33
  34        const struct cperf_options *options;
  35        const struct cperf_test_vector *test_vector;
  36        struct cperf_op_result *res;
  37};
  38
  39struct priv_op_data {
  40        struct cperf_op_result *result;
  41};
  42
  43static void
  44cperf_latency_test_free(struct cperf_latency_ctx *ctx)
  45{
  46        if (ctx) {
  47                if (ctx->sess) {
  48                        rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
  49                        rte_cryptodev_sym_session_free(ctx->sess);
  50                }
  51
  52                if (ctx->pool)
  53                        rte_mempool_free(ctx->pool);
  54
  55                rte_free(ctx->res);
  56                rte_free(ctx);
  57        }
  58}
  59
  60void *
  61cperf_latency_test_constructor(struct rte_mempool *sess_mp,
  62                struct rte_mempool *sess_priv_mp,
  63                uint8_t dev_id, uint16_t qp_id,
  64                const struct cperf_options *options,
  65                const struct cperf_test_vector *test_vector,
  66                const struct cperf_op_fns *op_fns)
  67{
  68        struct cperf_latency_ctx *ctx = NULL;
  69        size_t extra_op_priv_size = sizeof(struct priv_op_data);
  70
  71        ctx = rte_malloc(NULL, sizeof(struct cperf_latency_ctx), 0);
  72        if (ctx == NULL)
  73                goto err;
  74
  75        ctx->dev_id = dev_id;
  76        ctx->qp_id = qp_id;
  77
  78        ctx->populate_ops = op_fns->populate_ops;
  79        ctx->options = options;
  80        ctx->test_vector = test_vector;
  81
  82        /* IV goes at the end of the crypto operation */
  83        uint16_t iv_offset = sizeof(struct rte_crypto_op) +
  84                sizeof(struct rte_crypto_sym_op) +
  85                sizeof(struct cperf_op_result *);
  86
  87        ctx->sess = op_fns->sess_create(sess_mp, sess_priv_mp, dev_id, options,
  88                        test_vector, iv_offset);
  89        if (ctx->sess == NULL)
  90                goto err;
  91
  92        if (cperf_alloc_common_memory(options, test_vector, dev_id, qp_id,
  93                        extra_op_priv_size,
  94                        &ctx->src_buf_offset, &ctx->dst_buf_offset,
  95                        &ctx->pool) < 0)
  96                goto err;
  97
  98        ctx->res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
  99                        ctx->options->total_ops, 0);
 100
 101        if (ctx->res == NULL)
 102                goto err;
 103
 104        return ctx;
 105err:
 106        cperf_latency_test_free(ctx);
 107
 108        return NULL;
 109}
 110
 111static inline void
 112store_timestamp(struct rte_crypto_op *op, uint64_t timestamp)
 113{
 114        struct priv_op_data *priv_data;
 115
 116        priv_data = (struct priv_op_data *) (op->sym + 1);
 117        priv_data->result->status = op->status;
 118        priv_data->result->tsc_end = timestamp;
 119}
 120
 121int
 122cperf_latency_test_runner(void *arg)
 123{
 124        struct cperf_latency_ctx *ctx = arg;
 125        uint16_t test_burst_size;
 126        uint8_t burst_size_idx = 0;
 127        uint32_t imix_idx = 0;
 128
 129        static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
 130
 131        if (ctx == NULL)
 132                return 0;
 133
 134        struct rte_crypto_op *ops[ctx->options->max_burst_size];
 135        struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
 136        uint64_t i;
 137        struct priv_op_data *priv_data;
 138
 139        uint32_t lcore = rte_lcore_id();
 140
 141#ifdef CPERF_LINEARIZATION_ENABLE
 142        struct rte_cryptodev_info dev_info;
 143        int linearize = 0;
 144
 145        /* Check if source mbufs require coalescing */
 146        if (ctx->options->segment_sz < ctx->options->max_buffer_size) {
 147                rte_cryptodev_info_get(ctx->dev_id, &dev_info);
 148                if ((dev_info.feature_flags &
 149                                RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
 150                        linearize = 1;
 151        }
 152#endif /* CPERF_LINEARIZATION_ENABLE */
 153
 154        ctx->lcore_id = lcore;
 155
 156        /* Warm up the host CPU before starting the test */
 157        for (i = 0; i < ctx->options->total_ops; i++)
 158                rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
 159
 160        /* Get first size from range or list */
 161        if (ctx->options->inc_burst_size != 0)
 162                test_burst_size = ctx->options->min_burst_size;
 163        else
 164                test_burst_size = ctx->options->burst_size_list[0];
 165
 166        uint16_t iv_offset = sizeof(struct rte_crypto_op) +
 167                sizeof(struct rte_crypto_sym_op) +
 168                sizeof(struct cperf_op_result *);
 169
 170        while (test_burst_size <= ctx->options->max_burst_size) {
 171                uint64_t ops_enqd = 0, ops_deqd = 0;
 172                uint64_t b_idx = 0;
 173
 174                uint64_t tsc_val, tsc_end, tsc_start;
 175                uint64_t tsc_max = 0, tsc_min = ~0UL, tsc_tot = 0, tsc_idx = 0;
 176                uint64_t enqd_max = 0, enqd_min = ~0UL, enqd_tot = 0;
 177                uint64_t deqd_max = 0, deqd_min = ~0UL, deqd_tot = 0;
 178
 179                while (enqd_tot < ctx->options->total_ops) {
 180
 181                        uint16_t burst_size = ((enqd_tot + test_burst_size)
 182                                        <= ctx->options->total_ops) ?
 183                                                        test_burst_size :
 184                                                        ctx->options->total_ops -
 185                                                        enqd_tot;
 186
 187                        /* Allocate objects containing crypto operations and mbufs */
 188                        if (rte_mempool_get_bulk(ctx->pool, (void **)ops,
 189                                                burst_size) != 0) {
 190                                RTE_LOG(ERR, USER1,
 191                                        "Failed to allocate more crypto operations "
 192                                        "from the crypto operation pool.\n"
 193                                        "Consider increasing the pool size "
 194                                        "with --pool-sz\n");
 195                                return -1;
 196                        }
 197
 198                        /* Setup crypto op, attach mbuf etc */
 199                        (ctx->populate_ops)(ops, ctx->src_buf_offset,
 200                                        ctx->dst_buf_offset,
 201                                        burst_size, ctx->sess, ctx->options,
 202                                        ctx->test_vector, iv_offset,
 203                                        &imix_idx);
 204
 205                        tsc_start = rte_rdtsc_precise();
 206
 207#ifdef CPERF_LINEARIZATION_ENABLE
 208                        if (linearize) {
 209                                /* PMD doesn't support scatter-gather and source buffer
 210                                 * is segmented.
 211                                 * We need to linearize it before enqueuing.
 212                                 */
 213                                for (i = 0; i < burst_size; i++)
 214                                        rte_pktmbuf_linearize(ops[i]->sym->m_src);
 215                        }
 216#endif /* CPERF_LINEARIZATION_ENABLE */
 217
 218                        /* Enqueue burst of ops on crypto device */
 219                        ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
 220                                        ops, burst_size);
 221
 222                        /* Dequeue processed burst of ops from crypto device */
 223                        ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
 224                                        ops_processed, test_burst_size);
 225
 226                        tsc_end = rte_rdtsc_precise();
 227
 228                        /* Free memory for not enqueued operations */
 229                        if (ops_enqd != burst_size)
 230                                rte_mempool_put_bulk(ctx->pool,
 231                                                (void **)&ops[ops_enqd],
 232                                                burst_size - ops_enqd);
 233
 234                        for (i = 0; i < ops_enqd; i++) {
 235                                ctx->res[tsc_idx].tsc_start = tsc_start;
 236                                /*
 237                                 * Private data structure starts after the end of the
 238                                 * rte_crypto_sym_op structure.
 239                                 */
 240                                priv_data = (struct priv_op_data *) (ops[i]->sym + 1);
 241                                priv_data->result = (void *)&ctx->res[tsc_idx];
 242                                tsc_idx++;
 243                        }
 244
 245                        if (likely(ops_deqd))  {
 246                                /* Free crypto ops so they can be reused. */
 247                                for (i = 0; i < ops_deqd; i++)
 248                                        store_timestamp(ops_processed[i], tsc_end);
 249
 250                                rte_mempool_put_bulk(ctx->pool,
 251                                                (void **)ops_processed, ops_deqd);
 252
 253                                deqd_tot += ops_deqd;
 254                                deqd_max = RTE_MAX(ops_deqd, deqd_max);
 255                                deqd_min = RTE_MIN(ops_deqd, deqd_min);
 256                        }
 257
 258                        enqd_tot += ops_enqd;
 259                        enqd_max = RTE_MAX(ops_enqd, enqd_max);
 260                        enqd_min = RTE_MIN(ops_enqd, enqd_min);
 261
 262                        b_idx++;
 263                }
 264
 265                /* Dequeue any operations still in the crypto device */
 266                while (deqd_tot < ctx->options->total_ops) {
 267                        /* Sending 0 length burst to flush sw crypto device */
 268                        rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);
 269
 270                        /* dequeue burst */
 271                        ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
 272                                        ops_processed, test_burst_size);
 273
 274                        tsc_end = rte_rdtsc_precise();
 275
 276                        if (ops_deqd != 0) {
 277                                for (i = 0; i < ops_deqd; i++)
 278                                        store_timestamp(ops_processed[i], tsc_end);
 279
 280                                rte_mempool_put_bulk(ctx->pool,
 281                                                (void **)ops_processed, ops_deqd);
 282
 283                                deqd_tot += ops_deqd;
 284                                deqd_max = RTE_MAX(ops_deqd, deqd_max);
 285                                deqd_min = RTE_MIN(ops_deqd, deqd_min);
 286                        }
 287                }
 288
 289                for (i = 0; i < tsc_idx; i++) {
 290                        tsc_val = ctx->res[i].tsc_end - ctx->res[i].tsc_start;
 291                        tsc_max = RTE_MAX(tsc_val, tsc_max);
 292                        tsc_min = RTE_MIN(tsc_val, tsc_min);
 293                        tsc_tot += tsc_val;
 294                }
 295
 296                double time_tot, time_avg, time_max, time_min;
 297
 298                const uint64_t tunit = 1000000; /* us */
 299                const uint64_t tsc_hz = rte_get_tsc_hz();
 300
 301                uint64_t enqd_avg = enqd_tot / b_idx;
 302                uint64_t deqd_avg = deqd_tot / b_idx;
 303                uint64_t tsc_avg = tsc_tot / tsc_idx;
 304
 305                time_tot = tunit*(double)(tsc_tot) / tsc_hz;
 306                time_avg = tunit*(double)(tsc_avg) / tsc_hz;
 307                time_max = tunit*(double)(tsc_max) / tsc_hz;
 308                time_min = tunit*(double)(tsc_min) / tsc_hz;
 309
 310                if (ctx->options->csv) {
 311                        if (rte_atomic16_test_and_set(&display_once))
 312                                printf("\n# lcore, Buffer Size, Burst Size, Pakt Seq #, "
 313                                                "cycles, time (us)");
 314
 315                        for (i = 0; i < ctx->options->total_ops; i++) {
 316
 317                                printf("\n%u,%u,%u,%"PRIu64",%"PRIu64",%.3f",
 318                                        ctx->lcore_id, ctx->options->test_buffer_size,
 319                                        test_burst_size, i + 1,
 320                                        ctx->res[i].tsc_end - ctx->res[i].tsc_start,
 321                                        tunit * (double) (ctx->res[i].tsc_end
 322                                                        - ctx->res[i].tsc_start)
 323                                                / tsc_hz);
 324
 325                        }
 326                } else {
 327                        printf("\n# Device %d on lcore %u\n", ctx->dev_id,
 328                                ctx->lcore_id);
 329                        printf("\n# total operations: %u", ctx->options->total_ops);
 330                        printf("\n# Buffer size: %u", ctx->options->test_buffer_size);
 331                        printf("\n# Burst size: %u", test_burst_size);
 332                        printf("\n#     Number of bursts: %"PRIu64,
 333                                        b_idx);
 334
 335                        printf("\n#");
 336                        printf("\n#          \t       Total\t   Average\t   "
 337                                        "Maximum\t   Minimum");
 338                        printf("\n#  enqueued\t%12"PRIu64"\t%10"PRIu64"\t"
 339                                        "%10"PRIu64"\t%10"PRIu64, enqd_tot,
 340                                        enqd_avg, enqd_max, enqd_min);
 341                        printf("\n#  dequeued\t%12"PRIu64"\t%10"PRIu64"\t"
 342                                        "%10"PRIu64"\t%10"PRIu64, deqd_tot,
 343                                        deqd_avg, deqd_max, deqd_min);
 344                        printf("\n#    cycles\t%12"PRIu64"\t%10"PRIu64"\t"
 345                                        "%10"PRIu64"\t%10"PRIu64, tsc_tot,
 346                                        tsc_avg, tsc_max, tsc_min);
 347                        printf("\n# time [us]\t%12.0f\t%10.3f\t%10.3f\t%10.3f",
 348                                        time_tot, time_avg, time_max, time_min);
 349                        printf("\n\n");
 350
 351                }
 352
 353                /* Get next size from range or list */
 354                if (ctx->options->inc_burst_size != 0)
 355                        test_burst_size += ctx->options->inc_burst_size;
 356                else {
 357                        if (++burst_size_idx == ctx->options->burst_size_count)
 358                                break;
 359                        test_burst_size =
 360                                ctx->options->burst_size_list[burst_size_idx];
 361                }
 362        }
 363
 364        return 0;
 365}
 366
 367void
 368cperf_latency_test_destructor(void *arg)
 369{
 370        struct cperf_latency_ctx *ctx = arg;
 371
 372        if (ctx == NULL)
 373                return;
 374
 375        cperf_latency_test_free(ctx);
 376}
 377