/* dpdk/lib/latencystats/rte_latencystats.c */
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2018 Intel Corporation
   3 */
   4
   5#include <math.h>
   6
   7#include <rte_string_fns.h>
   8#include <rte_mbuf_dyn.h>
   9#include <rte_log.h>
  10#include <rte_cycles.h>
  11#include <rte_ethdev.h>
  12#include <rte_metrics.h>
  13#include <rte_memzone.h>
  14#include <rte_lcore.h>
  15
  16#include "rte_latencystats.h"
  17
  18/** Nano seconds per second */
  19#define NS_PER_SEC 1E9
  20
  21/** Clock cycles per nano second */
  22static uint64_t
  23latencystat_cycles_per_ns(void)
  24{
  25        return rte_get_timer_hz() / NS_PER_SEC;
  26}
  27
/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_LATENCY_STATS RTE_LOGTYPE_USER1

/* Dynamic mbuf ol_flags bit marking packets that carry an Rx timestamp. */
static uint64_t timestamp_dynflag;
/* Byte offset of the timestamp dynamic field in the mbuf; set at init. */
static int timestamp_dynfield_offset = -1;
  33
/* Return a pointer to the timestamp dynamic field inside @mbuf. */
static inline rte_mbuf_timestamp_t *
timestamp_dynfield(struct rte_mbuf *mbuf)
{
	return RTE_MBUF_DYNFIELD(mbuf,
			timestamp_dynfield_offset, rte_mbuf_timestamp_t *);
}
  40
/* Name of the memzone holding the shared struct rte_latency_stats. */
static const char *MZ_RTE_LATENCY_STATS = "rte_latencystats";
/* Key set returned by rte_metrics_reg_names() for our metrics. */
static int latency_stats_index;
/* Sampling interval, converted to TSC cycles at init time. */
static uint64_t samp_intvl;
/* Cycles accumulated since the last packet was timestamped. */
static uint64_t timer_tsc;
/* TSC value seen by the previous Rx-callback iteration. */
static uint64_t prev_tsc;

/*
 * Aggregated latency statistics, shared between processes via memzone.
 * Values are accumulated in TSC cycles and converted to nanoseconds
 * when read (see rte_latencystats_update/fill_values).
 */
struct rte_latency_stats {
	float min_latency; /**< Minimum latency (TSC cycles; reported in ns) */
	float avg_latency; /**< EWMA latency (TSC cycles; reported in ns) */
	float max_latency; /**< Maximum latency (TSC cycles; reported in ns) */
	float jitter; /**< Latency variation (TSC cycles; reported in ns) */
	rte_spinlock_t lock; /**< Guards updates to the fields above */
};

static struct rte_latency_stats *glob_stats;

/* Handle of a registered Rx/Tx callback, kept for later removal. */
struct rxtx_cbs {
	const struct rte_eth_rxtx_callback *cb;
};

static struct rxtx_cbs rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
static struct rxtx_cbs tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
  63
/* Maps a metric name to the offset of its field in rte_latency_stats. */
struct latency_stats_nameoff {
	char name[RTE_ETH_XSTATS_NAME_SIZE];
	unsigned int offset;
};

/* Metric name/offset table; order defines the metric key values. */
static const struct latency_stats_nameoff lat_stats_strings[] = {
	{"min_latency_ns", offsetof(struct rte_latency_stats, min_latency)},
	{"avg_latency_ns", offsetof(struct rte_latency_stats, avg_latency)},
	{"max_latency_ns", offsetof(struct rte_latency_stats, max_latency)},
	{"jitter_ns", offsetof(struct rte_latency_stats, jitter)},
};

/* Number of entries in lat_stats_strings. */
#define NUM_LATENCY_STATS (sizeof(lat_stats_strings) / \
				sizeof(lat_stats_strings[0]))
  78
  79int32_t
  80rte_latencystats_update(void)
  81{
  82        unsigned int i;
  83        float *stats_ptr = NULL;
  84        uint64_t values[NUM_LATENCY_STATS] = {0};
  85        int ret;
  86
  87        for (i = 0; i < NUM_LATENCY_STATS; i++) {
  88                stats_ptr = RTE_PTR_ADD(glob_stats,
  89                                lat_stats_strings[i].offset);
  90                values[i] = (uint64_t)floor((*stats_ptr)/
  91                                latencystat_cycles_per_ns());
  92        }
  93
  94        ret = rte_metrics_update_values(RTE_METRICS_GLOBAL,
  95                                        latency_stats_index,
  96                                        values, NUM_LATENCY_STATS);
  97        if (ret < 0)
  98                RTE_LOG(INFO, LATENCY_STATS, "Failed to push the stats\n");
  99
 100        return ret;
 101}
 102
 103static void
 104rte_latencystats_fill_values(struct rte_metric_value *values)
 105{
 106        unsigned int i;
 107        float *stats_ptr = NULL;
 108
 109        for (i = 0; i < NUM_LATENCY_STATS; i++) {
 110                stats_ptr = RTE_PTR_ADD(glob_stats,
 111                                lat_stats_strings[i].offset);
 112                values[i].key = i;
 113                values[i].value = (uint64_t)floor((*stats_ptr)/
 114                                                latencystat_cycles_per_ns());
 115        }
 116}
 117
/*
 * Rx callback: stamp the arrival time (TSC) into at most one packet per
 * sampling interval, using the registered mbuf dynamic field/flag.
 * Returns nb_pkts unchanged so the burst proceeds normally.
 *
 * NOTE(review): prev_tsc and timer_tsc are plain globals shared by all
 * Rx queues/lcores with no synchronization — looks racy if multiple
 * queues are polled concurrently; confirm intended usage.
 */
static uint16_t
add_time_stamps(uint16_t pid __rte_unused,
		uint16_t qid __rte_unused,
		struct rte_mbuf **pkts,
		uint16_t nb_pkts,
		uint16_t max_pkts __rte_unused,
		void *user_cb __rte_unused)
{
	unsigned int i;
	uint64_t diff_tsc, now;

	/*
	 * For every sample interval,
	 * time stamp is marked on one received packet.
	 */
	now = rte_rdtsc();
	for (i = 0; i < nb_pkts; i++) {
		diff_tsc = now - prev_tsc;
		timer_tsc += diff_tsc;

		/* Stamp only packets not already carrying a timestamp,
		 * and only once the sampling interval has elapsed. */
		if ((pkts[i]->ol_flags & timestamp_dynflag) == 0
				&& (timer_tsc >= samp_intvl)) {
			*timestamp_dynfield(pkts[i]) = now;
			pkts[i]->ol_flags |= timestamp_dynflag;
			timer_tsc = 0; /* restart the interval */
		}
		prev_tsc = now;
		now = rte_rdtsc();
	}

	return nb_pkts;
}
 150
 151static uint16_t
 152calc_latency(uint16_t pid __rte_unused,
 153                uint16_t qid __rte_unused,
 154                struct rte_mbuf **pkts,
 155                uint16_t nb_pkts,
 156                void *_ __rte_unused)
 157{
 158        unsigned int i, cnt = 0;
 159        uint64_t now;
 160        float latency[nb_pkts];
 161        static float prev_latency;
 162        /*
 163         * Alpha represents degree of weighting decrease in EWMA,
 164         * a constant smoothing factor between 0 and 1. The value
 165         * is used below for measuring average latency.
 166         */
 167        const float alpha = 0.2;
 168
 169        now = rte_rdtsc();
 170        for (i = 0; i < nb_pkts; i++) {
 171                if (pkts[i]->ol_flags & timestamp_dynflag)
 172                        latency[cnt++] = now - *timestamp_dynfield(pkts[i]);
 173        }
 174
 175        rte_spinlock_lock(&glob_stats->lock);
 176        for (i = 0; i < cnt; i++) {
 177                /*
 178                 * The jitter is calculated as statistical mean of interpacket
 179                 * delay variation. The "jitter estimate" is computed by taking
 180                 * the absolute values of the ipdv sequence and applying an
 181                 * exponential filter with parameter 1/16 to generate the
 182                 * estimate. i.e J=J+(|D(i-1,i)|-J)/16. Where J is jitter,
 183                 * D(i-1,i) is difference in latency of two consecutive packets
 184                 * i-1 and i.
 185                 * Reference: Calculated as per RFC 5481, sec 4.1,
 186                 * RFC 3393 sec 4.5, RFC 1889 sec.
 187                 */
 188                glob_stats->jitter +=  (fabsf(prev_latency - latency[i])
 189                                        - glob_stats->jitter)/16;
 190                if (glob_stats->min_latency == 0)
 191                        glob_stats->min_latency = latency[i];
 192                else if (latency[i] < glob_stats->min_latency)
 193                        glob_stats->min_latency = latency[i];
 194                else if (latency[i] > glob_stats->max_latency)
 195                        glob_stats->max_latency = latency[i];
 196                /*
 197                 * The average latency is measured using exponential moving
 198                 * average, i.e. using EWMA
 199                 * https://en.wikipedia.org/wiki/Moving_average
 200                 */
 201                glob_stats->avg_latency +=
 202                        alpha * (latency[i] - glob_stats->avg_latency);
 203                prev_latency = latency[i];
 204        }
 205        rte_spinlock_unlock(&glob_stats->lock);
 206
 207        return nb_pkts;
 208}
 209
 210int
 211rte_latencystats_init(uint64_t app_samp_intvl,
 212                rte_latency_stats_flow_type_fn user_cb)
 213{
 214        unsigned int i;
 215        uint16_t pid;
 216        uint16_t qid;
 217        struct rxtx_cbs *cbs = NULL;
 218        const char *ptr_strings[NUM_LATENCY_STATS] = {0};
 219        const struct rte_memzone *mz = NULL;
 220        const unsigned int flags = 0;
 221        int ret;
 222
 223        if (rte_memzone_lookup(MZ_RTE_LATENCY_STATS))
 224                return -EEXIST;
 225
 226        /** Allocate stats in shared memory fo multi process support */
 227        mz = rte_memzone_reserve(MZ_RTE_LATENCY_STATS, sizeof(*glob_stats),
 228                                        rte_socket_id(), flags);
 229        if (mz == NULL) {
 230                RTE_LOG(ERR, LATENCY_STATS, "Cannot reserve memory: %s:%d\n",
 231                        __func__, __LINE__);
 232                return -ENOMEM;
 233        }
 234
 235        glob_stats = mz->addr;
 236        rte_spinlock_init(&glob_stats->lock);
 237        samp_intvl = app_samp_intvl * latencystat_cycles_per_ns();
 238
 239        /** Register latency stats with stats library */
 240        for (i = 0; i < NUM_LATENCY_STATS; i++)
 241                ptr_strings[i] = lat_stats_strings[i].name;
 242
 243        latency_stats_index = rte_metrics_reg_names(ptr_strings,
 244                                                        NUM_LATENCY_STATS);
 245        if (latency_stats_index < 0) {
 246                RTE_LOG(DEBUG, LATENCY_STATS,
 247                        "Failed to register latency stats names\n");
 248                return -1;
 249        }
 250
 251        /* Register mbuf field and flag for Rx timestamp */
 252        ret = rte_mbuf_dyn_rx_timestamp_register(&timestamp_dynfield_offset,
 253                        &timestamp_dynflag);
 254        if (ret != 0) {
 255                RTE_LOG(ERR, LATENCY_STATS,
 256                        "Cannot register mbuf field/flag for timestamp\n");
 257                return -rte_errno;
 258        }
 259
 260        /** Register Rx/Tx callbacks */
 261        RTE_ETH_FOREACH_DEV(pid) {
 262                struct rte_eth_dev_info dev_info;
 263
 264                ret = rte_eth_dev_info_get(pid, &dev_info);
 265                if (ret != 0) {
 266                        RTE_LOG(INFO, LATENCY_STATS,
 267                                "Error during getting device (port %u) info: %s\n",
 268                                pid, strerror(-ret));
 269
 270                        continue;
 271                }
 272
 273                for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
 274                        cbs = &rx_cbs[pid][qid];
 275                        cbs->cb = rte_eth_add_first_rx_callback(pid, qid,
 276                                        add_time_stamps, user_cb);
 277                        if (!cbs->cb)
 278                                RTE_LOG(INFO, LATENCY_STATS, "Failed to "
 279                                        "register Rx callback for pid=%d, "
 280                                        "qid=%d\n", pid, qid);
 281                }
 282                for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
 283                        cbs = &tx_cbs[pid][qid];
 284                        cbs->cb =  rte_eth_add_tx_callback(pid, qid,
 285                                        calc_latency, user_cb);
 286                        if (!cbs->cb)
 287                                RTE_LOG(INFO, LATENCY_STATS, "Failed to "
 288                                        "register Tx callback for pid=%d, "
 289                                        "qid=%d\n", pid, qid);
 290                }
 291        }
 292        return 0;
 293}
 294
 295int
 296rte_latencystats_uninit(void)
 297{
 298        uint16_t pid;
 299        uint16_t qid;
 300        int ret = 0;
 301        struct rxtx_cbs *cbs = NULL;
 302        const struct rte_memzone *mz = NULL;
 303
 304        /** De register Rx/Tx callbacks */
 305        RTE_ETH_FOREACH_DEV(pid) {
 306                struct rte_eth_dev_info dev_info;
 307
 308                ret = rte_eth_dev_info_get(pid, &dev_info);
 309                if (ret != 0) {
 310                        RTE_LOG(INFO, LATENCY_STATS,
 311                                "Error during getting device (port %u) info: %s\n",
 312                                pid, strerror(-ret));
 313
 314                        continue;
 315                }
 316
 317                for (qid = 0; qid < dev_info.nb_rx_queues; qid++) {
 318                        cbs = &rx_cbs[pid][qid];
 319                        ret = rte_eth_remove_rx_callback(pid, qid, cbs->cb);
 320                        if (ret)
 321                                RTE_LOG(INFO, LATENCY_STATS, "failed to "
 322                                        "remove Rx callback for pid=%d, "
 323                                        "qid=%d\n", pid, qid);
 324                }
 325                for (qid = 0; qid < dev_info.nb_tx_queues; qid++) {
 326                        cbs = &tx_cbs[pid][qid];
 327                        ret = rte_eth_remove_tx_callback(pid, qid, cbs->cb);
 328                        if (ret)
 329                                RTE_LOG(INFO, LATENCY_STATS, "failed to "
 330                                        "remove Tx callback for pid=%d, "
 331                                        "qid=%d\n", pid, qid);
 332                }
 333        }
 334
 335        /* free up the memzone */
 336        mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
 337        if (mz)
 338                rte_memzone_free(mz);
 339
 340        return 0;
 341}
 342
 343int
 344rte_latencystats_get_names(struct rte_metric_name *names, uint16_t size)
 345{
 346        unsigned int i;
 347
 348        if (names == NULL || size < NUM_LATENCY_STATS)
 349                return NUM_LATENCY_STATS;
 350
 351        for (i = 0; i < NUM_LATENCY_STATS; i++)
 352                strlcpy(names[i].name, lat_stats_strings[i].name,
 353                        sizeof(names[i].name));
 354
 355        return NUM_LATENCY_STATS;
 356}
 357
 358int
 359rte_latencystats_get(struct rte_metric_value *values, uint16_t size)
 360{
 361        if (size < NUM_LATENCY_STATS || values == NULL)
 362                return NUM_LATENCY_STATS;
 363
 364        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
 365                const struct rte_memzone *mz;
 366                mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
 367                if (mz == NULL) {
 368                        RTE_LOG(ERR, LATENCY_STATS,
 369                                "Latency stats memzone not found\n");
 370                        return -ENOMEM;
 371                }
 372                glob_stats =  mz->addr;
 373        }
 374
 375        /* Retrieve latency stats */
 376        rte_latencystats_fill_values(values);
 377
 378        return NUM_LATENCY_STATS;
 379}
 380