dpdk/lib/librte_timer/rte_timer.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <assert.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_spinlock.h>
#include <rte_random.h>
#include <rte_pause.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>

#include "rte_timer.h"

/**
 * Per-lcore info for timers.
 */
struct priv_timer {
        struct rte_timer pending_head;  /**< dummy timer instance to head up list */
        rte_spinlock_t list_lock;       /**< lock to protect list access */

        /** per-core variable that is true if a timer was updated on this
         *  core since the last reset of the variable */
        int updated;

        /** track the current depth of the skiplist */
        unsigned curr_skiplist_depth;

        unsigned prev_lcore;              /**< used for lcore round robin */

        /** running timer on this lcore now */
        struct rte_timer *running_tim;

#ifdef RTE_LIBRTE_TIMER_DEBUG
        /** per-lcore statistics */
        struct rte_timer_debug_stats stats;
#endif
} __rte_cache_aligned;

#define FL_ALLOCATED    (1 << 0)
struct rte_timer_data {
        struct priv_timer priv_timer[RTE_MAX_LCORE];
        uint8_t internal_flags;
};

#define RTE_MAX_DATA_ELS 64
static const struct rte_memzone *rte_timer_data_mz;
static int *volatile rte_timer_mz_refcnt;
static struct rte_timer_data *rte_timer_data_arr;
static const uint32_t default_data_id;
static uint32_t rte_timer_subsystem_initialized;

/* when debug is enabled, store some statistics */
#ifdef RTE_LIBRTE_TIMER_DEBUG
#define __TIMER_STAT_ADD(priv_timer, name, n) do {                      \
                unsigned __lcore_id = rte_lcore_id();                   \
                if (__lcore_id < RTE_MAX_LCORE)                         \
                        priv_timer[__lcore_id].stats.name += (n);       \
        } while (0)
#else
#define __TIMER_STAT_ADD(priv_timer, name, n) do {} while (0)
#endif

static inline int
timer_data_valid(uint32_t id)
{
        return rte_timer_data_arr &&
                (rte_timer_data_arr[id].internal_flags & FL_ALLOCATED);
}

/* validate ID and retrieve timer data pointer, or return error value */
#define TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, retval) do {    \
        if (id >= RTE_MAX_DATA_ELS || !timer_data_valid(id))            \
                return retval;                                          \
        timer_data = &rte_timer_data_arr[id];                           \
} while (0)

int
rte_timer_data_alloc(uint32_t *id_ptr)
{
        int i;
        struct rte_timer_data *data;

        if (!rte_timer_subsystem_initialized)
                return -ENOMEM;

        for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
                data = &rte_timer_data_arr[i];
                if (!(data->internal_flags & FL_ALLOCATED)) {
                        data->internal_flags |= FL_ALLOCATED;

                        if (id_ptr)
                                *id_ptr = i;

                        return 0;
                }
        }

        return -ENOSPC;
}

int
rte_timer_data_dealloc(uint32_t id)
{
        struct rte_timer_data *timer_data;
        TIMER_DATA_VALID_GET_OR_ERR_RET(id, timer_data, -EINVAL);

        timer_data->internal_flags &= ~(FL_ALLOCATED);

        return 0;
}

/* Init the timer library. Allocate an array of timer data structs in shared
 * memory, and allocate the zeroth entry for use with original timer
 * APIs. Since the intersection of the sets of lcore ids in primary and
 * secondary processes should be empty, the zeroth entry can be shared by
 * multiple processes.
 */
int
rte_timer_subsystem_init(void)
{
        const struct rte_memzone *mz;
        struct rte_timer_data *data;
        int i, lcore_id;
        static const char *mz_name = "rte_timer_mz";
        const size_t data_arr_size =
                        RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
        const size_t mem_size = data_arr_size + sizeof(*rte_timer_mz_refcnt);
        bool do_full_init = true;

        rte_mcfg_timer_lock();

        if (rte_timer_subsystem_initialized) {
                rte_mcfg_timer_unlock();
                return -EALREADY;
        }

        mz = rte_memzone_lookup(mz_name);
        if (mz == NULL) {
                mz = rte_memzone_reserve_aligned(mz_name, mem_size,
                                SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
                if (mz == NULL) {
                        rte_mcfg_timer_unlock();
                        return -ENOMEM;
                }
                do_full_init = true;
        } else
                do_full_init = false;

        rte_timer_data_mz = mz;
        rte_timer_data_arr = mz->addr;
        rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);

        if (do_full_init) {
                for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
                        data = &rte_timer_data_arr[i];

                        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE;
                             lcore_id++) {
                                rte_spinlock_init(
                                        &data->priv_timer[lcore_id].list_lock);
                                data->priv_timer[lcore_id].prev_lcore =
                                        lcore_id;
                        }
                }
        }

        rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
        (*rte_timer_mz_refcnt)++;

        rte_timer_subsystem_initialized = 1;

        rte_mcfg_timer_unlock();

        return 0;
}
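
/*
 * Usage sketch (informational comment, not part of the library): a minimal
 * single-lcore flow over the default timer data instance.  The callback
 * name, the one-second delay and the bare main() are hypothetical
 * application code; error handling is abbreviated.  Assumed headers:
 * <stdio.h>, <rte_eal.h>, <rte_lcore.h>, <rte_cycles.h>, <rte_timer.h>.
 *
 *   static void hello_cb(struct rte_timer *tim, void *arg)
 *   {
 *           printf("timer %p fired (arg=%p)\n", (void *)tim, arg);
 *   }
 *
 *   int main(int argc, char **argv)
 *   {
 *           struct rte_timer tim;
 *
 *           if (rte_eal_init(argc, argv) < 0 || rte_timer_subsystem_init() < 0)
 *                   return -1;
 *
 *           rte_timer_init(&tim);
 *           // fire once, roughly one second from now, on this lcore
 *           rte_timer_reset(&tim, rte_get_timer_hz(), SINGLE,
 *                           rte_lcore_id(), hello_cb, NULL);
 *
 *           for (;;)
 *                   rte_timer_manage();
 *   }
 */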

void
rte_timer_subsystem_finalize(void)
{
        rte_mcfg_timer_lock();

        if (!rte_timer_subsystem_initialized) {
                rte_mcfg_timer_unlock();
                return;
        }

        if (--(*rte_timer_mz_refcnt) == 0)
                rte_memzone_free(rte_timer_data_mz);

        rte_timer_subsystem_initialized = 0;

        rte_mcfg_timer_unlock();
}

/* Initialize the timer handle tim for use */
void
rte_timer_init(struct rte_timer *tim)
{
        union rte_timer_status status;

        status.state = RTE_TIMER_STOP;
        status.owner = RTE_TIMER_NO_OWNER;
        __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELAXED);
}

/*
 * if the timer is pending or stopped (or running on the same core as
 * us), mark the timer as configuring, and on success return the previous
 * status of the timer
 */
static int
timer_set_config_state(struct rte_timer *tim,
                       union rte_timer_status *ret_prev_status,
                       struct priv_timer *priv_timer)
{
        union rte_timer_status prev_status, status;
        int success = 0;
        unsigned lcore_id;

        lcore_id = rte_lcore_id();

        /* wait until the timer is in the correct status before updating,
         * and mark it as being configured */
        prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

        while (success == 0) {
                /* timer is running on another core
                 * or ready to run on local core, exit
                 */
                if (prev_status.state == RTE_TIMER_RUNNING &&
                    (prev_status.owner != (uint16_t)lcore_id ||
                     tim != priv_timer[lcore_id].running_tim))
                        return -1;

                /* timer is being configured on another core */
                if (prev_status.state == RTE_TIMER_CONFIG)
                        return -1;

                /* here, we know that the timer is stopped or pending,
                 * mark it atomically as being configured */
                status.state = RTE_TIMER_CONFIG;
                status.owner = (int16_t)lcore_id;
                /* CONFIG states are acting as locked states. If the
                 * timer is in CONFIG state, the state cannot be changed
                 * by other threads. So, we should use ACQUIRE here.
                 */
                success = __atomic_compare_exchange_n(&tim->status.u32,
                                              &prev_status.u32,
                                              status.u32, 0,
                                              __ATOMIC_ACQUIRE,
                                              __ATOMIC_RELAXED);
        }

        ret_prev_status->u32 = prev_status.u32;
        return 0;
}

/*
 * if the timer is pending, mark it as running
 */
static int
timer_set_running_state(struct rte_timer *tim)
{
        union rte_timer_status prev_status, status;
        unsigned lcore_id = rte_lcore_id();
        int success = 0;

        /* wait until the timer is in the correct status before updating,
         * and mark it as running */
        prev_status.u32 = __atomic_load_n(&tim->status.u32, __ATOMIC_RELAXED);

        while (success == 0) {
                /* timer is not pending anymore */
                if (prev_status.state != RTE_TIMER_PENDING)
                        return -1;

                /* we know that the timer will be pending at this point;
                 * mark it atomically as being running
                 */
                status.state = RTE_TIMER_RUNNING;
                status.owner = (int16_t)lcore_id;
                /* RUNNING states are acting as locked states. If the
                 * timer is in RUNNING state, the state cannot be changed
                 * by other threads. So, we should use ACQUIRE here.
                 */
                success = __atomic_compare_exchange_n(&tim->status.u32,
                                              &prev_status.u32,
                                              status.u32, 0,
                                              __ATOMIC_ACQUIRE,
                                              __ATOMIC_RELAXED);
        }

        return 0;
}

/*
 * Return a skiplist level for a new entry.
 * This probabilistically gives a level with p=1/4 that an entry at level n
 * will also appear at level n+1.
 */
static uint32_t
timer_get_skiplist_level(unsigned curr_depth)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
        static uint32_t i, count = 0;
        static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0};
#endif

        /* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1,
         * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest
         * bit position of a (pseudo)random number.
         */
        uint32_t rand = rte_rand() & (UINT32_MAX - 1);
        uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2;

        /* limit the levels used to one above our current level, so we don't,
         * for instance, have a level 0 and a level 7 without anything between
         */
        if (level > curr_depth)
                level = curr_depth;
        if (level >= MAX_SKIPLIST_DEPTH)
                level = MAX_SKIPLIST_DEPTH-1;
#ifdef RTE_LIBRTE_TIMER_DEBUG
        count++;
        levels[level]++;
        if (count % 10000 == 0)
                for (i = 0; i < MAX_SKIPLIST_DEPTH; i++)
                        printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]);
#endif
        return level;
}
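
/*
 * Worked example (informational): the computation above applied to a few
 * sample values of 'rand' (bit 0 is already cleared by the mask), where
 * rte_bsf32() returns the zero-based position of the lowest set bit:
 *
 *   rand (binary)   rte_bsf32(rand)   level = (bsf - 1) / 2
 *   ......10        1                 0
 *   .....100        2                 0
 *   ....1000        3                 1
 *   ...10000        4                 1
 *   ..100000        5                 2
 *
 * Two bit positions map to each level, so the probability of reaching
 * level n+1 is one quarter of that of reaching level n, matching the
 * p=1/4 promotion rate described above.
 */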

/*
 * For a given time value, get the entries at each level which
 * are <= that time value.
 */
static void
timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore,
                       struct rte_timer **prev, struct priv_timer *priv_timer)
{
        unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth;
        prev[lvl] = &priv_timer[tim_lcore].pending_head;
        while (lvl != 0) {
                lvl--;
                prev[lvl] = prev[lvl+1];
                while (prev[lvl]->sl_next[lvl] &&
                                prev[lvl]->sl_next[lvl]->expire <= time_val)
                        prev[lvl] = prev[lvl]->sl_next[lvl];
        }
}

/*
 * Given a timer node in the skiplist, find the previous entries for it at
 * all skiplist levels.
 */
static void
timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
                                struct rte_timer **prev,
                                struct priv_timer *priv_timer)
{
        int i;

        /* to get a specific entry in the list, look for entries just lower
         * than its time value, and then step forward on each level
         * individually if necessary
         */
        timer_get_prev_entries(tim->expire - 1, tim_lcore, prev, priv_timer);
        for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) {
                while (prev[i]->sl_next[i] != NULL &&
                                prev[i]->sl_next[i] != tim &&
                                prev[i]->sl_next[i]->expire <= tim->expire)
                        prev[i] = prev[i]->sl_next[i];
        }
}

/* call with lock held as necessary
 * add in list
 * timer must be in config state
 * timer must not be in a list
 */
static void
timer_add(struct rte_timer *tim, unsigned int tim_lcore,
          struct priv_timer *priv_timer)
{
        unsigned lvl;
        struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

        /* find where exactly this element goes in the list of elements
         * for each depth. */
        timer_get_prev_entries(tim->expire, tim_lcore, prev, priv_timer);

        /* now assign it a new level and add at that level */
        const unsigned tim_level = timer_get_skiplist_level(
                        priv_timer[tim_lcore].curr_skiplist_depth);
        if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth)
                priv_timer[tim_lcore].curr_skiplist_depth++;

        lvl = tim_level;
        while (lvl > 0) {
                tim->sl_next[lvl] = prev[lvl]->sl_next[lvl];
                prev[lvl]->sl_next[lvl] = tim;
                lvl--;
        }
        tim->sl_next[0] = prev[0]->sl_next[0];
        prev[0]->sl_next[0] = tim;

        /* save the lowest list entry into the expire field of the dummy hdr
         * NOTE: this is not atomic on 32-bit */
        priv_timer[tim_lcore].pending_head.expire =
                        priv_timer[tim_lcore].pending_head.sl_next[0]->expire;
}
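
/*
 * Illustration (informational): shape of a per-lcore pending list after a
 * few insertions, with hypothetical expiry values.  pending_head is the
 * dummy header declared in struct priv_timer; higher levels skip over
 * entries that were assigned lower levels, and pending_head.expire caches
 * the earliest expiry (100 here).
 *
 *   level 2: head ---------------------------> 300
 *   level 1: head ----------> 200 -----------> 300
 *   level 0: head --> 100 --> 200 --> 250 ---> 300
 */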

/*
 * del from list, lock if needed
 * timer must be in config state
 * timer must be in a list
 */
static void
timer_del(struct rte_timer *tim, union rte_timer_status prev_status,
          int local_is_locked, struct priv_timer *priv_timer)
{
        unsigned lcore_id = rte_lcore_id();
        unsigned prev_owner = prev_status.owner;
        int i;
        struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];

        /* if the timer is pending on another core, we need to lock the
         * list; if it is on the local core, we need to lock if we are not
         * called from rte_timer_manage() */
        if (prev_owner != lcore_id || !local_is_locked)
                rte_spinlock_lock(&priv_timer[prev_owner].list_lock);

        /* save the lowest list entry into the expire field of the dummy hdr.
         * NOTE: this is not atomic on 32-bit */
        if (tim == priv_timer[prev_owner].pending_head.sl_next[0])
                priv_timer[prev_owner].pending_head.expire =
                                ((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire);

        /* adjust pointers from previous entries to point past this */
        timer_get_prev_entries_for_node(tim, prev_owner, prev, priv_timer);
        for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) {
                if (prev[i]->sl_next[i] == tim)
                        prev[i]->sl_next[i] = tim->sl_next[i];
        }

        /* in case we deleted the last entry at a level, adjust down max level */
        for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--)
                if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL)
                        priv_timer[prev_owner].curr_skiplist_depth--;
                else
                        break;

        if (prev_owner != lcore_id || !local_is_locked)
                rte_spinlock_unlock(&priv_timer[prev_owner].list_lock);
}

/* Reset and start the timer associated with the timer handle (private func) */
static int
__rte_timer_reset(struct rte_timer *tim, uint64_t expire,
                  uint64_t period, unsigned tim_lcore,
                  rte_timer_cb_t fct, void *arg,
                  int local_is_locked,
                  struct rte_timer_data *timer_data)
{
        union rte_timer_status prev_status, status;
        int ret;
        unsigned lcore_id = rte_lcore_id();
        struct priv_timer *priv_timer = timer_data->priv_timer;

        /* round robin for tim_lcore */
        if (tim_lcore == (unsigned)LCORE_ID_ANY) {
                if (lcore_id < RTE_MAX_LCORE) {
                        /* EAL thread with valid lcore_id */
                        tim_lcore = rte_get_next_lcore(
                                priv_timer[lcore_id].prev_lcore,
                                0, 1);
                        priv_timer[lcore_id].prev_lcore = tim_lcore;
                } else
                        /* non-EAL threads do not run rte_timer_manage(),
                         * so schedule the timer on the first enabled lcore. */
                        tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1);
        }

        /* wait until the timer is in the correct status before updating,
         * and mark it as being configured */
        ret = timer_set_config_state(tim, &prev_status, priv_timer);
        if (ret < 0)
                return -1;

        __TIMER_STAT_ADD(priv_timer, reset, 1);
        if (prev_status.state == RTE_TIMER_RUNNING &&
            lcore_id < RTE_MAX_LCORE) {
                priv_timer[lcore_id].updated = 1;
        }

        /* remove it from list */
        if (prev_status.state == RTE_TIMER_PENDING) {
                timer_del(tim, prev_status, local_is_locked, priv_timer);
                __TIMER_STAT_ADD(priv_timer, pending, -1);
        }

        tim->period = period;
        tim->expire = expire;
        tim->f = fct;
        tim->arg = arg;

        /* if the timer needs to be scheduled on another core, we need to
         * lock the destination list; if it is on the local core, we need to
         * lock if we are not called from rte_timer_manage()
         */
        if (tim_lcore != lcore_id || !local_is_locked)
                rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);

        __TIMER_STAT_ADD(priv_timer, pending, 1);
        timer_add(tim, tim_lcore, priv_timer);

        /* update state: as we are in CONFIG state, only we can modify
         * the state, so we don't need to use cmpset() here */
        status.state = RTE_TIMER_PENDING;
        status.owner = (int16_t)tim_lcore;
        /* The "RELEASE" ordering guarantees the memory operations above
         * the status update are observed before the update by all threads
         */
        __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

        if (tim_lcore != lcore_id || !local_is_locked)
                rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);

        return 0;
}

/* Reset and start the timer associated with the timer handle tim */
int
rte_timer_reset(struct rte_timer *tim, uint64_t ticks,
                      enum rte_timer_type type, unsigned int tim_lcore,
                      rte_timer_cb_t fct, void *arg)
{
        return rte_timer_alt_reset(default_data_id, tim, ticks, type,
                                   tim_lcore, fct, arg);
}

int
rte_timer_alt_reset(uint32_t timer_data_id, struct rte_timer *tim,
                    uint64_t ticks, enum rte_timer_type type,
                    unsigned int tim_lcore, rte_timer_cb_t fct, void *arg)
{
        uint64_t cur_time = rte_get_timer_cycles();
        uint64_t period;
        struct rte_timer_data *timer_data;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

        if (type == PERIODICAL)
                period = ticks;
        else
                period = 0;

        return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore,
                                 fct, arg, 0, timer_data);
}

/* loop until rte_timer_reset() succeeds */
void
rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks,
                     enum rte_timer_type type, unsigned tim_lcore,
                     rte_timer_cb_t fct, void *arg)
{
        while (rte_timer_reset(tim, ticks, type, tim_lcore,
                               fct, arg) != 0)
                rte_pause();
}
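
/*
 * Example callback (illustrative sketch, hypothetical names): a SINGLE
 * timer that re-arms itself from its own callback.  This is legal because
 * timer_set_config_state() accepts a RUNNING timer owned by the local
 * lcore, and the manage loop checks the per-lcore 'updated' flag after
 * the callback returns.  do_work() stands in for application code.
 *
 *   static void rearm_cb(struct rte_timer *tim, void *arg)
 *   {
 *           do_work(arg);
 *           // schedule the next expiry about one second out, same lcore
 *           rte_timer_reset(tim, rte_get_timer_hz(), SINGLE,
 *                           rte_lcore_id(), rearm_cb, arg);
 *   }
 */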

static int
__rte_timer_stop(struct rte_timer *tim, int local_is_locked,
                 struct rte_timer_data *timer_data)
{
        union rte_timer_status prev_status, status;
        unsigned lcore_id = rte_lcore_id();
        int ret;
        struct priv_timer *priv_timer = timer_data->priv_timer;

        /* wait until the timer is in the correct status before updating,
         * and mark it as being configured */
        ret = timer_set_config_state(tim, &prev_status, priv_timer);
        if (ret < 0)
                return -1;

        __TIMER_STAT_ADD(priv_timer, stop, 1);
        if (prev_status.state == RTE_TIMER_RUNNING &&
            lcore_id < RTE_MAX_LCORE) {
                priv_timer[lcore_id].updated = 1;
        }

        /* remove it from list */
        if (prev_status.state == RTE_TIMER_PENDING) {
                timer_del(tim, prev_status, local_is_locked, priv_timer);
                __TIMER_STAT_ADD(priv_timer, pending, -1);
        }

        /* mark timer as stopped */
        status.state = RTE_TIMER_STOP;
        status.owner = RTE_TIMER_NO_OWNER;
        /* The "RELEASE" ordering guarantees the memory operations above
         * the status update are observed before the update by all threads
         */
        __atomic_store_n(&tim->status.u32, status.u32, __ATOMIC_RELEASE);

        return 0;
}

/* Stop the timer associated with the timer handle tim */
int
rte_timer_stop(struct rte_timer *tim)
{
        return rte_timer_alt_stop(default_data_id, tim);
}

int
rte_timer_alt_stop(uint32_t timer_data_id, struct rte_timer *tim)
{
        struct rte_timer_data *timer_data;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

        return __rte_timer_stop(tim, 0, timer_data);
}

/* loop until rte_timer_stop() succeeds */
void
rte_timer_stop_sync(struct rte_timer *tim)
{
        while (rte_timer_stop(tim) != 0)
                rte_pause();
}

/* Test the PENDING status of the timer handle tim */
int
rte_timer_pending(struct rte_timer *tim)
{
        return __atomic_load_n(&tim->status.state,
                                __ATOMIC_RELAXED) == RTE_TIMER_PENDING;
}

/* must be called periodically; run all timers that have expired */
static void
__rte_timer_manage(struct rte_timer_data *timer_data)
{
        union rte_timer_status status;
        struct rte_timer *tim, *next_tim;
        struct rte_timer *run_first_tim, **pprev;
        unsigned lcore_id = rte_lcore_id();
        struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
        uint64_t cur_time;
        int i, ret;
        struct priv_timer *priv_timer = timer_data->priv_timer;

        /* timer manager only runs on EAL thread with valid lcore_id */
        assert(lcore_id < RTE_MAX_LCORE);

        __TIMER_STAT_ADD(priv_timer, manage, 1);
        /* optimize for the case where per-cpu list is empty */
        if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL)
                return;
        cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
        /* on 64-bit the value cached in pending_head.expire will be
         * updated atomically, so we can consult that for a quick check here
         * outside the lock */
        if (likely(priv_timer[lcore_id].pending_head.expire > cur_time))
                return;
#endif

        /* browse ordered list, add expired timers in 'expired' list */
        rte_spinlock_lock(&priv_timer[lcore_id].list_lock);

        /* if nothing to do just unlock and return */
        if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL ||
            priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) {
                rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
                return;
        }

        /* save start of list of expired timers */
        tim = priv_timer[lcore_id].pending_head.sl_next[0];

        /* break the existing list at the current time point */
        timer_get_prev_entries(cur_time, lcore_id, prev, priv_timer);
        for (i = priv_timer[lcore_id].curr_skiplist_depth - 1; i >= 0; i--) {
                if (prev[i] == &priv_timer[lcore_id].pending_head)
                        continue;
                priv_timer[lcore_id].pending_head.sl_next[i] =
                    prev[i]->sl_next[i];
                if (prev[i]->sl_next[i] == NULL)
                        priv_timer[lcore_id].curr_skiplist_depth--;
                prev[i]->sl_next[i] = NULL;
        }

        /* transition run-list from PENDING to RUNNING */
        run_first_tim = tim;
        pprev = &run_first_tim;

        for ( ; tim != NULL; tim = next_tim) {
                next_tim = tim->sl_next[0];

                ret = timer_set_running_state(tim);
                if (likely(ret == 0)) {
                        pprev = &tim->sl_next[0];
                } else {
                        /* another core is trying to re-config this one,
                         * remove it from local expired list
                         */
                        *pprev = next_tim;
                }
        }

        /* update the next to expire timer value */
        priv_timer[lcore_id].pending_head.expire =
            (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 :
                priv_timer[lcore_id].pending_head.sl_next[0]->expire;

        rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

        /* now scan expired list and call callbacks */
        for (tim = run_first_tim; tim != NULL; tim = next_tim) {
                next_tim = tim->sl_next[0];
                priv_timer[lcore_id].updated = 0;
                priv_timer[lcore_id].running_tim = tim;

                /* execute callback function with list unlocked */
                tim->f(tim, tim->arg);

                __TIMER_STAT_ADD(priv_timer, pending, -1);
                /* the timer was stopped or reloaded by the callback
                 * function, we have nothing to do here */
                if (priv_timer[lcore_id].updated == 1)
                        continue;

                if (tim->period == 0) {
                        /* remove from done list and mark timer as stopped */
                        status.state = RTE_TIMER_STOP;
                        status.owner = RTE_TIMER_NO_OWNER;
                        /* The "RELEASE" ordering guarantees the memory
                         * operations above the status update are observed
                         * before the update by all threads
                         */
                        __atomic_store_n(&tim->status.u32, status.u32,
                                __ATOMIC_RELEASE);
                } else {
                        /* keep it in list and mark timer as pending */
                        rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
                        status.state = RTE_TIMER_PENDING;
                        __TIMER_STAT_ADD(priv_timer, pending, 1);
                        status.owner = (int16_t)lcore_id;
                        /* The "RELEASE" ordering guarantees the memory
                         * operations above the status update are observed
                         * before the update by all threads
                         */
                        __atomic_store_n(&tim->status.u32, status.u32,
                                __ATOMIC_RELEASE);
                        __rte_timer_reset(tim, tim->expire + tim->period,
                                tim->period, lcore_id, tim->f, tim->arg, 1,
                                timer_data);
                        rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);
                }
        }
        priv_timer[lcore_id].running_tim = NULL;
}

int
rte_timer_manage(void)
{
        struct rte_timer_data *timer_data;

        TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

        __rte_timer_manage(timer_data);

        return 0;
}
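
/*
 * Usage sketch (informational comment): a typical lcore loop that throttles
 * rte_timer_manage() calls, similar to the pattern in the DPDK timer sample
 * application.  TIMER_RESOLUTION_CYCLES and the loop body are hypothetical
 * application code.
 *
 *   #define TIMER_RESOLUTION_CYCLES 20000ULL   // roughly 10 us at 2 GHz
 *
 *   static int lcore_mainloop(void *arg __rte_unused)
 *   {
 *           uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
 *
 *           while (1) {
 *                   cur_tsc = rte_get_timer_cycles();
 *                   diff_tsc = cur_tsc - prev_tsc;
 *                   if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
 *                           rte_timer_manage();
 *                           prev_tsc = cur_tsc;
 *                   }
 *                   // other per-lcore work goes here
 *           }
 *           return 0;
 *   }
 */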

int
rte_timer_alt_manage(uint32_t timer_data_id,
                     unsigned int *poll_lcores,
                     int nb_poll_lcores,
                     rte_timer_alt_manage_cb_t f)
{
        unsigned int default_poll_lcores[] = {rte_lcore_id()};
        union rte_timer_status status;
        struct rte_timer *tim, *next_tim, **pprev;
        struct rte_timer *run_first_tims[RTE_MAX_LCORE];
        unsigned int this_lcore = rte_lcore_id();
        struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1];
        uint64_t cur_time;
        int i, j, ret;
        int nb_runlists = 0;
        struct rte_timer_data *data;
        struct priv_timer *privp;
        uint32_t poll_lcore;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, data, -EINVAL);

        /* timer manager only runs on EAL thread with valid lcore_id */
        assert(this_lcore < RTE_MAX_LCORE);

        __TIMER_STAT_ADD(data->priv_timer, manage, 1);

        if (poll_lcores == NULL) {
                poll_lcores = default_poll_lcores;
                nb_poll_lcores = RTE_DIM(default_poll_lcores);
        }

        for (i = 0; i < nb_poll_lcores; i++) {
                poll_lcore = poll_lcores[i];
                privp = &data->priv_timer[poll_lcore];

                /* optimize for the case where per-cpu list is empty */
                if (privp->pending_head.sl_next[0] == NULL)
                        continue;
                cur_time = rte_get_timer_cycles();

#ifdef RTE_ARCH_64
                /* on 64-bit the value cached in pending_head.expire will
                 * be updated atomically, so we can consult that for a quick
                 * check here outside the lock
                 */
                if (likely(privp->pending_head.expire > cur_time))
                        continue;
#endif

                /* browse ordered list, add expired timers in 'expired' list */
                rte_spinlock_lock(&privp->list_lock);

                /* if nothing to do just unlock and move to the next lcore */
                if (privp->pending_head.sl_next[0] == NULL ||
                    privp->pending_head.sl_next[0]->expire > cur_time) {
                        rte_spinlock_unlock(&privp->list_lock);
                        continue;
                }

                /* save start of list of expired timers */
                tim = privp->pending_head.sl_next[0];

                /* break the existing list at the current time point */
                timer_get_prev_entries(cur_time, poll_lcore, prev,
                                       data->priv_timer);
                for (j = privp->curr_skiplist_depth - 1; j >= 0; j--) {
                        if (prev[j] == &privp->pending_head)
                                continue;
                        privp->pending_head.sl_next[j] =
                                prev[j]->sl_next[j];
                        if (prev[j]->sl_next[j] == NULL)
                                privp->curr_skiplist_depth--;

                        prev[j]->sl_next[j] = NULL;
                }

                /* transition run-list from PENDING to RUNNING */
                run_first_tims[nb_runlists] = tim;
                pprev = &run_first_tims[nb_runlists];
                nb_runlists++;

                for ( ; tim != NULL; tim = next_tim) {
                        next_tim = tim->sl_next[0];

                        ret = timer_set_running_state(tim);
                        if (likely(ret == 0)) {
                                pprev = &tim->sl_next[0];
                        } else {
                                /* another core is trying to re-config this one,
                                 * remove it from local expired list
                                 */
                                *pprev = next_tim;
                        }
                }

                /* update the next to expire timer value */
                privp->pending_head.expire =
                    (privp->pending_head.sl_next[0] == NULL) ? 0 :
                        privp->pending_head.sl_next[0]->expire;

                rte_spinlock_unlock(&privp->list_lock);
        }

        /* Now process the run lists */
        while (1) {
                bool done = true;
                uint64_t min_expire = UINT64_MAX;
                int min_idx = 0;

                /* Find the next oldest timer to process */
                for (i = 0; i < nb_runlists; i++) {
                        tim = run_first_tims[i];

                        if (tim != NULL && tim->expire < min_expire) {
                                min_expire = tim->expire;
                                min_idx = i;
                                done = false;
                        }
                }

                if (done)
                        break;

                tim = run_first_tims[min_idx];

                /* Move down the runlist from which we picked a timer to
                 * execute
                 */
                run_first_tims[min_idx] = run_first_tims[min_idx]->sl_next[0];

                data->priv_timer[this_lcore].updated = 0;
                data->priv_timer[this_lcore].running_tim = tim;

                /* Call the provided callback function */
                f(tim);

                __TIMER_STAT_ADD(data->priv_timer, pending, -1);

                /* the timer was stopped or reloaded by the callback
                 * function, we have nothing to do here
                 */
                if (data->priv_timer[this_lcore].updated == 1)
                        continue;

                if (tim->period == 0) {
                        /* remove from done list and mark timer as stopped */
                        status.state = RTE_TIMER_STOP;
                        status.owner = RTE_TIMER_NO_OWNER;
                        /* The "RELEASE" ordering guarantees the memory
                         * operations above the status update are observed
                         * before the update by all threads
                         */
                        __atomic_store_n(&tim->status.u32, status.u32,
                                __ATOMIC_RELEASE);
                } else {
                        /* keep it in list and mark timer as pending */
                        rte_spinlock_lock(
                                &data->priv_timer[this_lcore].list_lock);
                        status.state = RTE_TIMER_PENDING;
                        __TIMER_STAT_ADD(data->priv_timer, pending, 1);
                        status.owner = (int16_t)this_lcore;
                        /* The "RELEASE" ordering guarantees the memory
                         * operations above the status update are observed
                         * before the update by all threads
                         */
                        __atomic_store_n(&tim->status.u32, status.u32,
                                __ATOMIC_RELEASE);
                        __rte_timer_reset(tim, tim->expire + tim->period,
                                tim->period, this_lcore, tim->f, tim->arg, 1,
                                data);
                        rte_spinlock_unlock(
                                &data->priv_timer[this_lcore].list_lock);
                }

                data->priv_timer[this_lcore].running_tim = NULL;
        }

        return 0;
}
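
/*
 * Usage sketch (informational comment): driving a dedicated timer data
 * instance through the _alt_ APIs, as an event/service layer might.  The
 * callback and tick count are hypothetical; error handling is abbreviated.
 * Note that when expiries are processed by rte_timer_alt_manage(), the
 * callback passed to that call is invoked instead of the fct stored in the
 * handle, so fct/arg may be left NULL here.
 *
 *   static void expiry_cb(struct rte_timer *tim)
 *   {
 *           // handle the expired timer
 *   }
 *
 *   uint32_t timer_id;
 *   struct rte_timer tim;
 *
 *   // requires a prior successful rte_timer_subsystem_init()
 *   rte_timer_data_alloc(&timer_id);
 *   rte_timer_init(&tim);
 *   rte_timer_alt_reset(timer_id, &tim, rte_get_timer_hz(), PERIODICAL,
 *                       rte_lcore_id(), NULL, NULL);
 *
 *   // in the polling loop (NULL polls only the calling lcore's list):
 *   rte_timer_alt_manage(timer_id, NULL, 0, expiry_cb);
 */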

/* Walk pending lists, stopping timers and calling user-specified function */
int
rte_timer_stop_all(uint32_t timer_data_id, unsigned int *walk_lcores,
                   int nb_walk_lcores,
                   rte_timer_stop_all_cb_t f, void *f_arg)
{
        int i;
        struct priv_timer *priv_timer;
        uint32_t walk_lcore;
        struct rte_timer *tim, *next_tim;
        struct rte_timer_data *timer_data;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

        for (i = 0; i < nb_walk_lcores; i++) {
                walk_lcore = walk_lcores[i];
                priv_timer = &timer_data->priv_timer[walk_lcore];

                rte_spinlock_lock(&priv_timer->list_lock);

                for (tim = priv_timer->pending_head.sl_next[0];
                     tim != NULL;
                     tim = next_tim) {
                        next_tim = tim->sl_next[0];

                        /* Call timer_stop with lock held */
                        __rte_timer_stop(tim, 1, timer_data);

                        if (f)
                                f(tim, f_arg);
                }

                rte_spinlock_unlock(&priv_timer->list_lock);
        }

        return 0;
}

int64_t
rte_timer_next_ticks(void)
{
        unsigned int lcore_id = rte_lcore_id();
        struct rte_timer_data *timer_data;
        struct priv_timer *priv_timer;
        const struct rte_timer *tm;
        uint64_t cur_time;
        int64_t left = -ENOENT;

        TIMER_DATA_VALID_GET_OR_ERR_RET(default_data_id, timer_data, -EINVAL);

        priv_timer = timer_data->priv_timer;
        cur_time = rte_get_timer_cycles();

        rte_spinlock_lock(&priv_timer[lcore_id].list_lock);
        tm = priv_timer[lcore_id].pending_head.sl_next[0];
        if (tm) {
                left = tm->expire - cur_time;
                if (left < 0)
                        left = 0;
        }
        rte_spinlock_unlock(&priv_timer[lcore_id].list_lock);

        return left;
}
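
/*
 * Usage sketch (informational comment): inside an lcore's service loop, the
 * tick count can be used to sleep instead of busy-polling between calls to
 * rte_timer_manage().  The conversion and the 100 us cap are hypothetical
 * application choices; US_PER_S and rte_delay_us_sleep() come from
 * <rte_cycles.h>.
 *
 *   int64_t ticks = rte_timer_next_ticks();
 *   uint64_t cycles_per_us = rte_get_timer_hz() / US_PER_S;
 *
 *   if (ticks > 0 && cycles_per_us > 0) {
 *           uint64_t us = (uint64_t)ticks / cycles_per_us;
 *
 *           rte_delay_us_sleep(RTE_MIN(us, UINT64_C(100)));
 *   }
 *   rte_timer_manage();
 */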

/* dump statistics about timers */
static void
__rte_timer_dump_stats(struct rte_timer_data *timer_data __rte_unused, FILE *f)
{
#ifdef RTE_LIBRTE_TIMER_DEBUG
        struct rte_timer_debug_stats sum;
        unsigned lcore_id;
        struct priv_timer *priv_timer = timer_data->priv_timer;

        memset(&sum, 0, sizeof(sum));
        for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
                sum.reset += priv_timer[lcore_id].stats.reset;
                sum.stop += priv_timer[lcore_id].stats.stop;
                sum.manage += priv_timer[lcore_id].stats.manage;
                sum.pending += priv_timer[lcore_id].stats.pending;
        }
        fprintf(f, "Timer statistics:\n");
        fprintf(f, "  reset = %"PRIu64"\n", sum.reset);
        fprintf(f, "  stop = %"PRIu64"\n", sum.stop);
        fprintf(f, "  manage = %"PRIu64"\n", sum.manage);
        fprintf(f, "  pending = %"PRIu64"\n", sum.pending);
#else
        fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n");
#endif
}

int
rte_timer_dump_stats(FILE *f)
{
        return rte_timer_alt_dump_stats(default_data_id, f);
}

int
rte_timer_alt_dump_stats(uint32_t timer_data_id __rte_unused, FILE *f)
{
        struct rte_timer_data *timer_data;

        TIMER_DATA_VALID_GET_OR_ERR_RET(timer_data_id, timer_data, -EINVAL);

        __rte_timer_dump_stats(timer_data, f);

        return 0;
}