dpdk/app/test/test_barrier.c
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2018 Intel Corporation
   3 */
   4
/*
 * This is a simple functional test for the rte_smp_mb() implementation,
 * i.e. it makes sure that LOAD and STORE operations that precede the
 * rte_smp_mb() call are globally visible across the lcores
 * before the LOAD and STORE operations that follow it.
 * The test uses a simple implementation of Peterson's lock algorithm
 * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm)
 * for two execution units to make sure that rte_smp_mb() prevents
 * store-load reordering from happening.
 * Also, when executed on a single lcore, it can be used as an approximate
 * estimate of the number of cycles a particular implementation of
 * rte_smp_mb() takes.
 */
  18
  19#include <stdio.h>
  20#include <string.h>
  21#include <stdint.h>
  22#include <inttypes.h>
  23
  24#include <rte_memory.h>
  25#include <rte_per_lcore.h>
  26#include <rte_launch.h>
  27#include <rte_eal.h>
  28#include <rte_lcore.h>
  29#include <rte_pause.h>
  30#include <rte_random.h>
  31#include <rte_cycles.h>
  32#include <rte_vect.h>
  33#include <rte_debug.h>
  34
  35#include "test.h"
  36
  37#define ADD_MAX         8
  38#define ITER_MAX        0x1000000
  39
  40enum plock_use_type {
  41        USE_MB,
  42        USE_SMP_MB,
  43        USE_NUM
  44};
  45
  46struct plock {
  47        volatile uint32_t flag[2];
  48        volatile uint32_t victim;
  49        enum plock_use_type utype;
  50};
  51
  52/*
  53 * Lock plus protected by it two counters.
  54 */
  55struct plock_test {
  56        struct plock lock;
  57        uint64_t val;
  58        uint64_t iter;
  59};
  60
  61/*
  62 * Each active lcore shares plock_test struct with it's left and right
  63 * neighbours.
  64 */
  65struct lcore_plock_test {
  66        struct plock_test *pt[2]; /* shared, lock-protected data */
  67        uint64_t sum[2];          /* local copy of the shared data */
  68        uint64_t iter;            /* number of iterations to perform */
  69        uint32_t lc;              /* given lcore id */
  70};
  71
  72static inline void
  73store_load_barrier(uint32_t utype)
  74{
  75        if (utype == USE_MB)
  76                rte_mb();
  77        else if (utype == USE_SMP_MB)
  78                rte_smp_mb();
  79        else
  80                RTE_VERIFY(0);
  81}
  82
  83/*
  84 * Peterson lock implementation.
  85 */
  86static void
  87plock_lock(struct plock *l, uint32_t self)
  88{
  89        uint32_t other;
  90
  91        other = self ^ 1;
  92
  93        l->flag[self] = 1;
  94        rte_smp_wmb();
  95        l->victim = self;
  96
  97        store_load_barrier(l->utype);
  98
  99        while (l->flag[other] == 1 && l->victim == self)
 100                rte_pause();
 101        rte_smp_rmb();
 102}
 103
 104static void
 105plock_unlock(struct plock *l, uint32_t self)
 106{
 107        rte_smp_wmb();
 108        l->flag[self] = 0;
 109}
 110
 111static void
 112plock_reset(struct plock *l, enum plock_use_type utype)
 113{
 114        memset(l, 0, sizeof(*l));
 115        l->utype = utype;
 116}
 117
 118/*
 119 * grab the lock, update both counters, release the lock.
 120 */
 121static void
 122plock_add(struct plock_test *pt, uint32_t self, uint32_t n)
 123{
 124        plock_lock(&pt->lock, self);
 125        pt->iter++;
 126        pt->val += n;
 127        plock_unlock(&pt->lock, self);
 128}
 129
 130static int
 131plock_test1_lcore(void *data)
 132{
 133        uint64_t tm;
 134        uint32_t lc, ln;
 135        uint64_t i, n;
 136        struct lcore_plock_test *lpt;
 137
 138        lpt = data;
 139        lc = rte_lcore_id();
 140
 141        /* find lcore_plock_test struct for given lcore */
 142        for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--)
 143                ;
 144
 145        if (ln == 0) {
 146                printf("%s(%u) error at init\n", __func__, lc);
 147                return -1;
 148        }
 149
 150        n = rte_rand() % ADD_MAX;
 151        tm = rte_get_timer_cycles();
 152
 153        /*
 154         * for each iteration:
 155         * - update shared, locked protected data in a safe manner
 156         * - update local copy of the shared data
 157         */
 158        for (i = 0; i != lpt->iter; i++) {
 159
 160                plock_add(lpt->pt[0], 0, n);
 161                plock_add(lpt->pt[1], 1, n);
 162
 163                lpt->sum[0] += n;
 164                lpt->sum[1] += n;
 165
 166                n = (n + 1) % ADD_MAX;
 167        }
 168
 169        tm = rte_get_timer_cycles() - tm;
 170
 171        printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64
 172                " cycles, %#Lf cycles/iteration, "
 173                "local sum={%" PRIu64 ", %" PRIu64 "}\n",
 174                __func__, lc, i, tm, (long double)tm / i,
 175                lpt->sum[0], lpt->sum[1]);
 176        return 0;
 177}
 178
 179/*
 180 * For N active lcores we allocate N+1 lcore_plock_test structures.
 181 * Each active lcore shares one lcore_plock_test structure with its
 182 * left lcore neighbor and one lcore_plock_test structure with its
 183 * right lcore neighbor.
 184 * During the test each lcore updates data in both shared structures and
 185 * its local copies. Then at validation phase we check that our shared
 186 * and local data are the same.
 187 */
 188static int
 189plock_test(uint64_t iter, enum plock_use_type utype)
 190{
 191        int32_t rc;
 192        uint32_t i, lc, n;
 193        uint64_t *sum;
 194        struct plock_test *pt;
 195        struct lcore_plock_test *lpt;
 196
 197        /* init phase, allocate and initialize shared data */
 198
 199        n = rte_lcore_count();
 200        pt = calloc(n + 1, sizeof(*pt));
 201        lpt = calloc(n, sizeof(*lpt));
 202        sum = calloc(n + 1, sizeof(*sum));
 203
 204        printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n",
 205                __func__, iter, utype, n);
 206
 207        if (pt == NULL || lpt == NULL || sum == NULL) {
 208                printf("%s: failed to allocate memory for %u lcores\n",
 209                        __func__, n);
 210                free(pt);
 211                free(lpt);
 212                free(sum);
 213                return -ENOMEM;
 214        }
 215
 216        for (i = 0; i != n + 1; i++)
 217                plock_reset(&pt[i].lock, utype);
 218
 219        i = 0;
 220        RTE_LCORE_FOREACH(lc) {
 221
 222                lpt[i].lc = lc;
 223                lpt[i].iter = iter;
 224                lpt[i].pt[0] = pt + i;
 225                lpt[i].pt[1] = pt + i + 1;
 226                i++;
 227        }
 228
 229        lpt[i - 1].pt[1] = pt;
 230
 231        for (i = 0; i != n; i++)
 232                printf("lpt[%u]={lc=%u, pt={%p, %p},};\n",
 233                        i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]);
 234
 235
 236        /* test phase - start and wait for completion on each active lcore */
 237
 238        rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN);
 239        rte_eal_mp_wait_lcore();
 240
 241        /* validation phase - make sure that shared and local data match */
 242
 243        for (i = 0; i != n; i++) {
 244                sum[i] += lpt[i].sum[0];
 245                sum[i + 1] += lpt[i].sum[1];
 246        }
 247
 248        sum[0] += sum[i];
 249
 250        rc = 0;
 251        for (i = 0; i != n; i++) {
 252                printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n",
 253                        __func__, i, sum[i], i, pt[i].val, i, pt[i].iter);
 254
 255                /* race condition occurred, lock doesn't work properly */
 256                if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) {
 257                        printf("error: local and shared sums don't match\n");
 258                        rc = -1;
 259                }
 260        }
 261
 262        free(pt);
 263        free(lpt);
 264        free(sum);
 265
 266        printf("%s(utype=%u) returns %d\n", __func__, utype, rc);
 267        return rc;
 268}
 269
 270static int
 271test_barrier(void)
 272{
 273        int32_t i, ret, rc[USE_NUM];
 274
 275        for (i = 0; i != RTE_DIM(rc); i++)
 276                rc[i] = plock_test(ITER_MAX, i);
 277
 278        ret = 0;
 279        for (i = 0; i != RTE_DIM(rc); i++) {
 280                printf("%s for utype=%d %s\n",
 281                        __func__, i, rc[i] == 0 ? "passed" : "failed");
 282                ret |= rc[i];
 283        }
 284
 285        return ret;
 286}
 287
 288REGISTER_TEST_COMMAND(barrier_autotest, test_barrier);
 289