qemu/util/lockcnt.c
/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <pbonzini@redhat.com>
 */
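/*
 * Illustrative usage sketch (hypothetical xyz_lockcnt protecting a shared
 * structure; the names are examples, not taken from this file):
 *
 *     qemu_lockcnt_inc(&xyz_lockcnt);      // visitor: pin the structure
 *     ... read the protected data ...
 *     qemu_lockcnt_dec(&xyz_lockcnt);
 *
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {   // last visitor
 *         ... free the protected data ...
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */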
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2
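
/* For example (illustrative only): a count value of 9, i.e.
 * (2 << QEMU_LOCKCNT_COUNT_SHIFT) | QEMU_LOCKCNT_STATE_LOCKED, encodes a
 * counter of 2 with the lock held and no waiters.
 */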

void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

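/* Nothing to free: the futex-based implementation keeps all of its state
 * in lockcnt->count itself.
 */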
void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = atomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = atomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

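/* Wake up one thread sleeping in qemu_futex_wait() on lockcnt->count. */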
static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

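/* Increment the counter.  If the counter is zero and the lock is taken,
 * wait for the lock to be released before performing the 0->1 transition;
 * otherwise the increment proceeds regardless of the lock state.
 */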
void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

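/* Decrement the counter.  This never blocks and never wakes anyone up,
 * because it leaves the lock bits untouched.
 */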
void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

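/* Take the lock without touching the counter.  While the lock is held,
 * a 0->1 increment of the counter waits in qemu_lockcnt_inc; increments
 * and decrements of a nonzero counter proceed without blocking.
 */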
void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

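/* Increment the counter and release the lock in a single atomic update,
 * then wake up one waiter if the lock was contended.
 */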
void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

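/* Release the lock, leaving the counter unchanged, and wake up one waiter
 * if the lock was contended.
 */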
void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

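/* Return the current value of the counter, discarding the lock bits. */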
unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
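/* Generic fallback: the lock is a QemuMutex and the counter is updated
 * with plain atomics.  An increment that finds a zero counter takes the
 * mutex first, so it synchronizes with a concurrent lock holder.
 */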
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

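/* Increment the counter.  If the counter is zero, take the mutex before
 * doing the 0->1 transition, so that the increment cannot race with a
 * thread holding the lock.
 */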
void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = atomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    while (val > 1) {
        int old = atomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = atomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    atomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count);
}
#endif