linux/lib/percpu-refcount.c
// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "%s: " fmt "\n", __func__

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

/*
 * Initially, a percpu refcount is just a set of percpu counters, and we don't
 * try to detect the ref hitting 0 - which means that get/put can just
 * increment or decrement the local counter. Note that the counter on a
 * particular cpu can (and will) wrap - this is fine, because when we go to
 * shutdown the percpu counters will all sum to the correct value.
 *
 * (More precisely: because modular arithmetic is commutative the sum of all the
 * percpu_count vars will be equal to what it would have been if all the gets
 * and puts were done to a single integer, even if some of the percpu integers
 * overflow or underflow).
 *
 * The real trick to implementing percpu refcounts is shutdown. We can't detect
 * the ref hitting 0 on every put - this would require global synchronization
 * and defeat the whole purpose of using percpu refs.
 *
 * What we do is require the user to keep track of the initial refcount; we know
 * the ref can't hit 0 before the user drops the initial ref, so as long as we
 * convert to non percpu mode before the initial ref is dropped everything
 * works.
 *
 * Converting to non percpu mode is done with some RCUish stuff in
 * percpu_ref_kill. Additionally, we need a bias value so that the
 * atomic_long_t can't hit 0 before we've added up all the percpu refs.
 */
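/*
 * A small worked example of the claim above: suppose the per-cpu counters
 * were only 4 bits wide (i.e. values mod 16).  20 gets on CPU0 leave its
 * counter at 20 mod 16 = 4; 20 puts on CPU1 leave its counter at
 * -20 mod 16 = 12.  Each value is meaningless on its own, but the sum,
 * 4 + 12 = 16 = 0 (mod 16), matches the net reference count exactly as if
 * a single integer had been used - which is all the shutdown path relies on.
 */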

#define PERCPU_COUNT_BIAS       (1LU << (BITS_PER_LONG - 1))

static DEFINE_SPINLOCK(percpu_ref_switch_lock);
static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);

static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
        return (unsigned long __percpu *)
                (ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}

/**
 * percpu_ref_init - initialize a percpu refcount
 * @ref: percpu_ref to initialize
 * @release: function which will be called when refcount hits 0
 * @flags: PERCPU_REF_INIT_* flags
 * @gfp: allocation mask to use
 *
 * Initializes @ref.  If @flags is zero, @ref starts in percpu mode with a
 * refcount of 1; analogous to atomic_long_set(ref, 1).  See the
 * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
 *
 * Note that @release must not sleep - it may potentially be called from RCU
 * callback context by percpu_ref_kill().
 */
int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
                    unsigned int flags, gfp_t gfp)
{
        size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
                             __alignof__(unsigned long));
        unsigned long start_count = 0;

        ref->percpu_count_ptr = (unsigned long)
                __alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
        if (!ref->percpu_count_ptr)
                return -ENOMEM;

        ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
        ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;

        if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
                ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
                ref->allow_reinit = true;
        } else {
                start_count += PERCPU_COUNT_BIAS;
        }

        if (flags & PERCPU_REF_INIT_DEAD)
                ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
        else
                start_count++;

        atomic_long_set(&ref->count, start_count);

        ref->release = release;
        ref->confirm_switch = NULL;
        return 0;
}
EXPORT_SYMBOL_GPL(percpu_ref_init);
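/*
 * A minimal usage sketch for the common case (illustrative only; "struct foo",
 * foo_release() and the surrounding code are hypothetical):
 *
 *        struct foo {
 *                struct percpu_ref ref;
 *                ...
 *        };
 *
 *        static void foo_release(struct percpu_ref *ref)
 *        {
 *                struct foo *foo = container_of(ref, struct foo, ref);
 *
 *                percpu_ref_exit(ref);
 *                kfree(foo);
 *        }
 *
 *        // start in percpu mode, holding the initial ref
 *        err = percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL);
 *
 *        // fast-path users
 *        percpu_ref_get(&foo->ref);
 *        ...
 *        percpu_ref_put(&foo->ref);
 *
 *        // teardown: drop the initial ref; foo_release() runs once the
 *        // last reference is gone
 *        percpu_ref_kill(&foo->ref);
 */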

/**
 * percpu_ref_exit - undo percpu_ref_init()
 * @ref: percpu_ref to exit
 *
 * This function exits @ref.  The caller is responsible for ensuring that
 * @ref is no longer in active use.  The usual places to invoke this
 * function from are the @ref->release() callback or the init failure path
 * where percpu_ref_init() succeeded but other parts of the initialization
 * of the embedding object failed.
 */
void percpu_ref_exit(struct percpu_ref *ref)
{
        unsigned long __percpu *percpu_count = percpu_count_ptr(ref);

        if (percpu_count) {
                /* non-NULL confirm_switch indicates switching in progress */
                WARN_ON_ONCE(ref->confirm_switch);
                free_percpu(percpu_count);
                ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
        }
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);

static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
{
        struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);

        ref->confirm_switch(ref);
        ref->confirm_switch = NULL;
        wake_up_all(&percpu_ref_switch_waitq);

        if (!ref->allow_reinit)
                percpu_ref_exit(ref);

        /* drop ref from percpu_ref_switch_to_atomic() */
        percpu_ref_put(ref);
}

static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
        struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
        unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
        unsigned long count = 0;
        int cpu;

        for_each_possible_cpu(cpu)
                count += *per_cpu_ptr(percpu_count, cpu);

        pr_debug("global %ld percpu %ld",
                 atomic_long_read(&ref->count), (long)count);

        /*
         * It's crucial that we sum the percpu counters _before_ adding the sum
         * to &ref->count; since gets could be happening on one cpu while puts
         * happen on another, adding a single cpu's count could cause
         * @ref->count to hit 0 before we've got a consistent value - but the
         * sum of all the counts will be consistent and correct.
         *
         * Subtracting the bias value then has to happen _after_ adding count to
         * &ref->count; we need the bias value to prevent &ref->count from
         * reaching 0 before we add the percpu counts. But doing it at the same
         * time is equivalent and saves us atomic operations:
         */
        atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);

        WARN_ONCE(atomic_long_read(&ref->count) <= 0,
                  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
                  ref->release, atomic_long_read(&ref->count));

        /* @ref is viewed as dead on all CPUs, send out switch confirmation */
        percpu_ref_call_confirm_rcu(rcu);
}

static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
{
}

static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
                                          percpu_ref_func_t *confirm_switch)
{
        if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
                if (confirm_switch)
                        confirm_switch(ref);
                return;
        }

        /* switching from percpu to atomic */
        ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;

        /*
         * Non-NULL ->confirm_switch is used to indicate that switching is
         * in progress.  Use noop one if unspecified.
         */
        ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;

        percpu_ref_get(ref);    /* put after confirmation */
        call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
}

static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
        unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
        int cpu;

        BUG_ON(!percpu_count);

        if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
                return;

        if (WARN_ON_ONCE(!ref->allow_reinit))
                return;

        atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);

        /*
         * Restore per-cpu operation.  smp_store_release() is paired
         * with READ_ONCE() in __ref_is_percpu() and guarantees that the
         * zeroing is visible to all percpu accesses which can see the
         * following __PERCPU_REF_ATOMIC clearing.
         */
        for_each_possible_cpu(cpu)
                *per_cpu_ptr(percpu_count, cpu) = 0;

        smp_store_release(&ref->percpu_count_ptr,
                          ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
}

static void __percpu_ref_switch_mode(struct percpu_ref *ref,
                                     percpu_ref_func_t *confirm_switch)
{
        lockdep_assert_held(&percpu_ref_switch_lock);

        /*
         * If the previous ATOMIC switching hasn't finished yet, wait for
         * its completion.  If the caller ensures that ATOMIC switching
         * isn't in progress, this function can be called from any context.
         */
        wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
                            percpu_ref_switch_lock);

        if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
                __percpu_ref_switch_to_atomic(ref, confirm_switch);
        else
                __percpu_ref_switch_to_percpu(ref);
}

/**
 * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 * @confirm_switch: optional confirmation callback
 *
 * There's no reason to use this function for the usual reference counting.
 * Use percpu_ref_kill[_and_confirm]().
 *
 * Schedule switching of @ref to atomic mode.  All its percpu counts will
 * be collected to the main atomic counter.  On completion, when all CPUs
 * are guaranteed to be in atomic mode, @confirm_switch, which may not
 * block, is invoked.  This function may be invoked concurrently with all
 * the get/put operations and can safely be mixed with kill and reinit
 * operations.  Note that @ref will stay in atomic mode across kill/reinit
 * cycles until percpu_ref_switch_to_percpu() is called.
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode.  If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
                                 percpu_ref_func_t *confirm_switch)
{
        unsigned long flags;

        spin_lock_irqsave(&percpu_ref_switch_lock, flags);

        ref->force_atomic = true;
        __percpu_ref_switch_mode(ref, confirm_switch);

        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);
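/*
 * A sketch of using the confirmation callback to learn asynchronously when
 * atomic mode has been reached (illustrative only; "struct foo",
 * foo_atomic_confirm() and the atomic_done member are hypothetical):
 *
 *        static void foo_atomic_confirm(struct percpu_ref *ref)
 *        {
 *                struct foo *foo = container_of(ref, struct foo, ref);
 *
 *                // must not block: may run from RCU callback context
 *                complete(&foo->atomic_done);
 *        }
 *
 *        init_completion(&foo->atomic_done);
 *        percpu_ref_switch_to_atomic(&foo->ref, foo_atomic_confirm);
 *        ...
 *        wait_for_completion(&foo->atomic_done);
 *
 * Callers that can simply block should prefer
 * percpu_ref_switch_to_atomic_sync() below.
 */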

/**
 * percpu_ref_switch_to_atomic_sync - switch a percpu_ref to atomic mode
 * @ref: percpu_ref to switch to atomic mode
 *
 * Schedule switching the ref to atomic mode, and wait for the
 * switch to complete.  Caller must ensure that no other thread
 * will switch back to percpu mode.
 */
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
{
        percpu_ref_switch_to_atomic(ref, NULL);
        wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
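/*
 * This is the blocking counterpart of the confirmation-callback sketch above:
 * the switch is scheduled and the caller then sleeps on
 * percpu_ref_switch_waitq until the confirmation has run.  A caller that just
 * needs exact counting from some point on can do (with a hypothetical "foo"):
 *
 *        percpu_ref_switch_to_atomic_sync(&foo->ref);
 *        // every get/put now updates foo->ref.count; the atomic state is
 *        // sticky until percpu_ref_switch_to_percpu() is called
 */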

/**
 * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
 * @ref: percpu_ref to switch to percpu mode
 *
 * There's no reason to use this function for the usual reference counting.
 * To re-use an expired ref, use percpu_ref_reinit().
 *
 * Switch @ref to percpu mode.  This function may be invoked concurrently
 * with all the get/put operations and can safely be mixed with kill and
 * reinit operations.  This function reverses the sticky atomic state set
 * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
 * dying or dead, the actual switching takes place on the following
 * percpu_ref_reinit().
 *
 * This function may block if @ref is in the process of switching to atomic
 * mode.  If the caller ensures that @ref is not in the process of
 * switching to atomic mode, this function can be called from any context.
 */
void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
{
        unsigned long flags;

        spin_lock_irqsave(&percpu_ref_switch_lock, flags);

        ref->force_atomic = false;
        __percpu_ref_switch_mode(ref, NULL);

        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
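/*
 * A sketch of the two-phase pattern this enables (illustrative only; "foo"
 * and foo_activate() are hypothetical): start in exactly-counted atomic mode
 * while the object is being set up, then drop to percpu mode for the fast
 * path.  PERCPU_REF_INIT_ATOMIC also sets ->allow_reinit, so the switch back
 * is permitted.
 *
 *        err = percpu_ref_init(&foo->ref, foo_release,
 *                              PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
 *        ...
 *        static void foo_activate(struct foo *foo)
 *        {
 *                // setup finished; make get/put cheap again
 *                percpu_ref_switch_to_percpu(&foo->ref);
 *        }
 */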

/**
 * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 * @ref: percpu_ref to kill
 * @confirm_kill: optional confirmation callback
 *
 * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
 * called after @ref is seen as dead from all CPUs at which point all
 * further invocations of percpu_ref_tryget_live() will fail.  See
 * percpu_ref_tryget_live() for details.
 *
 * This function normally doesn't block and can be called from any context
 * but it may block if @confirm_kill is specified and @ref is in the
 * process of switching to atomic mode by percpu_ref_switch_to_atomic().
 *
 * There are no implied RCU grace periods between kill and release.
 */
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                 percpu_ref_func_t *confirm_kill)
{
        unsigned long flags;

        spin_lock_irqsave(&percpu_ref_switch_lock, flags);

        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
                  "%s called more than once on %ps!", __func__, ref->release);

        ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
        __percpu_ref_switch_mode(ref, confirm_kill);
        percpu_ref_put(ref);

        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
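/*
 * A sketch of using the kill confirmation to drive the next teardown step
 * (illustrative only; "struct foo", foo_confirm_kill() and teardown_work are
 * hypothetical):
 *
 *        static void foo_confirm_kill(struct percpu_ref *ref)
 *        {
 *                struct foo *foo = container_of(ref, struct foo, ref);
 *
 *                // must not block; at this point no CPU can succeed in
 *                // percpu_ref_tryget_live() on foo->ref anymore
 *                schedule_work(&foo->teardown_work);
 *        }
 *
 *        percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 */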

/**
 * percpu_ref_reinit - re-initialize a percpu refcount
 * @ref: percpu_ref to re-initialize
 *
 * Re-initialize @ref so that it's in the same state as when it finished
 * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
 * initialized successfully and reached 0 but not exited.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_reinit(struct percpu_ref *ref)
{
        WARN_ON_ONCE(!percpu_ref_is_zero(ref));

        percpu_ref_resurrect(ref);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);

/**
 * percpu_ref_resurrect - modify a percpu refcount from dead to live
 * @ref: percpu_ref to resurrect
 *
 * Modify @ref so that it's in the same state as before percpu_ref_kill() was
 * called. @ref must be dead but must not yet have exited.
 *
 * If @ref->release() frees @ref then the caller is responsible for
 * guaranteeing that @ref->release() does not get called while this
 * function is in progress.
 *
 * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 * this function is in progress.
 */
void percpu_ref_resurrect(struct percpu_ref *ref)
{
        unsigned long __percpu *percpu_count;
        unsigned long flags;

        spin_lock_irqsave(&percpu_ref_switch_lock, flags);

        WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
        WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));

        ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
        percpu_ref_get(ref);
        __percpu_ref_switch_mode(ref, NULL);

        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
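/*
 * A sketch of a freeze/unfreeze cycle built on kill + resurrect, similar in
 * spirit to request-queue freezing in the block layer (illustrative only;
 * "struct foo", free_wq and the foo_*() helpers are hypothetical).  The ref
 * must have been initialized with PERCPU_REF_ALLOW_REINIT or one of the
 * atomic init flags so that the percpu counters survive the kill.
 *
 *        static void foo_ref_release(struct percpu_ref *ref)
 *        {
 *                struct foo *foo = container_of(ref, struct foo, ref);
 *
 *                wake_up_all(&foo->free_wq);
 *        }
 *
 *        static void foo_freeze(struct foo *foo)
 *        {
 *                percpu_ref_kill(&foo->ref);     // drops the initial ref
 *                wait_event(foo->free_wq, percpu_ref_is_zero(&foo->ref));
 *        }
 *
 *        static void foo_unfreeze(struct foo *foo)
 *        {
 *                percpu_ref_resurrect(&foo->ref);
 *        }
 */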