/*
 * Percpu refcounts:
 * (C) 2012 Google, Inc.
 * Author: Kent Overstreet <koverstreet@google.com>
 *
 * This implements a refcount with similar semantics to atomic_t - atomic_inc(),
 * atomic_dec_and_test() - but percpu.
 *
 * There's one important difference between percpu refs and normal atomic_t
 * refcounts; you have to keep track of your initial refcount, and then when you
 * start shutting down you drop it by calling percpu_ref_kill() rather than
 * percpu_ref_put().
 *
 * The refcount will have a range of 0 to ((1UL << (BITS_PER_LONG - 1)) - 1),
 * i.e. one bit less than an atomic_long_t - this is because of the way
 * shutdown works, see percpu_ref_kill()/PERCPU_COUNT_BIAS.
 *
 * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
 * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
 * puts the ref back in single atomic_long_t mode, collecting the per cpu refs
 * and issuing the appropriate barriers, marks the ref as shutting down so that
 * percpu_ref_put() will check for the ref hitting 0, and then drops the
 * initial ref.
 *
 * USAGE:
 *
 * See fs/aio.c for some example usage; it's used there for struct kioctx, which
 * is created when userspace calls io_setup(), and destroyed when userspace
 * calls io_destroy() or the process exits.
 *
 * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
 * removes the kioctx from the process's table of kioctxs and kills the
 * percpu_ref, dropping the initial ref. After that, there can't be any new
 * users of the kioctx (from lookup_ioctx()) and the kioctx is released once
 * the remaining refs are dropped with percpu_ref_put().
 *
 * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
 * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't
 * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
 * with RCU protection, it must be done explicitly.
 *
 * Code that does a two stage shutdown like this often needs some kind of
 * explicit synchronization to ensure the initial refcount is only dropped
 * once - percpu_ref_kill() must be called precisely once, so if teardown can
 * be initiated from more than one place, the caller needs its own mechanism
 * (the aio code has one in kill_ioctx()) to make sure only one of them
 * actually calls it.
 */

#ifndef _LINUX_PERCPU_REFCOUNT_H
#define _LINUX_PERCPU_REFCOUNT_H

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/gfp.h>

struct percpu_ref;
typedef void (percpu_ref_func_t)(struct percpu_ref *);

/* flags set in the lower bits of percpu_ref->percpu_count_ptr */
enum {
	__PERCPU_REF_ATOMIC	= 1LU << 0,	/* operating in atomic mode */
	__PERCPU_REF_DEAD	= 1LU << 1,	/* (being) killed */
	__PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD,

	__PERCPU_REF_FLAG_BITS	= 2,
};

/* @flags for percpu_ref_init() */
enum {
	/*
	 * Start w/ ref == 1 in atomic mode. Can be switched to percpu
	 * operation using percpu_ref_switch_to_percpu(). If initialized
	 * with this flag, the ref will stay in atomic mode until
	 * percpu_ref_switch_to_percpu() is invoked on it.
	 */
	PERCPU_REF_INIT_ATOMIC	= 1 << 0,

	/*
	 * Start dead w/ ref == 0 in atomic mode. Must be revived with
	 * percpu_ref_reinit() before used. Implies INIT_ATOMIC.
	 */
	PERCPU_REF_INIT_DEAD	= 1 << 1,
};
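
/*
 * Example: a typical way to embed a percpu_ref in an object. This is only an
 * illustrative sketch - "struct foo", foo_release() and foo_create() are
 * made-up names, not part of this API. Passing 0 as @flags to
 * percpu_ref_init() starts the ref in percpu mode with a count of 1; the
 * flags above can be used to start in atomic mode or dead instead.
 *
 *	struct foo {
 *		struct percpu_ref ref;
 *		...
 *	};
 *
 *	static void foo_release(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		percpu_ref_exit(&foo->ref);
 *		kfree(foo);
 *	}
 *
 *	static struct foo *foo_create(gfp_t gfp)
 *	{
 *		struct foo *foo = kzalloc(sizeof(*foo), gfp);
 *
 *		if (!foo)
 *			return NULL;
 *		if (percpu_ref_init(&foo->ref, foo_release, 0, gfp)) {
 *			kfree(foo);
 *			return NULL;
 *		}
 *		return foo;
 *	}
 *
 * foo_release() runs once the count reaches 0; calling percpu_ref_exit()
 * there frees the percpu counters before the object itself is freed.
 * Ordinary users then pair percpu_ref_get()/percpu_ref_put(), and teardown
 * drops the initial ref with percpu_ref_kill().
 */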

struct percpu_ref {
	atomic_long_t		count;
	/*
	 * The low bits of the pointer hold the __PERCPU_REF_* flags above;
	 * if __PERCPU_REF_ATOMIC is set, get/put manipulate the
	 * atomic_long_t count instead of the percpu counters.
	 */
	unsigned long		percpu_count_ptr;
	percpu_ref_func_t	*release;
	percpu_ref_func_t	*confirm_switch;
	bool			force_atomic:1;
	struct rcu_head		rcu;
};

int __must_check percpu_ref_init(struct percpu_ref *ref,
				 percpu_ref_func_t *release, unsigned int flags,
				 gfp_t gfp);
void percpu_ref_exit(struct percpu_ref *ref);
void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_switch);
void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref);
void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
				 percpu_ref_func_t *confirm_kill);
void percpu_ref_reinit(struct percpu_ref *ref);

/**
 * percpu_ref_kill - drop the initial ref
 * @ref: percpu_ref to kill
 *
 * Must be used to drop the initial ref on a percpu refcount; must be called
 * precisely once before shutdown.
 *
 * Switches @ref into atomic mode before gathering up the percpu counters
 * and dropping the initial ref.
 *
 * There are no implied RCU grace periods between kill and release.
 */
static inline void percpu_ref_kill(struct percpu_ref *ref)
{
	return percpu_ref_kill_and_confirm(ref, NULL);
}
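
/*
 * Example: shutdown with a confirmation callback. This is only an
 * illustrative sketch - "struct foo", foo->confirm_done (a struct completion
 * initialized at creation time), foo_confirm_kill() and foo_shutdown() are
 * made-up names, and it assumes only one path can reach foo_shutdown(). The
 * confirm callback is invoked once percpu_ref_tryget_live() is guaranteed to
 * hand out no new references (see its comment below); here it just wakes the
 * teardown path:
 *
 *	static void foo_confirm_kill(struct percpu_ref *ref)
 *	{
 *		struct foo *foo = container_of(ref, struct foo, ref);
 *
 *		complete(&foo->confirm_done);
 *	}
 *
 *	static void foo_shutdown(struct foo *foo)
 *	{
 *		percpu_ref_kill_and_confirm(&foo->ref, foo_confirm_kill);
 *		wait_for_completion(&foo->confirm_done);
 *	}
 *
 * Waiting for the confirmation only guarantees that no new references will
 * be handed out; the release callback still runs only after the remaining
 * references have been dropped with percpu_ref_put().
 */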

/*
 * Internal helper. Don't use outside percpu-refcount proper. The
 * function doesn't return the pointer and let the caller test it for NULL
 * because doing so forces the compiler to generate two conditional
 * branches as it can't assume that @ref->percpu_count is not NULL.
 */
static inline bool __ref_is_percpu(struct percpu_ref *ref,
				   unsigned long __percpu **percpu_countp)
{
	unsigned long percpu_ptr;

	/*
	 * The value of @ref->percpu_count_ptr is tested for
	 * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then
	 * used as a pointer. If the compiler generates a separate fetch
	 * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in
	 * between contaminating the pointer value, meaning that
	 * READ_ONCE() is required when fetching it.
	 *
	 * The smp_read_barrier_depends() implied by READ_ONCE() pairs
	 * with smp_store_release() in __percpu_ref_switch_to_percpu().
	 */
	percpu_ptr = READ_ONCE(ref->percpu_count_ptr);

	/*
	 * Theoretically, the following could test just ATOMIC; however,
	 * then we'd have to mask off DEAD separately as DEAD may be
	 * visible without ATOMIC if we race with percpu_ref_kill(). DEAD
	 * implies ATOMIC anyway. Test them together.
	 */
	if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD))
		return false;

	*percpu_countp = (unsigned long __percpu *)percpu_ptr;
	return true;
}

/**
 * percpu_ref_get - increment a percpu refcount
 * @ref: percpu_ref to get
 *
 * Analogous to atomic_long_inc().
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_get(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;

	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count))
		this_cpu_inc(*percpu_count);
	else
		atomic_long_inc(&ref->count);

	rcu_read_unlock_sched();
}

/**
 * percpu_ref_tryget - try to increment a percpu refcount
 * @ref: percpu_ref to try-get
 *
 * Increment a percpu refcount unless its count already reached zero.
 * Returns %true on success; %false on failure.
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline bool percpu_ref_tryget(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	bool ret;

	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count)) {
		this_cpu_inc(*percpu_count);
		ret = true;
	} else {
		ret = atomic_long_inc_not_zero(&ref->count);
	}

	rcu_read_unlock_sched();

	return ret;
}

/**
 * percpu_ref_tryget_live - try to increment a live percpu refcount
 * @ref: percpu_ref to try-get
 *
 * Increment a percpu refcount unless it has already been killed. Returns
 * %true on success; %false on failure.
 *
 * Completion of percpu_ref_kill() in itself doesn't guarantee that this
 * function will fail. For such guarantee, percpu_ref_kill_and_confirm()
 * should be used. After the confirm_kill callback is invoked, it's
 * guaranteed that no new reference will be given out by
 * percpu_ref_tryget_live().
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;
	bool ret = false;

	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count)) {
		this_cpu_inc(*percpu_count);
		ret = true;
	} else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
		ret = atomic_long_inc_not_zero(&ref->count);
	}

	rcu_read_unlock_sched();

	return ret;
}
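
/*
 * Example: the RCU-protected lookup pattern described at the top of this
 * file, handing out new references only while the ref is live. This is an
 * illustrative sketch, not the actual fs/aio.c code; "struct foo",
 * foo_lookup() and foo_idr are made-up names:
 *
 *	static struct foo *foo_lookup(unsigned long id)
 *	{
 *		struct foo *foo;
 *
 *		rcu_read_lock();
 *		foo = idr_find(&foo_idr, id);
 *		if (foo && !percpu_ref_tryget_live(&foo->ref))
 *			foo = NULL;
 *		rcu_read_unlock();
 *
 *		return foo;
 *	}
 *
 * The caller drops the ref with percpu_ref_put() when done. Once teardown
 * has killed the ref and removed the object from foo_idr, lookups like the
 * above stop taking new references and the object is released when the last
 * remaining ref is put; as noted at the top of this file, the actual freeing
 * must still be deferred with call_rcu() so that a concurrent foo_lookup()
 * never touches freed memory.
 */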

/**
 * percpu_ref_put - decrement a percpu refcount
 * @ref: percpu_ref to put
 *
 * Decrement the refcount, and if 0, call the release function (which was passed
 * to percpu_ref_init())
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_put(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;

	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count))
		this_cpu_dec(*percpu_count);
	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
		ref->release(ref);

	rcu_read_unlock_sched();
}

/**
 * percpu_ref_is_dying - test whether a percpu refcount is dying or dead
 * @ref: percpu_ref to test
 *
 * Returns %true if @ref is dying or dead.
 *
 * This function is safe to call as long as @ref is between init and exit
 * and the caller is responsible for synchronizing against state changes.
 */
static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
{
	return ref->percpu_count_ptr & __PERCPU_REF_DEAD;
}

/**
 * percpu_ref_is_zero - test whether a percpu refcount reached zero
 * @ref: percpu_ref to test
 *
 * Returns %true if @ref reached zero.
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
{
	unsigned long __percpu *percpu_count;

	if (__ref_is_percpu(ref, &percpu_count))
		return false;
	return !atomic_long_read(&ref->count);
}

#endif