/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Author: Dipankar Sarma <dipankar@in.ibm.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		http://lse.sourceforge.net/locking/rcupdate.html
 *
 */

#ifndef __LINUX_RCUPDATE_H
#define __LINUX_RCUPDATE_H

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>
#include <linux/lockdep.h>
#include <linux/completion.h>
#include <linux/debugobjects.h>
#include <linux/bug.h>
#include <linux/compiler.h>

#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable; /* for sysctl */
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
extern void rcutorture_record_test_transition(void);
extern void rcutorture_record_progress(unsigned long vernum);
extern void do_trace_rcu_torture_read(char *rcutorturename,
				      struct rcu_head *rhp,
				      unsigned long secs,
				      unsigned long c_old,
				      unsigned long c);
#else
static inline void rcutorture_record_test_transition(void)
{
}
static inline void rcutorture_record_progress(unsigned long vernum)
{
}
#ifdef CONFIG_RCU_TRACE
extern void do_trace_rcu_torture_read(char *rcutorturename,
				      struct rcu_head *rhp,
				      unsigned long secs,
				      unsigned long c_old,
				      unsigned long c);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
	do { } while (0)
#endif
#endif

#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
#define ulong2long(a)		(*(long *)(&(a)))

/* Exported common interfaces */

#ifdef CONFIG_PREEMPT_RCU

/**
 * call_rcu() - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all pre-existing RCU read-side
 * critical sections have completed. However, the callback function
 * might well execute concurrently with RCU read-side critical sections
 * that started after call_rcu() was invoked.
 * RCU read-side critical sections are delimited by rcu_read_lock()
 * and rcu_read_unlock(), and may be nested.
 *
 * Note that all CPUs must agree that the grace period extended beyond
 * all pre-existing RCU read-side critical sections. On systems with more
 * than one CPU, this means that when "func()" is invoked, each CPU is
 * guaranteed to have executed a full memory barrier since the end of its
 * last RCU read-side critical section whose beginning preceded the call
 * to call_rcu(). It also means that each CPU executing an RCU read-side
 * critical section that continues beyond the start of "func()" must have
 * executed a memory barrier after the call_rcu() but before the beginning
 * of that RCU read-side critical section. Note that these guarantees
 * include CPUs that are offline, idle, or executing in user mode, as
 * well as CPUs that are executing in the kernel.
 *
 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
 * resulting RCU callback function "func()", then both CPU A and CPU B are
 * guaranteed to execute a full memory barrier during the time interval
 * between the call to call_rcu() and the invocation of "func()" -- even
 * if CPU A and CPU B are the same CPU (but again only if the system has
 * more than one CPU).
 */
extern void call_rcu(struct rcu_head *head,
		     void (*func)(struct rcu_head *head));

#else /* #ifdef CONFIG_PREEMPT_RCU */

/* In classic RCU, call_rcu() is just call_rcu_sched(). */
#define	call_rcu	call_rcu_sched

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
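/*
 * A minimal sketch of the call_rcu() update-side pattern described
 * above. The names (struct foo, gbl_foo, foo_lock, foo_reclaim) are
 * illustrative only and are not part of this header:
 *
 *	struct foo {
 *		int a;
 *		struct rcu_head rcu;
 *	};
 *
 *	static struct foo __rcu *gbl_foo;
 *	static DEFINE_SPINLOCK(foo_lock);
 *
 *	static void foo_reclaim(struct rcu_head *head)
 *	{
 *		kfree(container_of(head, struct foo, rcu));
 *	}
 *
 *	static void foo_update(struct foo *new_fp)
 *	{
 *		struct foo *old_fp;
 *
 *		spin_lock(&foo_lock);
 *		old_fp = rcu_dereference_protected(gbl_foo,
 *						   lockdep_is_held(&foo_lock));
 *		rcu_assign_pointer(gbl_foo, new_fp);
 *		spin_unlock(&foo_lock);
 *		if (old_fp)
 *			call_rcu(&old_fp->rcu, foo_reclaim);
 *	}
 *
 * Pre-existing readers may still be referencing the old structure when
 * foo_update() returns; foo_reclaim() is invoked only after all of them
 * have exited their RCU read-side critical sections.
 */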
/**
 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_bh() assumes
 * that the read-side critical sections end on completion of a softirq
 * handler. This means that read-side critical sections in process
 * context must not be interrupted by softirqs. This interface is to be
 * used when most of the read-side critical sections are in softirq context.
 * RCU read-side critical sections are delimited by:
 *  - rcu_read_lock() and rcu_read_unlock(), if in interrupt context,
 *    OR
 *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
 * These may be nested.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
extern void call_rcu_bh(struct rcu_head *head,
			void (*func)(struct rcu_head *head));

/**
 * call_rcu_sched() - Queue an RCU callback for invocation after sched grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all currently executing RCU
 * read-side critical sections have completed. call_rcu_sched() assumes
 * that the read-side critical sections end on enabling of preemption
 * or on voluntary preemption.
 * RCU read-side critical sections are delimited by:
 *  - rcu_read_lock_sched() and rcu_read_unlock_sched(),
 *    OR
 *  - anything that disables preemption.
 * These may be nested.
 *
 * See the description of call_rcu() for more detailed information on
 * memory ordering guarantees.
 */
extern void call_rcu_sched(struct rcu_head *head,
			   void (*func)(struct rcu_head *rcu));

extern void synchronize_sched(void);
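/*
 * A minimal sketch of the RCU-sched flavor declared above, assuming a
 * hypothetical gbl_ops pointer protected by gbl_lock on the update side
 * (all names illustrative only). A reader runs with preemption disabled:
 *
 *	rcu_read_lock_sched();		// or preempt_disable()
 *	ops = rcu_dereference_sched(gbl_ops);
 *	if (ops)
 *		ops->handler();
 *	rcu_read_unlock_sched();	// or preempt_enable()
 *
 * while the updater replaces the structure and waits for such readers:
 *
 *	spin_lock(&gbl_lock);
 *	old = rcu_dereference_protected(gbl_ops, lockdep_is_held(&gbl_lock));
 *	rcu_assign_pointer(gbl_ops, new);
 *	spin_unlock(&gbl_lock);
 *	synchronize_sched();		// all preemption-disabled readers done
 *	kfree(old);
 */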
#ifdef CONFIG_PREEMPT_RCU

extern void __rcu_read_lock(void);
extern void __rcu_read_unlock(void);
extern void rcu_read_unlock_special(struct task_struct *t);
void synchronize_rcu(void);

/*
 * Defined as a macro as it is a very low level header included from
 * areas that don't even know about current. This gives the rcu_read_lock()
 * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
 * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
 */
#define rcu_preempt_depth() (current->rcu_read_lock_nesting)

#else /* #ifdef CONFIG_PREEMPT_RCU */

static inline void __rcu_read_lock(void)
{
	preempt_disable();
}

static inline void __rcu_read_unlock(void)
{
	preempt_enable();
}

static inline void synchronize_rcu(void)
{
	synchronize_sched();
}

static inline int rcu_preempt_depth(void)
{
	return 0;
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/* Internal to kernel */
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
struct notifier_block;
extern void rcu_idle_enter(void);
extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);

#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
#else /* #ifdef CONFIG_RCU_STALL_COMMON */
static inline void rcu_sysrq_start(void)
{
}
static inline void rcu_sysrq_end(void)
{
}
#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */

#ifdef CONFIG_RCU_USER_QS
extern void rcu_user_enter(void);
extern void rcu_user_exit(void);
extern void rcu_user_enter_after_irq(void);
extern void rcu_user_exit_after_irq(void);
#else
static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
static inline void rcu_user_enter_after_irq(void) { }
static inline void rcu_user_exit_after_irq(void) { }
static inline void rcu_user_hooks_switch(struct task_struct *prev,
					 struct task_struct *next) { }
#endif /* CONFIG_RCU_USER_QS */

extern void exit_rcu(void);

#ifdef CONFIG_RCU_NOCB_CPU
void rcu_init_nohz(void);
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
static inline void rcu_init_nohz(void)
{
}
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

/**
 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
 * @a: Code that RCU needs to pay attention to.
 *
 * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
 * in the inner idle loop, that is, between the rcu_idle_enter() and
 * the rcu_idle_exit() -- RCU will happily ignore any such read-side
 * critical sections. However, things like powertop need tracepoints
 * in the inner idle loop.
 *
 * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
 * will tell RCU that it needs to pay attention, invoke its argument
 * (in this example, a call to the do_something_with_RCU() function),
 * and then tell RCU to go back to ignoring this CPU. It is permissible
 * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
 * quite limited. If deeper nesting is required, it will be necessary
 * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
 */
#define RCU_NONIDLE(a) \
	do { \
		rcu_irq_enter(); \
		do { a; } while (0); \
		rcu_irq_exit(); \
	} while (0)
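/*
 * For example, idle-loop code that must fire a tracepoint (here a
 * hypothetical trace_foo_idle_event()) can wrap just that one statement:
 *
 *	RCU_NONIDLE(trace_foo_idle_event(cpu));
 *
 * RCU watches this CPU only for the duration of the wrapped statement.
 */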
#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
extern bool __rcu_is_watching(void);
#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */

/*
 * Infrastructure to implement the synchronize_() primitives in
 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
 */

typedef void call_rcu_func_t(struct rcu_head *head,
			     void (*func)(struct rcu_head *head));
void wait_rcu_gp(call_rcu_func_t crf);

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
#include <linux/rcutree.h>
#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
#include <linux/rcutiny.h>
#else
#error "Unknown RCU implementation specified to kernel configuration"
#endif

/*
 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
 * initialization and destruction of rcu_head on the stack. rcu_head structures
 * allocated dynamically in the heap or defined statically don't need any
 * initialization.
 */
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
extern void init_rcu_head_on_stack(struct rcu_head *head);
extern void destroy_rcu_head_on_stack(struct rcu_head *head);
#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
static inline void init_rcu_head_on_stack(struct rcu_head *head)
{
}

static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
{
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */

#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
bool rcu_lockdep_current_cpu_online(void);
#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
static inline bool rcu_lockdep_current_cpu_online(void)
{
	return 1;
}
#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */

#ifdef CONFIG_DEBUG_LOCK_ALLOC

static inline void rcu_lock_acquire(struct lockdep_map *map)
{
	lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
}

static inline void rcu_lock_release(struct lockdep_map *map)
{
	lock_release(map, 1, _THIS_IP_);
}

extern struct lockdep_map rcu_lock_map;
extern struct lockdep_map rcu_bh_lock_map;
extern struct lockdep_map rcu_sched_lock_map;
extern int debug_lockdep_rcu_enabled(void);

/**
 * rcu_read_lock_held() - might we be in RCU read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 * read-side critical section. In the absence of CONFIG_DEBUG_LOCK_ALLOC,
 * this assumes we are in an RCU read-side critical section unless it can
 * prove otherwise. This is useful for debug checks in functions that
 * require that they be called within an RCU read-side critical section.
 *
 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 * occur in the same context, for example, it is illegal to invoke
 * rcu_read_unlock() in process context if the matching rcu_read_lock()
 * was invoked from within an irq handler.
 *
 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
 * offline from an RCU perspective, so check for those as well.
 */
static inline int rcu_read_lock_held(void)
{
	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	return lock_is_held(&rcu_lock_map);
}

/*
 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
 * hell.
 */
extern int rcu_read_lock_bh_held(void);
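/*
 * These lockdep-based checks are typically used to guard functions that
 * must be called from within an RCU read-side critical section, for
 * example (foo_get() and gbl_foo are illustrative names only):
 *
 *	struct foo *foo_get(void)
 *	{
 *		WARN_ON_ONCE(!rcu_read_lock_held());
 *		return rcu_dereference(gbl_foo);
 *	}
 *
 * Equivalently, the condition can be passed directly to
 * rcu_dereference_check(), which is described later in this file.
 */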
/**
 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
 *
 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
 * RCU-sched read-side critical section. In the absence of
 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
 * critical section unless it can prove otherwise. Note that disabling
 * of preemption (including disabling irqs) counts as an RCU-sched
 * read-side critical section. This is useful for debug checks in functions
 * that require that they be called within an RCU-sched read-side
 * critical section.
 *
 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
 * and while lockdep is disabled.
 *
 * Note that if the CPU is in the idle loop from an RCU point of
 * view (ie: that we are in the section between rcu_idle_enter() and
 * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
 * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
 * that are in such a section, considering these as in extended quiescent
 * state, so such a CPU is effectively never in an RCU read-side critical
 * section regardless of what RCU primitives it invokes. This state of
 * affairs is required --- we need to keep an RCU-free window in idle
 * where the CPU may possibly enter into low power mode. This way we can
 * notice an extended quiescent state to other CPUs that started a grace
 * period. Otherwise we would delay any grace period as long as we run in
 * the idle task.
 *
 * Similarly, we avoid claiming an RCU-sched read lock held if the current
 * CPU is offline.
 */
#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
{
	int lockdep_opinion = 0;

	if (!debug_lockdep_rcu_enabled())
		return 1;
	if (!rcu_is_watching())
		return 0;
	if (!rcu_lockdep_current_cpu_online())
		return 0;
	if (debug_locks)
		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
}
#else /* #ifdef CONFIG_PREEMPT_COUNT */
static inline int rcu_read_lock_sched_held(void)
{
	return 1;
}
#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */

#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

# define rcu_lock_acquire(a)		do { } while (0)
# define rcu_lock_release(a)		do { } while (0)

static inline int rcu_read_lock_held(void)
{
	return 1;
}

static inline int rcu_read_lock_bh_held(void)
{
	return 1;
}

#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
{
	return preempt_count() != 0 || irqs_disabled();
}
#else /* #ifdef CONFIG_PREEMPT_COUNT */
static inline int rcu_read_lock_sched_held(void)
{
	return 1;
}
#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */

#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_PROVE_RCU

extern int rcu_my_thread_group_empty(void);

/**
 * rcu_lockdep_assert - emit lockdep splat if specified condition not met
 * @c: condition to check
 * @s: informative message
 */
#define rcu_lockdep_assert(c, s)					\
	do {								\
		static bool __section(.data.unlikely) __warned;		\
		if (debug_lockdep_rcu_enabled() && !__warned && !(c)) {	\
			__warned = true;				\
			lockdep_rcu_suspicious(__FILE__, __LINE__, s);	\
		}							\
	} while (0)

#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
static inline void rcu_preempt_sleep_check(void)
{
	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
			   "Illegal context switch in RCU read-side critical section");
}
#else /* #ifdef CONFIG_PROVE_RCU */
static inline void rcu_preempt_sleep_check(void)
{
}
#endif /* #else #ifdef CONFIG_PROVE_RCU */

#define rcu_sleep_check()						\
	do {								\
		rcu_preempt_sleep_check();				\
		rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),	\
				   "Illegal context switch in RCU-bh"	\
				   " read-side critical section");	\
		rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),	\
				   "Illegal context switch in RCU-sched"\
				   " read-side critical section");	\
	} while (0)

#else /* #ifdef CONFIG_PROVE_RCU */

#define rcu_lockdep_assert(c, s) do { } while (0)
#define rcu_sleep_check() do { } while (0)

#endif /* #else #ifdef CONFIG_PROVE_RCU */

/*
 * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
 * and rcu_assign_pointer(). Some of these could be folded into their
 * callers, but they are left separate in order to ease introduction of
 * multiple flavors of pointers to match the multiple flavors of RCU
 * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in
 * the future.
 */

#ifdef __CHECKER__
#define rcu_dereference_sparse(p, space) \
	((void)(((typeof(*p) space *)p) == p))
#else /* #ifdef __CHECKER__ */
#define rcu_dereference_sparse(p, space)
#endif /* #else #ifdef __CHECKER__ */

#define __rcu_access_pointer(p, space) \
	({ \
		typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \
		rcu_dereference_sparse(p, space); \
		((typeof(*p) __force __kernel *)(_________p1)); \
	})
#define __rcu_dereference_check(p, c, space) \
	({ \
		typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \
		rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
				      " usage"); \
		rcu_dereference_sparse(p, space); \
		smp_read_barrier_depends(); \
		((typeof(*p) __force __kernel *)(_________p1)); \
	})
#define __rcu_dereference_protected(p, c, space) \
	({ \
		rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
				      " usage"); \
		rcu_dereference_sparse(p, space); \
		((typeof(*p) __force __kernel *)(p)); \
	})

#define __rcu_access_index(p, space) \
	({ \
		typeof(p) _________p1 = ACCESS_ONCE(p); \
		rcu_dereference_sparse(p, space); \
		(_________p1); \
	})
#define __rcu_dereference_index_check(p, c) \
	({ \
		typeof(p) _________p1 = ACCESS_ONCE(p); \
		rcu_lockdep_assert(c, \
				   "suspicious rcu_dereference_index_check()" \
				   " usage"); \
		smp_read_barrier_depends(); \
		(_________p1); \
	})
#define __rcu_assign_pointer(p, v, space) \
	do { \
		smp_wmb(); \
		(p) = (typeof(*v) __force space *)(v); \
	} while (0)

/**
 * lockless_dereference() - safely load a pointer for later dereference
 * @p: The pointer to load
 *
 * Similar to rcu_dereference(), but for situations where the pointed-to
 * object's lifetime is managed by something other than RCU. That
 * "something other" might be reference counting or simple immortality.
 */
#define lockless_dereference(p) \
({ \
	typeof(p) _________p1 = ACCESS_ONCE(p); \
	smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
	(_________p1); \
})
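/*
 * For example, a pointer to an object whose lifetime is guaranteed by a
 * reference count already held by the caller, or by the object being
 * statically allocated, can be loaded without any RCU protection
 * (gbl_default_ops is an illustrative name only):
 *
 *	struct foo_ops *ops = lockless_dereference(gbl_default_ops);
 *
 * The dependency barrier still orders later dereferences of ops against
 * the load of the pointer itself, just as rcu_dereference() would.
 */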
 */
#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
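/*
 * For example, testing whether a hypothetical gbl_foo pointer has been
 * published, without entering an RCU read-side critical section:
 *
 *	if (rcu_access_pointer(gbl_foo))
 *		foo_schedule_cleanup();	// illustrative helper
 *
 * The pointer is only compared against NULL and never dereferenced, so
 * neither rcu_read_lock() nor rcu_dereference() is required.
 */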
/**
 * rcu_dereference_check() - rcu_dereference with debug checking
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * Do an rcu_dereference(), but check that the conditions under which the
 * dereference will take place are correct. Typically the conditions
 * indicate the various locking conditions that should be held at that
 * point. The check should return true if the conditions are satisfied.
 * An implicit check for being in an RCU read-side critical section
 * (rcu_read_lock()) is included.
 *
 * For example:
 *
 *	bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
 *
 * could be used to indicate to lockdep that foo->bar may only be dereferenced
 * if either rcu_read_lock() is held, or that the lock required to replace
 * the bar struct at foo->bar is held.
 *
 * Note that the list of conditions may also include indications of when a lock
 * need not be held, for example during initialisation or destruction of the
 * target struct:
 *
 *	bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
 *					      atomic_read(&foo->usage) == 0);
 *
 * Inserts memory barriers on architectures that require them
 * (currently only the Alpha), prevents the compiler from refetching
 * (and from merging fetches), and, more importantly, documents exactly
 * which pointers are protected by RCU and checks that the pointer is
 * annotated as __rcu.
 */
#define rcu_dereference_check(p, c) \
	__rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu)

/**
 * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * This is the RCU-bh counterpart to rcu_dereference_check().
 */
#define rcu_dereference_bh_check(p, c) \
	__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)

/**
 * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * This is the RCU-sched counterpart to rcu_dereference_check().
 */
#define rcu_dereference_sched_check(p, c) \
	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
				__rcu)

#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/

/*
 * The tracing infrastructure traces RCU (we want that), but unfortunately
 * some of the RCU checks cause tracing to lock up the system.
 *
 * The tracing version of rcu_dereference_raw() must not call
 * rcu_read_lock_held().
 */
#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)

/**
 * rcu_access_index() - fetch RCU index with no dereferencing
 * @p: The index to read
 *
 * Return the value of the specified RCU-protected index, but omit the
 * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
 * when the value of this index is accessed, but the index is not
 * dereferenced, for example, when testing an RCU-protected index against
 * -1. Although rcu_access_index() may also be used in cases where
 * update-side locks prevent the value of the index from changing, you
 * should instead use rcu_dereference_index_protected() for this use case.
 */
#define rcu_access_index(p) __rcu_access_index((p), __rcu)

/**
 * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * Similar to rcu_dereference_check(), but omits the sparse checking.
 * This allows rcu_dereference_index_check() to be used on integers,
 * which can then be used as array indices. Attempting to use
 * rcu_dereference_check() on an integer will give compiler warnings
 * because the sparse address-space mechanism relies on dereferencing
 * the RCU-protected pointer. Dereferencing integers is not something
 * that even gcc will put up with.
 *
 * Note that this function does not implicitly check for RCU read-side
 * critical sections. If this function gains lots of uses, it might
 * make sense to provide versions for each flavor of RCU, but it does
 * not make sense as of early 2010.
 */
#define rcu_dereference_index_check(p, c) \
	__rcu_dereference_index_check((p), (c))

/**
 * rcu_dereference_protected() - fetch RCU pointer when updates prevented
 * @p: The pointer to read, prior to dereferencing
 * @c: The conditions under which the dereference will take place
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
 * is useful in cases where update-side locks prevent the value of the
 * pointer from changing. Please note that this primitive does -not-
 * prevent the compiler from repeating this reference or combining it
 * with other references, so it should not be used without protection
 * of appropriate locks.
 *
 * This function is only for update-side use. Using this function
 * when protected only by rcu_read_lock() will result in infrequent
 * but very ugly failures.
 */
#define rcu_dereference_protected(p, c) \
	__rcu_dereference_protected((p), (c), __rcu)
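/*
 * For example, an updater that already holds the lock guarding all
 * modifications of a hypothetical gbl_foo pointer (foo_lock below, all
 * names illustrative) may fetch it without the read-side overhead:
 *
 *	spin_lock(&foo_lock);
 *	fp = rcu_dereference_protected(gbl_foo, lockdep_is_held(&foo_lock));
 *	if (fp)
 *		fp->a = new_value;	// update-side access, lock still held
 *	spin_unlock(&foo_lock);
 */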
/**
 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
 * @p: The pointer to read, prior to dereferencing
 *
 * This is a simple wrapper around rcu_dereference_check().
 */
#define rcu_dereference(p) rcu_dereference_check(p, 0)

/**
 * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
 * @p: The pointer to read, prior to dereferencing
 *
 * Makes rcu_dereference_check() do the dirty work.
 */
#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)

/**
 * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
 * @p: The pointer to read, prior to dereferencing
 *
 * Makes rcu_dereference_check() do the dirty work.
 */
#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)

/**
 * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism
 * @p: The pointer to hand off
 *
 * This is simply an identity function, but it documents where a pointer
 * is handed off from RCU to some other synchronization mechanism, for
 * example, reference counting or locking. In C11, it would map to
 * kill_dependency(). It could be used as follows:
 *
 *	rcu_read_lock();
 *	p = rcu_dereference(gp);
 *	long_lived = is_long_lived(p);
 *	if (long_lived) {
 *		if (!atomic_inc_not_zero(p->refcnt))
 *			long_lived = false;
 *		else
 *			p = rcu_pointer_handoff(p);
 *	}
 *	rcu_read_unlock();
 */
#define rcu_pointer_handoff(p) (p)

/**
 * rcu_read_lock() - mark the beginning of an RCU read-side critical section
 *
 * When synchronize_rcu() is invoked on one CPU while other CPUs
 * are within RCU read-side critical sections, then the
 * synchronize_rcu() is guaranteed to block until after all the other
 * CPUs exit their critical sections. Similarly, if call_rcu() is invoked
 * on one CPU while other CPUs are within RCU read-side critical
 * sections, invocation of the corresponding RCU callback is deferred
 * until after all the other CPUs exit their critical sections.
 *
 * Note, however, that RCU callbacks are permitted to run concurrently
 * with new RCU read-side critical sections. One way that this can happen
 * is via the following sequence of events: (1) CPU 0 enters an RCU
 * read-side critical section, (2) CPU 1 invokes call_rcu() to register
 * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
 * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
 * callback is invoked. This is legal, because the RCU read-side critical
 * section that was running concurrently with the call_rcu() (and which
 * therefore might be referencing something that the corresponding RCU
 * callback would free up) has completed before the corresponding
 * RCU callback is invoked.
 *
 * RCU read-side critical sections may be nested. Any deferred actions
 * will be deferred until the outermost RCU read-side critical section
 * completes.
 *
 * You can avoid reading and understanding the next paragraph by
 * following this rule: don't put anything in an rcu_read_lock() RCU
 * read-side critical section that would block in a !PREEMPT kernel.
 * But if you want the full story, read on!
 *
 * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
 * is illegal to block while in an RCU read-side critical section. In
 * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
 * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
 * be preempted, but explicit blocking is illegal. Finally, in preemptible
 * RCU implementations in real-time (with -rt patchset) kernel builds,
 * RCU read-side critical sections may be preempted and they may also
 * block, but only when acquiring spinlocks that are subject to priority
 * inheritance.
 */
static inline void rcu_read_lock(void)
{
	__rcu_read_lock();
	__acquire(RCU);
	rcu_lock_acquire(&rcu_lock_map);
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_lock() used illegally while idle");
}
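/*
 * A minimal sketch of the read side described above, using the same
 * illustrative gbl_foo pointer as in earlier examples:
 *
 *	rcu_read_lock();
 *	fp = rcu_dereference(gbl_foo);
 *	if (fp)
 *		do_something_with(fp->a);	// illustrative helper
 *	rcu_read_unlock();
 *
 * The pointer fp must not be dereferenced after rcu_read_unlock()
 * unless protection has been handed off to some other mechanism, for
 * example via rcu_pointer_handoff() as shown above.
 */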
/*
 * So where is rcu_write_lock()? It does not exist, as there is no
 * way for writers to lock out RCU readers. This is a feature, not
 * a bug -- this property is what provides RCU's performance benefits.
 * Of course, writers must coordinate with each other. The normal
 * spinlock primitives work well for this, but any other technique may be
 * used as well. RCU does not care how the writers keep out of each
 * others' way, as long as they do so.
 */

/**
 * rcu_read_unlock() - marks the end of an RCU read-side critical section.
 *
 * See rcu_read_lock() for more information.
 */
static inline void rcu_read_unlock(void)
{
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_unlock() used illegally while idle");
	rcu_lock_release(&rcu_lock_map);
	__release(RCU);
	__rcu_read_unlock();
}

/**
 * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
 *
 * This is the equivalent of rcu_read_lock(), but to be used when updates
 * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
 * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
 * softirq handler to be a quiescent state, a process in an RCU read-side
 * critical section must be protected by disabling softirqs. Read-side
 * critical sections in interrupt context can use just rcu_read_lock(),
 * though this should at least be commented to avoid confusing people
 * reading the code.
 *
 * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
 * must occur in the same context, for example, it is illegal to invoke
 * rcu_read_unlock_bh() from one task if the matching rcu_read_lock_bh()
 * was invoked from some other task.
 */
static inline void rcu_read_lock_bh(void)
{
	local_bh_disable();
	__acquire(RCU_BH);
	rcu_lock_acquire(&rcu_bh_lock_map);
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_lock_bh() used illegally while idle");
}

/*
 * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section
 *
 * See rcu_read_lock_bh() for more information.
 */
static inline void rcu_read_unlock_bh(void)
{
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_unlock_bh() used illegally while idle");
	rcu_lock_release(&rcu_bh_lock_map);
	__release(RCU_BH);
	local_bh_enable();
}

/**
 * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section
 *
 * This is the equivalent of rcu_read_lock(), but to be used when updates
 * are being done using call_rcu_sched() or synchronize_rcu_sched().
 * Read-side critical sections can also be introduced by anything that
 * disables preemption, including local_irq_disable() and friends.
 *
 * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
 * must occur in the same context, for example, it is illegal to invoke
 * rcu_read_unlock_sched() from process context if the matching
 * rcu_read_lock_sched() was invoked from an NMI handler.
 */
static inline void rcu_read_lock_sched(void)
{
	preempt_disable();
	__acquire(RCU_SCHED);
	rcu_lock_acquire(&rcu_sched_lock_map);
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_lock_sched() used illegally while idle");
}

/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
static inline notrace void rcu_read_lock_sched_notrace(void)
{
	preempt_disable_notrace();
	__acquire(RCU_SCHED);
}

/*
 * rcu_read_unlock_sched - marks the end of an RCU-classic critical section
 *
 * See rcu_read_lock_sched() for more information.
 */
static inline void rcu_read_unlock_sched(void)
{
	rcu_lockdep_assert(rcu_is_watching(),
			   "rcu_read_unlock_sched() used illegally while idle");
	rcu_lock_release(&rcu_sched_lock_map);
	__release(RCU_SCHED);
	preempt_enable();
}
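/*
 * A minimal sketch of an RCU-bh reader matching the call_rcu_bh() and
 * synchronize_rcu_bh() update-side primitives described earlier
 * (gbl_ops is an illustrative name only):
 *
 *	rcu_read_lock_bh();
 *	ops = rcu_dereference_bh(gbl_ops);
 *	if (ops)
 *		ops->poll();
 *	rcu_read_unlock_bh();
 *
 * The RCU-bh grace period cannot end until this softirq-disabled
 * region has completed.
 */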
/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
static inline notrace void rcu_read_unlock_sched_notrace(void)
{
	__release(RCU_SCHED);
	preempt_enable_notrace();
}

/**
 * rcu_assign_pointer() - assign to RCU-protected pointer
 * @p: pointer to assign to
 * @v: value to assign (publish)
 *
 * Assigns the specified value to the specified RCU-protected
 * pointer, ensuring that any concurrent RCU readers will see
 * any prior initialization.
 *
 * Inserts memory barriers on architectures that require them
 * (which is most of them), and also prevents the compiler from
 * reordering the code that initializes the structure after the pointer
 * assignment. More importantly, this call documents which pointers
 * will be dereferenced by RCU read-side code.
 *
 * In some special cases, you may use RCU_INIT_POINTER() instead
 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
 * to the fact that it does not constrain either the CPU or the compiler.
 * That said, using RCU_INIT_POINTER() when you should have used
 * rcu_assign_pointer() is a very bad thing that results in
 * impossible-to-diagnose memory corruption. So please be careful.
 * See the RCU_INIT_POINTER() comment header for details.
 */
#define rcu_assign_pointer(p, v) \
	__rcu_assign_pointer((p), (v), __rcu)

/**
 * RCU_INIT_POINTER() - initialize an RCU protected pointer
 *
 * Initialize an RCU-protected pointer in special cases where readers
 * do not need ordering constraints on the CPU or the compiler. These
 * special cases are:
 *
 * 1.	This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
 * 2.	The caller has taken whatever steps are required to prevent
 *	RCU readers from concurrently accessing this pointer -or-
 * 3.	The referenced data structure has already been exposed to
 *	readers either at compile time or via rcu_assign_pointer() -and-
 *	a.	You have not made -any- reader-visible changes to
 *		this structure since then -or-
 *	b.	It is OK for readers accessing this structure from its
 *		new location to see the old state of the structure. (For
 *		example, the changes were to statistical counters or to
 *		other state where exact synchronization is not required.)
 *
 * Failure to follow these rules governing use of RCU_INIT_POINTER() will
 * result in impossible-to-diagnose memory corruption. That is, the
 * structures will look OK in crash dumps, but any concurrent RCU readers
 * might see pre-initialized values of the referenced data structure. So
 * please be very careful how you use RCU_INIT_POINTER()!!!
 *
 * If you are creating an RCU-protected linked structure that is accessed
 * by a single external-to-structure RCU-protected pointer, then you may
 * use RCU_INIT_POINTER() to initialize the internal RCU-protected
 * pointers, but you must use rcu_assign_pointer() to initialize the
 * external-to-structure pointer -after- you have completely initialized
 * the reader-accessible portions of the linked structure.
 */
#define RCU_INIT_POINTER(p, v) \
	do { \
		p = (typeof(*v) __force __rcu *)(v); \
	} while (0)

/**
 * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
 *
 * GCC-style initialization for an RCU-protected pointer in a structure field.
 */
#define RCU_POINTER_INITIALIZER(p, v) \
		.p = (typeof(*v) __force __rcu *)(v)

/*
 * Does the specified offset indicate that the corresponding rcu_head
 * structure can be handled by kfree_rcu()?
 */
#define __is_kfree_rcu_offset(offset) ((offset) < 4096)

/*
 * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
 */
#define __kfree_rcu(head, offset) \
	do { \
		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
		kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
	} while (0)

/**
 * kfree_rcu() - kfree an object after a grace period.
 * @ptr:	pointer to kfree
 * @rcu_head:	the name of the struct rcu_head within the type of @ptr.
 *
 * Many rcu callback functions just call kfree() on the base structure.
 * These functions are trivial, but their size adds up, and furthermore
 * when they are used in a kernel module, that module must invoke the
 * high-latency rcu_barrier() function at module-unload time.
 *
 * The kfree_rcu() function handles this issue. Rather than encoding a
 * function address in the embedded rcu_head structure, kfree_rcu() instead
 * encodes the offset of the rcu_head structure within the base structure.
 * Because the functions are not allowed in the low-order 4096 bytes of
 * kernel virtual memory, offsets up to 4095 bytes can be accommodated.
 * If the offset is larger than 4095 bytes, a compile-time error will
 * be generated in __kfree_rcu(). If this error is triggered, you can
 * either fall back to use of call_rcu() or rearrange the structure to
 * position the rcu_head structure into the first 4096 bytes.
 *
 * Note that the allowable offset might decrease in the future, for example,
 * to allow something like kmem_cache_free_rcu().
 *
 * The BUILD_BUG_ON check must not involve any function calls, hence the
 * checks are done in macros here.
 */
#define kfree_rcu(ptr, rcu_head) \
	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
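/*
 * Putting RCU_INIT_POINTER(), rcu_assign_pointer() and kfree_rcu()
 * together, again using the illustrative struct foo from the call_rcu()
 * example above, here assumed to carry an additional ->next pointer:
 *
 *	new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
 *	if (!new_fp)
 *		return -ENOMEM;
 *	new_fp->a = 1;
 *	RCU_INIT_POINTER(new_fp->next, NULL);	// not yet visible to readers
 *
 *	spin_lock(&foo_lock);
 *	old_fp = rcu_dereference_protected(gbl_foo,
 *					   lockdep_is_held(&foo_lock));
 *	rcu_assign_pointer(gbl_foo, new_fp);	// publish the initialized node
 *	spin_unlock(&foo_lock);
 *
 *	if (old_fp)
 *		kfree_rcu(old_fp, rcu);		// kfree() after a grace period
 */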
#ifdef CONFIG_RCU_NOCB_CPU
extern bool rcu_is_nocb_cpu(int cpu);
#else
static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */


#endif /* __LINUX_RCUPDATE_H */