linux/kernel/rcu/rcu.h
<<
>>
Prefs
   1/*
   2 * Read-Copy Update definitions shared among RCU implementations.
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, you can access it online at
  16 * http://www.gnu.org/licenses/gpl-2.0.html.
  17 *
  18 * Copyright IBM Corporation, 2011
  19 *
  20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  21 */
  22
  23#ifndef __LINUX_RCU_H
  24#define __LINUX_RCU_H
  25
  26#include <trace/events/rcu.h>
  27
  28/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
  29#define DYNTICK_IRQ_NONIDLE     ((LONG_MAX / 2) + 1)
  30
  31
  32/*
  33 * Grace-period counter management.
  34 */
  35
  36#define RCU_SEQ_CTR_SHIFT       2
  37#define RCU_SEQ_STATE_MASK      ((1 << RCU_SEQ_CTR_SHIFT) - 1)
  38
  39/*
  40 * Return the counter portion of a sequence number previously returned
  41 * by rcu_seq_snap() or rcu_seq_current().
  42 */
  43static inline unsigned long rcu_seq_ctr(unsigned long s)
  44{
  45        return s >> RCU_SEQ_CTR_SHIFT;
  46}
  47
  48/*
  49 * Return the state portion of a sequence number previously returned
  50 * by rcu_seq_snap() or rcu_seq_current().
  51 */
  52static inline int rcu_seq_state(unsigned long s)
  53{
  54        return s & RCU_SEQ_STATE_MASK;
  55}
  56
  57/*
  58 * Set the state portion of the pointed-to sequence number.
  59 * The caller is responsible for preventing conflicting updates.
  60 */
  61static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
  62{
  63        WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
  64        WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
  65}
  66
  67/* Adjust sequence number for start of update-side operation. */
  68static inline void rcu_seq_start(unsigned long *sp)
  69{
  70        WRITE_ONCE(*sp, *sp + 1);
  71        smp_mb(); /* Ensure update-side operation after counter increment. */
  72        WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
  73}
  74
  75/* Compute the end-of-grace-period value for the specified sequence number. */
  76static inline unsigned long rcu_seq_endval(unsigned long *sp)
  77{
  78        return (*sp | RCU_SEQ_STATE_MASK) + 1;
  79}
  80
  81/* Adjust sequence number for end of update-side operation. */
  82static inline void rcu_seq_end(unsigned long *sp)
  83{
  84        smp_mb(); /* Ensure update-side operation before counter increment. */
  85        WARN_ON_ONCE(!rcu_seq_state(*sp));
  86        WRITE_ONCE(*sp, rcu_seq_endval(sp));
  87}
  88
  89/*
  90 * rcu_seq_snap - Take a snapshot of the update side's sequence number.
  91 *
  92 * This function returns the earliest value of the grace-period sequence number
  93 * that will indicate that a full grace period has elapsed since the current
  94 * time.  Once the grace-period sequence number has reached this value, it will
  95 * be safe to invoke all callbacks that have been registered prior to the
  96 * current time. This value is the current grace-period number plus two to the
  97 * power of the number of low-order bits reserved for state, then rounded up to
  98 * the next value in which the state bits are all zero.
  99 */
 100static inline unsigned long rcu_seq_snap(unsigned long *sp)
 101{
 102        unsigned long s;
 103
 104        s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
 105        smp_mb(); /* Above access must not bleed into critical section. */
 106        return s;
 107}
 108
 109/* Return the current value the update side's sequence number, no ordering. */
 110static inline unsigned long rcu_seq_current(unsigned long *sp)
 111{
 112        return READ_ONCE(*sp);
 113}
 114
 115/*
 116 * Given a snapshot from rcu_seq_snap(), determine whether or not the
 117 * corresponding update-side operation has started.
 118 */
 119static inline bool rcu_seq_started(unsigned long *sp, unsigned long s)
 120{
 121        return ULONG_CMP_LT((s - 1) & ~RCU_SEQ_STATE_MASK, READ_ONCE(*sp));
 122}
 123
 124/*
 125 * Given a snapshot from rcu_seq_snap(), determine whether or not a
 126 * full update-side operation has occurred.
 127 */
 128static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
 129{
 130        return ULONG_CMP_GE(READ_ONCE(*sp), s);
 131}
 132
 133/*
 134 * Has a grace period completed since the time the old gp_seq was collected?
 135 */
 136static inline bool rcu_seq_completed_gp(unsigned long old, unsigned long new)
 137{
 138        return ULONG_CMP_LT(old, new & ~RCU_SEQ_STATE_MASK);
 139}
 140
 141/*
 142 * Has a grace period started since the time the old gp_seq was collected?
 143 */
 144static inline bool rcu_seq_new_gp(unsigned long old, unsigned long new)
 145{
 146        return ULONG_CMP_LT((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK,
 147                            new);
 148}
 149
 150/*
 151 * Roughly how many full grace periods have elapsed between the collection
 152 * of the two specified grace periods?
 153 */
 154static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
 155{
 156        unsigned long rnd_diff;
 157
 158        if (old == new)
 159                return 0;
 160        /*
 161         * Compute the number of grace periods (still shifted up), plus
 162         * one if either of new and old is not an exact grace period.
 163         */
 164        rnd_diff = (new & ~RCU_SEQ_STATE_MASK) -
 165                   ((old + RCU_SEQ_STATE_MASK) & ~RCU_SEQ_STATE_MASK) +
 166                   ((new & RCU_SEQ_STATE_MASK) || (old & RCU_SEQ_STATE_MASK));
 167        if (ULONG_CMP_GE(RCU_SEQ_STATE_MASK, rnd_diff))
 168                return 1; /* Definitely no grace period has elapsed. */
 169        return ((rnd_diff - RCU_SEQ_STATE_MASK - 1) >> RCU_SEQ_CTR_SHIFT) + 2;
 170}
 171
 172/*
 173 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
 174 * by call_rcu() and rcu callback execution, and are therefore not part
 175 * of the RCU API. These are in rcupdate.h because they are used by all
 176 * RCU implementations.
 177 */
 178
 179#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
 180# define STATE_RCU_HEAD_READY   0
 181# define STATE_RCU_HEAD_QUEUED  1
 182
 183extern struct debug_obj_descr rcuhead_debug_descr;
 184
 185static inline int debug_rcu_head_queue(struct rcu_head *head)
 186{
 187        int r1;
 188
 189        r1 = debug_object_activate(head, &rcuhead_debug_descr);
 190        debug_object_active_state(head, &rcuhead_debug_descr,
 191                                  STATE_RCU_HEAD_READY,
 192                                  STATE_RCU_HEAD_QUEUED);
 193        return r1;
 194}
 195
 196static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 197{
 198        debug_object_active_state(head, &rcuhead_debug_descr,
 199                                  STATE_RCU_HEAD_QUEUED,
 200                                  STATE_RCU_HEAD_READY);
 201        debug_object_deactivate(head, &rcuhead_debug_descr);
 202}
 203#else   /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 204static inline int debug_rcu_head_queue(struct rcu_head *head)
 205{
 206        return 0;
 207}
 208
 209static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 210{
 211}
 212#endif  /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 213
 214#ifdef CONFIG_RCU_STALL_COMMON
 215
 216extern int rcu_cpu_stall_ftrace_dump;
 217extern int rcu_cpu_stall_suppress;
 218extern int rcu_cpu_stall_timeout;
 219int rcu_jiffies_till_stall_check(void);
 220
 221#define rcu_ftrace_dump_stall_suppress() \
 222do { \
 223        if (!rcu_cpu_stall_suppress) \
 224                rcu_cpu_stall_suppress = 3; \
 225} while (0)
 226
 227#define rcu_ftrace_dump_stall_unsuppress() \
 228do { \
 229        if (rcu_cpu_stall_suppress == 3) \
 230                rcu_cpu_stall_suppress = 0; \
 231} while (0)
 232
 233#else /* #endif #ifdef CONFIG_RCU_STALL_COMMON */
 234#define rcu_ftrace_dump_stall_suppress()
 235#define rcu_ftrace_dump_stall_unsuppress()
 236#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
 237
 238/*
 239 * Strings used in tracepoints need to be exported via the
 240 * tracing system such that tools like perf and trace-cmd can
 241 * translate the string address pointers to actual text.
 242 */
 243#define TPS(x)  tracepoint_string(x)
 244
 245/*
 246 * Dump the ftrace buffer, but only one time per callsite per boot.
 247 */
 248#define rcu_ftrace_dump(oops_dump_mode) \
 249do { \
 250        static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
 251        \
 252        if (!atomic_read(&___rfd_beenhere) && \
 253            !atomic_xchg(&___rfd_beenhere, 1)) { \
 254                tracing_off(); \
 255                rcu_ftrace_dump_stall_suppress(); \
 256                ftrace_dump(oops_dump_mode); \
 257                rcu_ftrace_dump_stall_unsuppress(); \
 258        } \
 259} while (0)
 260
 261void rcu_early_boot_tests(void);
 262void rcu_test_sync_prims(void);
 263
 264/*
 265 * This function really isn't for public consumption, but RCU is special in
 266 * that context switches can allow the state machine to make progress.
 267 */
 268extern void resched_cpu(int cpu);
 269
 270#if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU)
 271
 272#include <linux/rcu_node_tree.h>
 273
 274extern int rcu_num_lvls;
 275extern int num_rcu_lvl[];
 276extern int rcu_num_nodes;
 277static bool rcu_fanout_exact;
 278static int rcu_fanout_leaf;
 279
 280/*
 281 * Compute the per-level fanout, either using the exact fanout specified
 282 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
 283 */
 284static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
 285{
 286        int i;
 287
 288        for (i = 0; i < RCU_NUM_LVLS; i++)
 289                levelspread[i] = INT_MIN;
 290        if (rcu_fanout_exact) {
 291                levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
 292                for (i = rcu_num_lvls - 2; i >= 0; i--)
 293                        levelspread[i] = RCU_FANOUT;
 294        } else {
 295                int ccur;
 296                int cprv;
 297
 298                cprv = nr_cpu_ids;
 299                for (i = rcu_num_lvls - 1; i >= 0; i--) {
 300                        ccur = levelcnt[i];
 301                        levelspread[i] = (cprv + ccur - 1) / ccur;
 302                        cprv = ccur;
 303                }
 304        }
 305}
 306
 307/* Returns a pointer to the first leaf rcu_node structure. */
 308#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])
 309
 310/* Is this rcu_node a leaf? */
 311#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
 312
 313/* Is this rcu_node the last leaf? */
 314#define rcu_is_last_leaf_node(rnp) ((rnp) == &rcu_state.node[rcu_num_nodes - 1])
 315
 316/*
 317 * Do a full breadth-first scan of the {s,}rcu_node structures for the
 318 * specified state structure (for SRCU) or the only rcu_state structure
 319 * (for RCU).
 320 */
 321#define srcu_for_each_node_breadth_first(sp, rnp) \
 322        for ((rnp) = &(sp)->node[0]; \
 323             (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++)
 324#define rcu_for_each_node_breadth_first(rnp) \
 325        srcu_for_each_node_breadth_first(&rcu_state, rnp)
 326
 327/*
 328 * Scan the leaves of the rcu_node hierarchy for the rcu_state structure.
 329 * Note that if there is a singleton rcu_node tree with but one rcu_node
 330 * structure, this loop -will- visit the rcu_node structure.  It is still
 331 * a leaf node, even if it is also the root node.
 332 */
 333#define rcu_for_each_leaf_node(rnp) \
 334        for ((rnp) = rcu_first_leaf_node(); \
 335             (rnp) < &rcu_state.node[rcu_num_nodes]; (rnp)++)
 336
 337/*
 338 * Iterate over all possible CPUs in a leaf RCU node.
 339 */
 340#define for_each_leaf_node_possible_cpu(rnp, cpu) \
 341        for ((cpu) = cpumask_next((rnp)->grplo - 1, cpu_possible_mask); \
 342             (cpu) <= rnp->grphi; \
 343             (cpu) = cpumask_next((cpu), cpu_possible_mask))
 344
 345/*
 346 * Iterate over all CPUs in a leaf RCU node's specified mask.
 347 */
 348#define rcu_find_next_bit(rnp, cpu, mask) \
 349        ((rnp)->grplo + find_next_bit(&(mask), BITS_PER_LONG, (cpu)))
 350#define for_each_leaf_node_cpu_mask(rnp, cpu, mask) \
 351        for ((cpu) = rcu_find_next_bit((rnp), 0, (mask)); \
 352             (cpu) <= rnp->grphi; \
 353             (cpu) = rcu_find_next_bit((rnp), (cpu) + 1 - (rnp->grplo), (mask)))
 354
 355/*
 356 * Wrappers for the rcu_node::lock acquire and release.
 357 *
 358 * Because the rcu_nodes form a tree, the tree traversal locking will observe
 359 * different lock values, this in turn means that an UNLOCK of one level
 360 * followed by a LOCK of another level does not imply a full memory barrier;
 361 * and most importantly transitivity is lost.
 362 *
 363 * In order to restore full ordering between tree levels, augment the regular
 364 * lock acquire functions with smp_mb__after_unlock_lock().
 365 *
 366 * As ->lock of struct rcu_node is a __private field, therefore one should use
 367 * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
 368 */
 369#define raw_spin_lock_rcu_node(p)                                       \
 370do {                                                                    \
 371        raw_spin_lock(&ACCESS_PRIVATE(p, lock));                        \
 372        smp_mb__after_unlock_lock();                                    \
 373} while (0)
 374
 375#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))
 376
 377#define raw_spin_lock_irq_rcu_node(p)                                   \
 378do {                                                                    \
 379        raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock));                    \
 380        smp_mb__after_unlock_lock();                                    \
 381} while (0)
 382
 383#define raw_spin_unlock_irq_rcu_node(p)                                 \
 384        raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
 385
 386#define raw_spin_lock_irqsave_rcu_node(p, flags)                        \
 387do {                                                                    \
 388        raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
 389        smp_mb__after_unlock_lock();                                    \
 390} while (0)
 391
 392#define raw_spin_unlock_irqrestore_rcu_node(p, flags)                   \
 393        raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
 394
 395#define raw_spin_trylock_rcu_node(p)                                    \
 396({                                                                      \
 397        bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock));    \
 398                                                                        \
 399        if (___locked)                                                  \
 400                smp_mb__after_unlock_lock();                            \
 401        ___locked;                                                      \
 402})
 403
 404#define raw_lockdep_assert_held_rcu_node(p)                             \
 405        lockdep_assert_held(&ACCESS_PRIVATE(p, lock))
 406
 407#endif /* #if defined(CONFIG_SRCU) || !defined(CONFIG_TINY_RCU) */
 408
 409#ifdef CONFIG_SRCU
 410void srcu_init(void);
 411#else /* #ifdef CONFIG_SRCU */
 412static inline void srcu_init(void) { }
 413#endif /* #else #ifdef CONFIG_SRCU */
 414
 415#ifdef CONFIG_TINY_RCU
 416/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
 417static inline bool rcu_gp_is_normal(void) { return true; }
 418static inline bool rcu_gp_is_expedited(void) { return false; }
 419static inline void rcu_expedite_gp(void) { }
 420static inline void rcu_unexpedite_gp(void) { }
 421static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
 422#else /* #ifdef CONFIG_TINY_RCU */
 423bool rcu_gp_is_normal(void);     /* Internal RCU use. */
 424bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
 425void rcu_expedite_gp(void);
 426void rcu_unexpedite_gp(void);
 427void rcupdate_announce_bootup_oddness(void);
 428void rcu_request_urgent_qs_task(struct task_struct *t);
 429#endif /* #else #ifdef CONFIG_TINY_RCU */
 430
 431#define RCU_SCHEDULER_INACTIVE  0
 432#define RCU_SCHEDULER_INIT      1
 433#define RCU_SCHEDULER_RUNNING   2
 434
 435enum rcutorture_type {
 436        RCU_FLAVOR,
 437        RCU_TASKS_FLAVOR,
 438        RCU_TRIVIAL_FLAVOR,
 439        SRCU_FLAVOR,
 440        INVALID_RCU_FLAVOR
 441};
 442
 443#if defined(CONFIG_TREE_RCU)
 444void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 445                            unsigned long *gp_seq);
 446void do_trace_rcu_torture_read(const char *rcutorturename,
 447                               struct rcu_head *rhp,
 448                               unsigned long secs,
 449                               unsigned long c_old,
 450                               unsigned long c);
 451#else
 452static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
 453                                          int *flags, unsigned long *gp_seq)
 454{
 455        *flags = 0;
 456        *gp_seq = 0;
 457}
 458#ifdef CONFIG_RCU_TRACE
 459void do_trace_rcu_torture_read(const char *rcutorturename,
 460                               struct rcu_head *rhp,
 461                               unsigned long secs,
 462                               unsigned long c_old,
 463                               unsigned long c);
 464#else
 465#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
 466        do { } while (0)
 467#endif
 468#endif
 469
 470#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
 471long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
 472#endif
 473
 474#ifdef CONFIG_TINY_SRCU
 475
 476static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
 477                                           struct srcu_struct *sp, int *flags,
 478                                           unsigned long *gp_seq)
 479{
 480        if (test_type != SRCU_FLAVOR)
 481                return;
 482        *flags = 0;
 483        *gp_seq = sp->srcu_idx;
 484}
 485
 486#elif defined(CONFIG_TREE_SRCU)
 487
 488void srcutorture_get_gp_data(enum rcutorture_type test_type,
 489                             struct srcu_struct *sp, int *flags,
 490                             unsigned long *gp_seq);
 491
 492#endif
 493
 494#ifdef CONFIG_TINY_RCU
 495static inline unsigned long rcu_get_gp_seq(void) { return 0; }
 496static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
 497static inline unsigned long
 498srcu_batches_completed(struct srcu_struct *sp) { return 0; }
 499static inline void rcu_force_quiescent_state(void) { }
 500static inline void show_rcu_gp_kthreads(void) { }
 501static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
 502static inline void rcu_fwd_progress_check(unsigned long j) { }
 503#else /* #ifdef CONFIG_TINY_RCU */
 504unsigned long rcu_get_gp_seq(void);
 505unsigned long rcu_exp_batches_completed(void);
 506unsigned long srcu_batches_completed(struct srcu_struct *sp);
 507void show_rcu_gp_kthreads(void);
 508int rcu_get_gp_kthreads_prio(void);
 509void rcu_fwd_progress_check(unsigned long j);
 510void rcu_force_quiescent_state(void);
 511extern struct workqueue_struct *rcu_gp_wq;
 512extern struct workqueue_struct *rcu_par_gp_wq;
 513#endif /* #else #ifdef CONFIG_TINY_RCU */
 514
 515#ifdef CONFIG_RCU_NOCB_CPU
 516bool rcu_is_nocb_cpu(int cpu);
 517void rcu_bind_current_to_nocb(void);
 518#else
 519static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
 520static inline void rcu_bind_current_to_nocb(void) { }
 521#endif
 522
 523#endif /* __LINUX_RCU_H */
 524