linux/kernel/rcu/rcu.h
/*
 * Read-Copy Update definitions shared among RCU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright IBM Corporation, 2011
 *
 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

#ifndef __LINUX_RCU_H
#define __LINUX_RCU_H

#include <trace/events/rcu.h>
#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */

/*
 * Process-level increment to ->dynticks_nesting field.  This allows for
 * architectures that use half-interrupts and half-exceptions from
 * process context.
 *
 * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
 * that counts the number of process-based reasons why RCU cannot
 * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
 * is the value used to increment or decrement this field.
 *
 * The rest of the bits could in principle be used to count interrupts,
 * but this would mean that a negative-one value in the interrupt
 * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
 * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
 * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
 * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
 * initial exit from idle.
 */
#define DYNTICK_TASK_NEST_WIDTH 7
#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
#define DYNTICK_TASK_NEST_MASK  (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
#define DYNTICK_TASK_FLAG          ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
#define DYNTICK_TASK_MASK          ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
#define DYNTICK_TASK_EXIT_IDLE     (DYNTICK_TASK_NEST_VALUE + \
                                    DYNTICK_TASK_FLAG)
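
/*
 * Worked example (a sketch, assuming a 64-bit long long so that
 * LLONG_MAX == 2^63 - 1; the constants below follow directly from the
 * definitions above and are not additional API):
 *
 *	DYNTICK_TASK_NEST_VALUE == 1 << 56 == 0x0100000000000000
 *	DYNTICK_TASK_NEST_MASK  == 0x7f00000000000000  (7-bit NEST field)
 *	DYNTICK_TASK_FLAG       == 0x0040000000000000  (guard-field flag)
 *	DYNTICK_TASK_MASK       == 0x0060000000000000  (two-bit guard field)
 *	DYNTICK_TASK_EXIT_IDLE  == 0x0140000000000000  (NEST_VALUE + FLAG)
 *
 * Each process-level entry/exit therefore adjusts ->dynticks_nesting by
 * DYNTICK_TASK_NEST_VALUE, while the low-order bits remain available for
 * interrupt nesting without being able to clear the NEST field.
 */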


/*
 * Grace-period counter management.
 */

#define RCU_SEQ_CTR_SHIFT       2
#define RCU_SEQ_STATE_MASK      ((1 << RCU_SEQ_CTR_SHIFT) - 1)

/*
 * Return the counter portion of a sequence number previously returned
 * by rcu_seq_snap() or rcu_seq_current().
 */
static inline unsigned long rcu_seq_ctr(unsigned long s)
{
        return s >> RCU_SEQ_CTR_SHIFT;
}

/*
 * Return the state portion of a sequence number previously returned
 * by rcu_seq_snap() or rcu_seq_current().
 */
static inline int rcu_seq_state(unsigned long s)
{
        return s & RCU_SEQ_STATE_MASK;
}

/*
 * Set the state portion of the pointed-to sequence number.
 * The caller is responsible for preventing conflicting updates.
 */
static inline void rcu_seq_set_state(unsigned long *sp, int newstate)
{
        WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK);
        WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate);
}

/* Adjust sequence number for start of update-side operation. */
static inline void rcu_seq_start(unsigned long *sp)
{
        WRITE_ONCE(*sp, *sp + 1);
        smp_mb(); /* Ensure update-side operation after counter increment. */
        WARN_ON_ONCE(rcu_seq_state(*sp) != 1);
}

/* Adjust sequence number for end of update-side operation. */
static inline void rcu_seq_end(unsigned long *sp)
{
        smp_mb(); /* Ensure update-side operation before counter increment. */
        WARN_ON_ONCE(!rcu_seq_state(*sp));
        WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1);
}

/* Take a snapshot of the update side's sequence number. */
static inline unsigned long rcu_seq_snap(unsigned long *sp)
{
        unsigned long s;

        s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
        smp_mb(); /* Above access must not bleed into critical section. */
        return s;
}
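
/*
 * Worked example of rcu_seq_snap() (a sketch following directly from the
 * definitions above, with RCU_SEQ_CTR_SHIFT == 2 and RCU_SEQ_STATE_MASK
 * == 0x3, so the adjustment is "+ 7" followed by clearing the state bits):
 *
 *	*sp == 8 (counter 2, state 0, no operation in flight)
 *		-> snap == (8 + 7) & ~0x3 == 12, so completion of the next
 *		   full update-side operation suffices.
 *	*sp == 9 (counter 2, state 1, operation in flight)
 *		-> snap == (9 + 7) & ~0x3 == 16, because the in-flight
 *		   operation might have started before the caller's changes,
 *		   so one additional full operation is required.
 */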

/* Return the current value of the update side's sequence number, no ordering. */
static inline unsigned long rcu_seq_current(unsigned long *sp)
{
        return READ_ONCE(*sp);
}

/*
 * Given a snapshot from rcu_seq_snap(), determine whether or not a
 * full update-side operation has occurred.
 */
static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
{
        return ULONG_CMP_GE(READ_ONCE(*sp), s);
}
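
/*
 * Typical snapshot/wait pattern built from the helpers above (a minimal
 * sketch; my_seq and my_wq are illustrative names, not a specific
 * in-tree caller):
 *
 *	unsigned long s;
 *
 *	s = rcu_seq_snap(&my_seq);
 *	... kick the update side, for example by waking its kthread ...
 *	wait_event(my_wq, rcu_seq_done(&my_seq, s));
 *
 * The update side brackets each operation with rcu_seq_start(&my_seq)
 * and rcu_seq_end(&my_seq); the counter arithmetic in rcu_seq_snap()
 * then guarantees that a full operation has elapsed once rcu_seq_done()
 * returns true.
 */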

/*
 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
 * by call_rcu() and rcu callback execution, and are therefore not part
 * of the RCU API.  They live in this shared header because they are
 * used by all RCU flavors.
 */

#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
# define STATE_RCU_HEAD_READY   0
# define STATE_RCU_HEAD_QUEUED  1

extern struct debug_obj_descr rcuhead_debug_descr;

static inline int debug_rcu_head_queue(struct rcu_head *head)
{
        int r1;

        r1 = debug_object_activate(head, &rcuhead_debug_descr);
        debug_object_active_state(head, &rcuhead_debug_descr,
                                  STATE_RCU_HEAD_READY,
                                  STATE_RCU_HEAD_QUEUED);
        return r1;
}

static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
        debug_object_active_state(head, &rcuhead_debug_descr,
                                  STATE_RCU_HEAD_QUEUED,
                                  STATE_RCU_HEAD_READY);
        debug_object_deactivate(head, &rcuhead_debug_descr);
}
#else   /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
static inline int debug_rcu_head_queue(struct rcu_head *head)
{
        return 0;
}

static inline void debug_rcu_head_unqueue(struct rcu_head *head)
{
}
#endif  /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */

void kfree(const void *);

/*
 * Reclaim the specified callback, either by invoking it (non-lazy case)
 * or freeing it directly (lazy case).  Return true if lazy, false otherwise.
 */
static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
{
        unsigned long offset = (unsigned long)head->func;

        rcu_lock_acquire(&rcu_callback_map);
        if (__is_kfree_rcu_offset(offset)) {
                RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);)
                kfree((void *)head - offset);
                rcu_lock_release(&rcu_callback_map);
                return true;
        } else {
                RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
                head->func(head);
                rcu_lock_release(&rcu_callback_map);
                return false;
        }
}
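
/*
 * How the lazy (kfree) case above arises, as a sketch rather than a
 * statement of the kfree_rcu() implementation details: kfree_rcu(p, rh)
 * effectively queues &p->rh with ->func set not to a real function but
 * to the byte offset of the rcu_head within the enclosing structure,
 * for example with
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rh;
 *	};
 *
 * the stored "function" is (rcu_callback_t)offsetof(struct foo, rh).
 * Such small offsets are recognized by __is_kfree_rcu_offset(), and
 * subtracting the offset from the rcu_head pointer recovers the original
 * allocation for kfree().
 */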

#ifdef CONFIG_RCU_STALL_COMMON

extern int rcu_cpu_stall_suppress;
int rcu_jiffies_till_stall_check(void);

#endif /* #ifdef CONFIG_RCU_STALL_COMMON */

/*
 * Strings used in tracepoints need to be exported via the
 * tracing system such that tools like perf and trace-cmd can
 * translate the string address pointers to actual text.
 */
#define TPS(x)  tracepoint_string(x)

/*
 * Dump the ftrace buffer, but only one time per callsite per boot.
 */
#define rcu_ftrace_dump(oops_dump_mode) \
do { \
        static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
        \
        if (!atomic_read(&___rfd_beenhere) && \
            !atomic_xchg(&___rfd_beenhere, 1)) \
                ftrace_dump(oops_dump_mode); \
} while (0)
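
/*
 * Note on the pattern above (an explanatory aside, not additional API):
 * the atomic_read() is a cheap fast-path filter so that callsites that
 * have already dumped do not keep hammering the cache line with
 * atomic_xchg(); the atomic_xchg() then makes the "first caller wins"
 * decision when several CPUs race on the same callsite.
 */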

void rcu_early_boot_tests(void);
void rcu_test_sync_prims(void);

/*
 * This function really isn't for public consumption, but RCU is special in
 * that context switches can allow the state machine to make progress.
 */
extern void resched_cpu(int cpu);

#if defined(SRCU) || !defined(TINY_RCU)

#include <linux/rcu_node_tree.h>

extern int rcu_num_lvls;
extern int num_rcu_lvl[];
extern int rcu_num_nodes;
static bool rcu_fanout_exact;
static int rcu_fanout_leaf;

/*
 * Compute the per-level fanout, either using the exact fanout specified
 * or balancing the tree, depending on the rcu_fanout_exact boot parameter.
 */
static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
{
        int i;

        if (rcu_fanout_exact) {
                levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
                for (i = rcu_num_lvls - 2; i >= 0; i--)
                        levelspread[i] = RCU_FANOUT;
        } else {
                int ccur;
                int cprv;

                cprv = nr_cpu_ids;
                for (i = rcu_num_lvls - 1; i >= 0; i--) {
                        ccur = levelcnt[i];
                        levelspread[i] = (cprv + ccur - 1) / ccur;
                        cprv = ccur;
                }
        }
}
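
/*
 * Worked example of the balanced (non-exact) branch above, as a sketch:
 * suppose nr_cpu_ids == 96 and a two-level tree with levelcnt == {1, 6},
 * that is, one root above six leaf rcu_node structures.  Walking from
 * the leaves up, each step is a round-up division of the count below by
 * the count at this level:
 *
 *	i == 1: levelspread[1] = (96 + 5) / 6 = 16  (CPUs per leaf)
 *	i == 0: levelspread[0] = (6 + 0) / 1  = 6   (leaves under the root)
 *
 * so levelspread ends up {6, 16}, spreading CPUs evenly rather than
 * filling early leaves to the maximum fanout.
 */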

/*
 * Do a full breadth-first scan of the rcu_node structures for the
 * specified rcu_state structure.
 */
#define rcu_for_each_node_breadth_first(rsp, rnp) \
        for ((rnp) = &(rsp)->node[0]; \
             (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

/*
 * Do a breadth-first scan of the non-leaf rcu_node structures for the
 * specified rcu_state structure.  Note that if there is a singleton
 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
 */
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
        for ((rnp) = &(rsp)->node[0]; \
             (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)

/*
 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
 * structure.  Note that if there is a singleton rcu_node tree with but
 * one rcu_node structure, this loop -will- visit the rcu_node structure.
 * It is still a leaf node, even if it is also the root node.
 */
#define rcu_for_each_leaf_node(rsp, rnp) \
        for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
             (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

/*
 * Iterate over all possible CPUs in a leaf RCU node.
 */
#define for_each_leaf_node_possible_cpu(rnp, cpu) \
        for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
             cpu <= rnp->grphi; \
             cpu = cpumask_next((cpu), cpu_possible_mask))
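
/*
 * Typical combined use of the iterators above (a sketch; rsp, rnp, and
 * cpu are whatever rcu_state pointer, rcu_node pointer, and CPU index
 * the caller already has in scope):
 *
 *	rcu_for_each_leaf_node(rsp, rnp) {
 *		for_each_leaf_node_possible_cpu(rnp, cpu) {
 *			... examine per-CPU state hanging off this leaf ...
 *		}
 *	}
 */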

/*
 * Wrappers for the rcu_node::lock acquire and release.
 *
 * Because the rcu_nodes form a tree, the tree traversal locking will observe
 * different lock values; this in turn means that an UNLOCK of one level
 * followed by a LOCK of another level does not imply a full memory barrier,
 * and, most importantly, transitivity is lost.
 *
 * In order to restore full ordering between tree levels, augment the regular
 * lock acquire functions with smp_mb__after_unlock_lock().
 *
 * Because ->lock of struct rcu_node is a __private field, use these wrappers
 * rather than directly calling raw_spin_{lock,unlock}* on ->lock.
 */
#define raw_spin_lock_rcu_node(p)                                       \
do {                                                                    \
        raw_spin_lock(&ACCESS_PRIVATE(p, lock));                        \
        smp_mb__after_unlock_lock();                                    \
} while (0)

#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))

#define raw_spin_lock_irq_rcu_node(p)                                   \
do {                                                                    \
        raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock));                    \
        smp_mb__after_unlock_lock();                                    \
} while (0)

#define raw_spin_unlock_irq_rcu_node(p)                                 \
        raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))

#define raw_spin_lock_irqsave_rcu_node(p, flags)                        \
do {                                                                    \
        raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);         \
        smp_mb__after_unlock_lock();                                    \
} while (0)

#define raw_spin_unlock_irqrestore_rcu_node(p, flags)                   \
        raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)

#define raw_spin_trylock_rcu_node(p)                                    \
({                                                                      \
        bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock));    \
                                                                        \
        if (___locked)                                                  \
                smp_mb__after_unlock_lock();                            \
        ___locked;                                                      \
})
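
/*
 * Example use of the wrappers above (a minimal sketch; rnp and flags are
 * whatever rcu_node pointer and flags word the caller already has):
 *
 *	unsigned long flags;
 *
 *	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 *	... update rnp-> fields; critical sections previously run under
 *	    other rcu_node structures' locks are fully ordered before this
 *	    one thanks to the smp_mb__after_unlock_lock() in the acquire
 *	    wrappers ...
 *	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 */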

#endif /* #if defined(SRCU) || !defined(TINY_RCU) */

#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }
static inline bool rcu_gp_is_expedited(void) { return false; }
static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void);     /* Internal RCU use. */
bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
void rcu_expedite_gp(void);
void rcu_unexpedite_gp(void);
void rcupdate_announce_bootup_oddness(void);
#endif /* #else #ifdef CONFIG_TINY_RCU */

#define RCU_SCHEDULER_INACTIVE  0
#define RCU_SCHEDULER_INIT      1
#define RCU_SCHEDULER_RUNNING   2

#ifdef CONFIG_TINY_RCU
static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
#else /* #ifdef CONFIG_TINY_RCU */
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */

enum rcutorture_type {
        RCU_FLAVOR,
        RCU_BH_FLAVOR,
        RCU_SCHED_FLAVOR,
        RCU_TASKS_FLAVOR,
        SRCU_FLAVOR,
        INVALID_RCU_FLAVOR
};

#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
                            unsigned long *gpnum, unsigned long *completed);
void rcutorture_record_test_transition(void);
void rcutorture_record_progress(unsigned long vernum);
void do_trace_rcu_torture_read(const char *rcutorturename,
                               struct rcu_head *rhp,
                               unsigned long secs,
                               unsigned long c_old,
                               unsigned long c);
#else
static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
                                          int *flags,
                                          unsigned long *gpnum,
                                          unsigned long *completed)
{
        *flags = 0;
        *gpnum = 0;
        *completed = 0;
}
static inline void rcutorture_record_test_transition(void) { }
static inline void rcutorture_record_progress(unsigned long vernum) { }
#ifdef CONFIG_RCU_TRACE
void do_trace_rcu_torture_read(const char *rcutorturename,
                               struct rcu_head *rhp,
                               unsigned long secs,
                               unsigned long c_old,
                               unsigned long c);
#else
#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
        do { } while (0)
#endif
#endif

#ifdef CONFIG_TINY_SRCU

static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
                                           struct srcu_struct *sp, int *flags,
                                           unsigned long *gpnum,
                                           unsigned long *completed)
{
        if (test_type != SRCU_FLAVOR)
                return;
        *flags = 0;
        *completed = sp->srcu_idx;
        *gpnum = *completed;
}

#elif defined(CONFIG_TREE_SRCU)

void srcutorture_get_gp_data(enum rcutorture_type test_type,
                             struct srcu_struct *sp, int *flags,
                             unsigned long *gpnum, unsigned long *completed);

#endif

#ifdef CONFIG_TINY_RCU
static inline unsigned long rcu_batches_started(void) { return 0; }
static inline unsigned long rcu_batches_started_bh(void) { return 0; }
static inline unsigned long rcu_batches_started_sched(void) { return 0; }
static inline unsigned long rcu_batches_completed(void) { return 0; }
static inline unsigned long rcu_batches_completed_bh(void) { return 0; }
static inline unsigned long rcu_batches_completed_sched(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
static inline unsigned long
srcu_batches_completed(struct srcu_struct *sp) { return 0; }
static inline void rcu_force_quiescent_state(void) { }
static inline void rcu_bh_force_quiescent_state(void) { }
static inline void rcu_sched_force_quiescent_state(void) { }
static inline void show_rcu_gp_kthreads(void) { }
#else /* #ifdef CONFIG_TINY_RCU */
extern unsigned long rcutorture_testseq;
extern unsigned long rcutorture_vernum;
unsigned long rcu_batches_started(void);
unsigned long rcu_batches_started_bh(void);
unsigned long rcu_batches_started_sched(void);
unsigned long rcu_batches_completed(void);
unsigned long rcu_batches_completed_bh(void);
unsigned long rcu_batches_completed_sched(void);
unsigned long rcu_exp_batches_completed(void);
unsigned long rcu_exp_batches_completed_sched(void);
unsigned long srcu_batches_completed(struct srcu_struct *sp);
void show_rcu_gp_kthreads(void);
void rcu_force_quiescent_state(void);
void rcu_bh_force_quiescent_state(void);
void rcu_sched_force_quiescent_state(void);
#endif /* #else #ifdef CONFIG_TINY_RCU */

#ifdef CONFIG_RCU_NOCB_CPU
bool rcu_is_nocb_cpu(int cpu);
#else
static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
#endif

#endif /* __LINUX_RCU_H */