linux/kernel/rcutiny_plugin.h
   1/*
   2 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
   3 * Internal non-public definitions that provide either classic
   4 * or preemptible semantics.
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License as published by
   8 * the Free Software Foundation; either version 2 of the License, or
   9 * (at your option) any later version.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program; if not, write to the Free Software
  18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  19 *
  20 * Copyright (c) 2010 Linaro
  21 *
  22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  23 */
  24
  25#include <linux/kthread.h>
  26#include <linux/module.h>
  27#include <linux/debugfs.h>
  28#include <linux/seq_file.h>
  29
  30/* Global control variables for rcupdate callback mechanism. */
  31struct rcu_ctrlblk {
  32        struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
  33        struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
  34        struct rcu_head **curtail;      /* ->next pointer of last CB. */
  35        RCU_TRACE(long qlen);           /* Number of pending CBs. */
  36        RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
  37        RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
  38        RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
  39        RCU_TRACE(char *name);          /* Name of RCU type. */
  40};
  41
  42/* Definition for rcupdate control block. */
  43static struct rcu_ctrlblk rcu_sched_ctrlblk = {
  44        .donetail       = &rcu_sched_ctrlblk.rcucblist,
  45        .curtail        = &rcu_sched_ctrlblk.rcucblist,
  46        RCU_TRACE(.name = "rcu_sched")
  47};
  48
  49static struct rcu_ctrlblk rcu_bh_ctrlblk = {
  50        .donetail       = &rcu_bh_ctrlblk.rcucblist,
  51        .curtail        = &rcu_bh_ctrlblk.rcucblist,
  52        RCU_TRACE(.name = "rcu_bh")
  53};
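
/*
 * Illustrative sketch of how the two tail pointers partition the single
 * callback list (hypothetical helpers, not part of this file): new
 * callbacks are appended at *curtail, and the end of a grace period
 * promotes everything enqueued before that grace period by advancing
 * donetail up to curtail:
 *
 *	static void sketch_enqueue(struct rcu_ctrlblk *rcp,
 *				   struct rcu_head *head)
 *	{
 *		head->next = NULL;
 *		*rcp->curtail = head;		// append at end of list
 *		rcp->curtail = &head->next;	// track new last ->next
 *	}
 *
 *	static void sketch_grace_period_end(struct rcu_ctrlblk *rcp)
 *	{
 *		// Callbacks enqueued before the grace period began are
 *		// now safe to invoke.
 *		rcp->donetail = rcp->curtail;
 *	}
 */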
  54
  55#ifdef CONFIG_DEBUG_LOCK_ALLOC
  56int rcu_scheduler_active __read_mostly;
  57EXPORT_SYMBOL_GPL(rcu_scheduler_active);
  58#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
  59
  60#ifdef CONFIG_RCU_TRACE
  61
  62static void check_cpu_stall(struct rcu_ctrlblk *rcp)
  63{
  64        unsigned long j;
  65        unsigned long js;
  66
  67        if (rcu_cpu_stall_suppress)
  68                return;
  69        rcp->ticks_this_gp++;
  70        j = jiffies;
  71        js = rcp->jiffies_stall;
  72        if (*rcp->curtail && ULONG_CMP_GE(j, js)) {
  73                pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
  74                       rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting,
  75                       jiffies - rcp->gp_start, rcp->qlen);
  76                dump_stack();
  77        }
  78        if (*rcp->curtail && ULONG_CMP_GE(j, js))
  79                rcp->jiffies_stall = jiffies +
  80                        3 * rcu_jiffies_till_stall_check() + 3;
  81        else if (ULONG_CMP_GE(j, js))
  82                rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
  83}
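
/*
 * The stall check above compares jiffies values with ULONG_CMP_GE()
 * rather than a plain ">=" so that it keeps working across counter
 * wrap.  A minimal worked example, assuming the usual rcupdate.h
 * definition ULONG_CMP_GE(a, b) == (ULONG_MAX / 2 >= (a) - (b)):
 *
 *	// js = ULONG_MAX - 5 (stall deadline set just before wrap)
 *	// j  = 10            (jiffies has since wrapped around)
 *	// j - js == 16 in unsigned arithmetic, so ULONG_CMP_GE(j, js)
 *	// is true and the stall is reported, even though a plain
 *	// "j >= js" comparison would be false.
 */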
  84
  85static void check_cpu_stall_preempt(void);
  86
  87#endif /* #ifdef CONFIG_RCU_TRACE */
  88
  89static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
  90{
  91#ifdef CONFIG_RCU_TRACE
  92        rcp->ticks_this_gp = 0;
  93        rcp->gp_start = jiffies;
  94        rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
  95#endif /* #ifdef CONFIG_RCU_TRACE */
  96}
  97
  98static void check_cpu_stalls(void)
  99{
 100        RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk));
 101        RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk));
 102        RCU_TRACE(check_cpu_stall_preempt());
 103}
 104
 105#ifdef CONFIG_TINY_PREEMPT_RCU
 106
 107#include <linux/delay.h>
 108
 109/* Global control variables for preemptible RCU. */
 110struct rcu_preempt_ctrlblk {
 111        struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
 112        struct rcu_head **nexttail;
 113                                /* Tasks blocked in a preemptible RCU */
  114                                /*  read-side critical section while a */
 115                                /*  preemptible-RCU grace period is in */
 116                                /*  progress must wait for a later grace */
 117                                /*  period.  This pointer points to the */
 118                                /*  ->next pointer of the last task that */
 119                                /*  must wait for a later grace period, or */
 120                                /*  to &->rcb.rcucblist if there is no */
 121                                /*  such task. */
 122        struct list_head blkd_tasks;
 123                                /* Tasks blocked in RCU read-side critical */
 124                                /*  section.  Tasks are placed at the head */
 125                                /*  of this list and age towards the tail. */
 126        struct list_head *gp_tasks;
 127                                /* Pointer to the first task blocking the */
 128                                /*  current grace period, or NULL if there */
 129                                /*  is no such task. */
 130        struct list_head *exp_tasks;
 131                                /* Pointer to first task blocking the */
 132                                /*  current expedited grace period, or NULL */
 133                                /*  if there is no such task.  If there */
 134                                /*  is no current expedited grace period, */
 135                                /*  then there cannot be any such task. */
 136#ifdef CONFIG_RCU_BOOST
 137        struct list_head *boost_tasks;
 138                                /* Pointer to first task that needs to be */
 139                                /*  priority-boosted, or NULL if no priority */
 140                                /*  boosting is needed.  If there is no */
 141                                /*  current or expedited grace period, there */
 142                                /*  can be no such task. */
 143#endif /* #ifdef CONFIG_RCU_BOOST */
 144        u8 gpnum;               /* Current grace period. */
 145        u8 gpcpu;               /* Last grace period blocked by the CPU. */
 146        u8 completed;           /* Last grace period completed. */
 147                                /*  If all three are equal, RCU is idle. */
 148#ifdef CONFIG_RCU_BOOST
 149        unsigned long boost_time; /* When to start boosting (jiffies) */
 150#endif /* #ifdef CONFIG_RCU_BOOST */
 151#ifdef CONFIG_RCU_TRACE
 152        unsigned long n_grace_periods;
 153#ifdef CONFIG_RCU_BOOST
 154        unsigned long n_tasks_boosted;
 155                                /* Total number of tasks boosted. */
 156        unsigned long n_exp_boosts;
 157                                /* Number of tasks boosted for expedited GP. */
 158        unsigned long n_normal_boosts;
 159                                /* Number of tasks boosted for normal GP. */
 160        unsigned long n_balk_blkd_tasks;
 161                                /* Refused to boost: no blocked tasks. */
 162        unsigned long n_balk_exp_gp_tasks;
 163                                /* Refused to boost: nothing blocking GP. */
 164        unsigned long n_balk_boost_tasks;
 165                                /* Refused to boost: already boosting. */
 166        unsigned long n_balk_notyet;
 167                                /* Refused to boost: not yet time. */
 168        unsigned long n_balk_nos;
 169                                /* Refused to boost: not sure why, though. */
 170                                /*  This can happen due to race conditions. */
 171#endif /* #ifdef CONFIG_RCU_BOOST */
 172#endif /* #ifdef CONFIG_RCU_TRACE */
 173};
 174
 175static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
 176        .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
 177        .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
 178        .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
 179        .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
 180        RCU_TRACE(.rcb.name = "rcu_preempt")
 181};
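
/*
 * Illustrative sketch (hypothetical diagram, not a definition used
 * elsewhere): the preemptible variant carves the single callback list
 * into three segments by way of its tail pointers.  call_rcu() always
 * appends at *nexttail, and rcu_preempt_cpu_qs() promotes the segments
 * when the CPU passes through a quiescent state:
 *
 *	rcucblist -> [done] -> [waiting for current GP] -> [next GP]
 *	                    ^                           ^           ^
 *	             *rcb.donetail              *rcb.curtail   *nexttail
 *
 *	// On a quiescent state (see rcu_preempt_cpu_qs()):
 *	//	rcb.donetail = rcb.curtail;   // current-GP CBs now done
 *	//	rcb.curtail  = nexttail;      // next-GP CBs await new GP
 */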
 182
 183static int rcu_preempted_readers_exp(void);
 184static void rcu_report_exp_done(void);
 185
 186/*
 187 * Return true if the CPU has not yet responded to the current grace period.
 188 */
 189static int rcu_cpu_blocking_cur_gp(void)
 190{
 191        return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
 192}
 193
 194/*
 195 * Check for a running RCU reader.  Because there is only one CPU,
 196 * there can be but one running RCU reader at a time.  ;-)
 197 *
 198 * Returns zero if there are no running readers.  Returns a positive
 199 * number if there is at least one reader within its RCU read-side
 200 * critical section.  Returns a negative number if an outermost reader
  201 * is in the midst of exiting from its RCU read-side critical section.
 207 */
 208static int rcu_preempt_running_reader(void)
 209{
 210        return current->rcu_read_lock_nesting;
 211}
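
/*
 * A minimal sketch of the counter sampled above, assuming the usual
 * TINY_PREEMPT_RCU rcu_read_lock()/rcu_read_unlock() implementation:
 *
 *	rcu_read_lock();	// ->rcu_read_lock_nesting: 0 -> 1
 *	rcu_read_lock();	//                           1 -> 2
 *	rcu_read_unlock();	//                           2 -> 1
 *	rcu_read_unlock();	// outermost exit: transiently negative
 *				// while end-of-critical-section cleanup
 *				// runs, then back to 0.
 */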
 212
 213/*
 214 * Check for preempted RCU readers blocking any grace period.
 215 * If the caller needs a reliable answer, it must disable hard irqs.
 216 */
 217static int rcu_preempt_blocked_readers_any(void)
 218{
 219        return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
 220}
 221
 222/*
 223 * Check for preempted RCU readers blocking the current grace period.
 224 * If the caller needs a reliable answer, it must disable hard irqs.
 225 */
 226static int rcu_preempt_blocked_readers_cgp(void)
 227{
 228        return rcu_preempt_ctrlblk.gp_tasks != NULL;
 229}
 230
 231/*
 232 * Return true if another preemptible-RCU grace period is needed.
 233 */
 234static int rcu_preempt_needs_another_gp(void)
 235{
 236        return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
 237}
 238
 239/*
 240 * Return true if a preemptible-RCU grace period is in progress.
 241 * The caller must disable hardirqs.
 242 */
 243static int rcu_preempt_gp_in_progress(void)
 244{
 245        return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
 246}
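
/*
 * Worked example of the three grace-period counters (illustrative
 * values only):
 *
 *	gpnum == gpcpu == completed == 5   // RCU idle
 *	gpnum = 6       // rcu_preempt_start_gp(): new grace period
 *	gpcpu = 6       // rcu_preempt_cpu_qs(): CPU passed through a
 *	                //  quiescent state
 *	completed = 6   // set once no blocked reader still blocks the
 *	                //  grace period, making all three equal again
 */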
 247
 248/*
  249 * Advance a ->blkd_tasks-list pointer to the next entry, returning
  250 * NULL instead if at the end of the list.
 251 */
 252static struct list_head *rcu_next_node_entry(struct task_struct *t)
 253{
 254        struct list_head *np;
 255
 256        np = t->rcu_node_entry.next;
 257        if (np == &rcu_preempt_ctrlblk.blkd_tasks)
 258                np = NULL;
 259        return np;
 260}
 261
 262#ifdef CONFIG_RCU_TRACE
 263
 264#ifdef CONFIG_RCU_BOOST
 265static void rcu_initiate_boost_trace(void);
 266#endif /* #ifdef CONFIG_RCU_BOOST */
 267
 268/*
  269 * Dump additional statistics for TINY_PREEMPT_RCU.
 270 */
 271static void show_tiny_preempt_stats(struct seq_file *m)
 272{
 273        seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
 274                   rcu_preempt_ctrlblk.rcb.qlen,
 275                   rcu_preempt_ctrlblk.n_grace_periods,
 276                   rcu_preempt_ctrlblk.gpnum,
 277                   rcu_preempt_ctrlblk.gpcpu,
 278                   rcu_preempt_ctrlblk.completed,
 279                   "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
 280                   "N."[!rcu_preempt_ctrlblk.gp_tasks],
 281                   "E."[!rcu_preempt_ctrlblk.exp_tasks]);
 282#ifdef CONFIG_RCU_BOOST
 283        seq_printf(m, "%sttb=%c ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
 284                   "             ",
 285                   "B."[!rcu_preempt_ctrlblk.boost_tasks],
 286                   rcu_preempt_ctrlblk.n_tasks_boosted,
 287                   rcu_preempt_ctrlblk.n_exp_boosts,
 288                   rcu_preempt_ctrlblk.n_normal_boosts,
 289                   (int)(jiffies & 0xffff),
 290                   (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
 291        seq_printf(m, "%s: nt=%lu egt=%lu bt=%lu ny=%lu nos=%lu\n",
 292                   "             balk",
 293                   rcu_preempt_ctrlblk.n_balk_blkd_tasks,
 294                   rcu_preempt_ctrlblk.n_balk_exp_gp_tasks,
 295                   rcu_preempt_ctrlblk.n_balk_boost_tasks,
 296                   rcu_preempt_ctrlblk.n_balk_notyet,
 297                   rcu_preempt_ctrlblk.n_balk_nos);
 298#endif /* #ifdef CONFIG_RCU_BOOST */
 299}
 300
 301#endif /* #ifdef CONFIG_RCU_TRACE */
 302
 303#ifdef CONFIG_RCU_BOOST
 304
 305#include "rtmutex_common.h"
 306
 307#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
 308
 309/* Controls for rcu_kthread() kthread. */
 310static struct task_struct *rcu_kthread_task;
 311static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
 312static unsigned long have_rcu_kthread_work;
 313
 314/*
 315 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
 316 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
 317 */
 318static int rcu_boost(void)
 319{
 320        unsigned long flags;
 321        struct rt_mutex mtx;
 322        struct task_struct *t;
 323        struct list_head *tb;
 324
 325        if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
 326            rcu_preempt_ctrlblk.exp_tasks == NULL)
 327                return 0;  /* Nothing to boost. */
 328
 329        local_irq_save(flags);
 330
 331        /*
 332         * Recheck with irqs disabled: all tasks in need of boosting
 333         * might exit their RCU read-side critical sections on their own
 334         * if we are preempted just before disabling irqs.
 335         */
 336        if (rcu_preempt_ctrlblk.boost_tasks == NULL &&
 337            rcu_preempt_ctrlblk.exp_tasks == NULL) {
 338                local_irq_restore(flags);
 339                return 0;
 340        }
 341
 342        /*
 343         * Preferentially boost tasks blocking expedited grace periods.
 344         * This cannot starve the normal grace periods because a second
 345         * expedited grace period must boost all blocked tasks, including
 346         * those blocking the pre-existing normal grace period.
 347         */
 348        if (rcu_preempt_ctrlblk.exp_tasks != NULL) {
 349                tb = rcu_preempt_ctrlblk.exp_tasks;
 350                RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
 351        } else {
 352                tb = rcu_preempt_ctrlblk.boost_tasks;
 353                RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
 354        }
 355        RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
 356
 357        /*
 358         * We boost task t by manufacturing an rt_mutex that appears to
 359         * be held by task t.  We leave a pointer to that rt_mutex where
 360         * task t can find it, and task t will release the mutex when it
 361         * exits its outermost RCU read-side critical section.  Then
 362         * simply acquiring this artificial rt_mutex will boost task
 363         * t's priority.  (Thanks to tglx for suggesting this approach!)
 364         */
 365        t = container_of(tb, struct task_struct, rcu_node_entry);
 366        rt_mutex_init_proxy_locked(&mtx, t);
 367        t->rcu_boost_mutex = &mtx;
 368        local_irq_restore(flags);
 369        rt_mutex_lock(&mtx);
 370        rt_mutex_unlock(&mtx);  /* Keep lockdep happy. */
 371
 372        return ACCESS_ONCE(rcu_preempt_ctrlblk.boost_tasks) != NULL ||
 373               ACCESS_ONCE(rcu_preempt_ctrlblk.exp_tasks) != NULL;
 374}
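
/*
 * Timeline sketch of the boost sequence above (illustrative only):
 *
 *	// 1. Low-priority reader T is preempted inside an RCU read-side
 *	//    critical section and sits on ->blkd_tasks, stalling the GP.
 *	// 2. rcu_boost(), running in rcu_kthread, builds mtx so that it
 *	//    appears to be held by T (rt_mutex_init_proxy_locked()) and
 *	//    records it in T->rcu_boost_mutex.
 *	// 3. rcu_kthread (SCHED_FIFO, RCU_BOOST_PRIO) blocks in
 *	//    rt_mutex_lock(&mtx); priority inheritance raises T to the
 *	//    kthread's priority.
 *	// 4. T runs, leaves its outermost critical section, and releases
 *	//    the mutex in rcu_read_unlock_special(), deboosting itself.
 */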
 375
 376/*
 377 * Check to see if it is now time to start boosting RCU readers blocking
 378 * the current grace period, and, if so, tell the rcu_kthread_task to
 379 * start boosting them.  If there is an expedited boost in progress,
 380 * we wait for it to complete.
 381 *
 382 * If there are no blocked readers blocking the current grace period,
 383 * return 0 to let the caller know, otherwise return 1.  Note that this
 384 * return value is independent of whether or not boosting was done.
 385 */
 386static int rcu_initiate_boost(void)
 387{
 388        if (!rcu_preempt_blocked_readers_cgp() &&
 389            rcu_preempt_ctrlblk.exp_tasks == NULL) {
 390                RCU_TRACE(rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++);
 391                return 0;
 392        }
 393        if (rcu_preempt_ctrlblk.exp_tasks != NULL ||
 394            (rcu_preempt_ctrlblk.gp_tasks != NULL &&
 395             rcu_preempt_ctrlblk.boost_tasks == NULL &&
 396             ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))) {
 397                if (rcu_preempt_ctrlblk.exp_tasks == NULL)
 398                        rcu_preempt_ctrlblk.boost_tasks =
 399                                rcu_preempt_ctrlblk.gp_tasks;
 400                invoke_rcu_callbacks();
 401        } else {
 402                RCU_TRACE(rcu_initiate_boost_trace());
 403        }
 404        return 1;
 405}
 406
 407#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
 408
 409/*
 410 * Do priority-boost accounting for the start of a new grace period.
 411 */
 412static void rcu_preempt_boost_start_gp(void)
 413{
 414        rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
 415}
 416
 417#else /* #ifdef CONFIG_RCU_BOOST */
 418
 419/*
 420 * If there is no RCU priority boosting, we don't initiate boosting,
 421 * but we do indicate whether there are blocked readers blocking the
 422 * current grace period.
 423 */
 424static int rcu_initiate_boost(void)
 425{
 426        return rcu_preempt_blocked_readers_cgp();
 427}
 428
 429/*
 430 * If there is no RCU priority boosting, nothing to do at grace-period start.
 431 */
 432static void rcu_preempt_boost_start_gp(void)
 433{
 434}
 435
 436#endif /* else #ifdef CONFIG_RCU_BOOST */
 437
 438/*
 439 * Record a preemptible-RCU quiescent state for the specified CPU.  Note
 440 * that this just means that the task currently running on the CPU is
 441 * in a quiescent state.  There might be any number of tasks blocked
 442 * while in an RCU read-side critical section.
 443 *
 444 * Unlike the other rcu_*_qs() functions, callers to this function
 445 * must disable irqs in order to protect the assignment to
 446 * ->rcu_read_unlock_special.
 447 *
 448 * Because this is a single-CPU implementation, the only way a grace
 449 * period can end is if the CPU is in a quiescent state.  The reason is
 450 * that a blocked preemptible-RCU reader can exit its critical section
 451 * only if the CPU is running it at the time.  Therefore, when the
 452 * last task blocking the current grace period exits its RCU read-side
 453 * critical section, neither the CPU nor blocked tasks will be stopping
 454 * the current grace period.  (In contrast, SMP implementations
 455 * might have CPUs running in RCU read-side critical sections that
 456 * block later grace periods -- but this is not possible given only
 457 * one CPU.)
 458 */
 459static void rcu_preempt_cpu_qs(void)
 460{
 461        /* Record both CPU and task as having responded to current GP. */
 462        rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
 463        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 464
 465        /* If there is no GP then there is nothing more to do.  */
 466        if (!rcu_preempt_gp_in_progress())
 467                return;
 468        /*
 469         * Check up on boosting.  If there are readers blocking the
 470         * current grace period, leave.
 471         */
 472        if (rcu_initiate_boost())
 473                return;
 474
 475        /* Advance callbacks. */
 476        rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
 477        rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
 478        rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
 479
 480        /* If there are no blocked readers, next GP is done instantly. */
 481        if (!rcu_preempt_blocked_readers_any())
 482                rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
 483
 484        /* If there are done callbacks, cause them to be invoked. */
 485        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
 486                invoke_rcu_callbacks();
 487}
 488
 489/*
 490 * Start a new RCU grace period if warranted.  Hard irqs must be disabled.
 491 */
 492static void rcu_preempt_start_gp(void)
 493{
 494        if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
 495
 496                /* Official start of GP. */
 497                rcu_preempt_ctrlblk.gpnum++;
 498                RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);
 499                reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb);
 500
 501                /* Any blocked RCU readers block new GP. */
 502                if (rcu_preempt_blocked_readers_any())
 503                        rcu_preempt_ctrlblk.gp_tasks =
 504                                rcu_preempt_ctrlblk.blkd_tasks.next;
 505
 506                /* Set up for RCU priority boosting. */
 507                rcu_preempt_boost_start_gp();
 508
 509                /* If there is no running reader, CPU is done with GP. */
 510                if (!rcu_preempt_running_reader())
 511                        rcu_preempt_cpu_qs();
 512        }
 513}
 514
 515/*
 516 * We have entered the scheduler, and the current task might soon be
 517 * context-switched away from.  If this task is in an RCU read-side
 518 * critical section, we will no longer be able to rely on the CPU to
 519 * record that fact, so we enqueue the task on the blkd_tasks list.
 520 * If the task started after the current grace period began, as recorded
  521 * by ->gpcpu, we enqueue at the beginning of the list.  Otherwise, we
  522 * enqueue it before the element referenced by ->gp_tasks (or at the tail
  523 * if ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
 524 * The task will dequeue itself when it exits the outermost enclosing
 525 * RCU read-side critical section.  Therefore, the current grace period
 526 * cannot be permitted to complete until the ->gp_tasks pointer becomes
 527 * NULL.
 528 *
 529 * Caller must disable preemption.
 530 */
 531void rcu_preempt_note_context_switch(void)
 532{
 533        struct task_struct *t = current;
 534        unsigned long flags;
 535
 536        local_irq_save(flags); /* must exclude scheduler_tick(). */
 537        if (rcu_preempt_running_reader() > 0 &&
 538            (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
 539
 540                /* Possibly blocking in an RCU read-side critical section. */
 541                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 542
 543                /*
 544                 * If this CPU has already checked in, then this task
 545                 * will hold up the next grace period rather than the
 546                 * current grace period.  Queue the task accordingly.
 547                 * If the task is queued for the current grace period
 548                 * (i.e., this CPU has not yet passed through a quiescent
 549                 * state for the current grace period), then as long
 550                 * as that task remains queued, the current grace period
 551                 * cannot end.
 552                 */
 553                list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
 554                if (rcu_cpu_blocking_cur_gp())
 555                        rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
 556        } else if (rcu_preempt_running_reader() < 0 &&
 557                   t->rcu_read_unlock_special) {
 558                /*
 559                 * Complete exit from RCU read-side critical section on
 560                 * behalf of preempted instance of __rcu_read_unlock().
 561                 */
 562                rcu_read_unlock_special(t);
 563        }
 564
 565        /*
 566         * Either we were not in an RCU read-side critical section to
 567         * begin with, or we have now recorded that critical section
 568         * globally.  Either way, we can now note a quiescent state
 569         * for this CPU.  Again, if we were in an RCU read-side critical
 570         * section, and if that critical section was blocking the current
 571         * grace period, then the fact that the task has been enqueued
 572         * means that current grace period continues to be blocked.
 573         */
 574        rcu_preempt_cpu_qs();
 575        local_irq_restore(flags);
 576}
 577
 578/*
 579 * Handle special cases during rcu_read_unlock(), such as needing to
 580 * notify RCU core processing or task having blocked during the RCU
 581 * read-side critical section.
 582 */
 583void rcu_read_unlock_special(struct task_struct *t)
 584{
 585        int empty;
 586        int empty_exp;
 587        unsigned long flags;
 588        struct list_head *np;
 589#ifdef CONFIG_RCU_BOOST
 590        struct rt_mutex *rbmp = NULL;
 591#endif /* #ifdef CONFIG_RCU_BOOST */
 592        int special;
 593
 594        /*
 595         * NMI handlers cannot block and cannot safely manipulate state.
 596         * They therefore cannot possibly be special, so just leave.
 597         */
 598        if (in_nmi())
 599                return;
 600
 601        local_irq_save(flags);
 602
 603        /*
 604         * If RCU core is waiting for this CPU to exit critical section,
 605         * let it know that we have done so.
 606         */
 607        special = t->rcu_read_unlock_special;
 608        if (special & RCU_READ_UNLOCK_NEED_QS)
 609                rcu_preempt_cpu_qs();
 610
 611        /* Hardware IRQ handlers cannot block. */
 612        if (in_irq() || in_serving_softirq()) {
 613                local_irq_restore(flags);
 614                return;
 615        }
 616
 617        /* Clean up if blocked during RCU read-side critical section. */
 618        if (special & RCU_READ_UNLOCK_BLOCKED) {
 619                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 620
 621                /*
 622                 * Remove this task from the ->blkd_tasks list and adjust
 623                 * any pointers that might have been referencing it.
 624                 */
 625                empty = !rcu_preempt_blocked_readers_cgp();
 626                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
 627                np = rcu_next_node_entry(t);
 628                list_del_init(&t->rcu_node_entry);
 629                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
 630                        rcu_preempt_ctrlblk.gp_tasks = np;
 631                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
 632                        rcu_preempt_ctrlblk.exp_tasks = np;
 633#ifdef CONFIG_RCU_BOOST
 634                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
 635                        rcu_preempt_ctrlblk.boost_tasks = np;
 636#endif /* #ifdef CONFIG_RCU_BOOST */
 637
 638                /*
 639                 * If this was the last task on the current list, and if
 640                 * we aren't waiting on the CPU, report the quiescent state
 641                 * and start a new grace period if needed.
 642                 */
 643                if (!empty && !rcu_preempt_blocked_readers_cgp()) {
 644                        rcu_preempt_cpu_qs();
 645                        rcu_preempt_start_gp();
 646                }
 647
 648                /*
 649                 * If this was the last task on the expedited lists,
  650 * then we need to wake up the waiting task.
 651                 */
 652                if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
 653                        rcu_report_exp_done();
 654        }
 655#ifdef CONFIG_RCU_BOOST
 656        /* Unboost self if was boosted. */
 657        if (t->rcu_boost_mutex != NULL) {
 658                rbmp = t->rcu_boost_mutex;
 659                t->rcu_boost_mutex = NULL;
 660                rt_mutex_unlock(rbmp);
 661        }
 662#endif /* #ifdef CONFIG_RCU_BOOST */
 663        local_irq_restore(flags);
 664}
 665
 666/*
 667 * Check for a quiescent state from the current CPU.  When a task blocks,
 668 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
 669 * checked elsewhere.  This is called from the scheduling-clock interrupt.
 670 *
 671 * Caller must disable hard irqs.
 672 */
 673static void rcu_preempt_check_callbacks(void)
 674{
 675        struct task_struct *t = current;
 676
 677        if (rcu_preempt_gp_in_progress() &&
 678            (!rcu_preempt_running_reader() ||
 679             !rcu_cpu_blocking_cur_gp()))
 680                rcu_preempt_cpu_qs();
 681        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
 682            rcu_preempt_ctrlblk.rcb.donetail)
 683                invoke_rcu_callbacks();
 684        if (rcu_preempt_gp_in_progress() &&
 685            rcu_cpu_blocking_cur_gp() &&
 686            rcu_preempt_running_reader() > 0)
 687                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
 688}
 689
 690/*
 691 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
 692 * update, so this is invoked from rcu_process_callbacks() to
 693 * handle that case.  Of course, it is invoked for all flavors of
 694 * RCU, but RCU callbacks can appear only on one of the lists, and
 695 * neither ->nexttail nor ->donetail can possibly be NULL, so there
 696 * is no need for an explicit check.
 697 */
 698static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
 699{
 700        if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
 701                rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
 702}
 703
 704/*
 705 * Process callbacks for preemptible RCU.
 706 */
 707static void rcu_preempt_process_callbacks(void)
 708{
 709        __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
 710}
 711
 712/*
  713 * Queue a preemptible-RCU callback for invocation after a grace period.
 714 */
 715void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 716{
 717        unsigned long flags;
 718
 719        debug_rcu_head_queue(head);
 720        head->func = func;
 721        head->next = NULL;
 722
 723        local_irq_save(flags);
 724        *rcu_preempt_ctrlblk.nexttail = head;
 725        rcu_preempt_ctrlblk.nexttail = &head->next;
 726        RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
 727        rcu_preempt_start_gp();  /* checks to see if GP needed. */
 728        local_irq_restore(flags);
 729}
 730EXPORT_SYMBOL_GPL(call_rcu);
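
/*
 * Typical (hypothetical) use of call_rcu(): embed an rcu_head in the
 * protected structure and free the structure from the callback once a
 * grace period has elapsed.  The names below are illustrative only.
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rcu)
 *	{
 *		struct foo *fp = container_of(rcu, struct foo, rcu);
 *
 *		kfree(fp);
 *	}
 *
 *	// Updater, after unlinking fp from all RCU-visible structures:
 *	//	call_rcu(&fp->rcu, foo_reclaim);
 */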
 731
 732/*
 733 * synchronize_rcu - wait until a grace period has elapsed.
 734 *
 735 * Control will return to the caller some time after a full grace
 736 * period has elapsed, in other words after all currently executing RCU
 737 * read-side critical sections have completed.  RCU read-side critical
 738 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 739 * and may be nested.
 740 */
 741void synchronize_rcu(void)
 742{
 743        rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
 744                           !lock_is_held(&rcu_lock_map) &&
 745                           !lock_is_held(&rcu_sched_lock_map),
 746                           "Illegal synchronize_rcu() in RCU read-side critical section");
 747
 748#ifdef CONFIG_DEBUG_LOCK_ALLOC
 749        if (!rcu_scheduler_active)
 750                return;
 751#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 752
 753        WARN_ON_ONCE(rcu_preempt_running_reader());
 754        if (!rcu_preempt_blocked_readers_any())
 755                return;
 756
 757        /* Once we get past the fastpath checks, same code as rcu_barrier(). */
 758        if (rcu_expedited)
 759                synchronize_rcu_expedited();
 760        else
 761                rcu_barrier();
 762}
 763EXPORT_SYMBOL_GPL(synchronize_rcu);
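
/*
 * Typical (hypothetical) updater using synchronize_rcu(): unlink the
 * old element, wait for pre-existing readers, then free it.  The names
 * below are illustrative only.
 *
 *	spin_lock(&foo_lock);
 *	list_del_rcu(&fp->list);
 *	spin_unlock(&foo_lock);
 *	synchronize_rcu();	// all readers that might see fp are done
 *	kfree(fp);
 */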
 764
 765static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
 766static unsigned long sync_rcu_preempt_exp_count;
 767static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 768
 769/*
 770 * Return non-zero if there are any tasks in RCU read-side critical
 771 * sections blocking the current preemptible-RCU expedited grace period.
 772 * If there is no preemptible-RCU expedited grace period currently in
 773 * progress, returns zero unconditionally.
 774 */
 775static int rcu_preempted_readers_exp(void)
 776{
 777        return rcu_preempt_ctrlblk.exp_tasks != NULL;
 778}
 779
 780/*
 781 * Report the exit from RCU read-side critical section for the last task
 782 * that queued itself during or before the current expedited preemptible-RCU
 783 * grace period.
 784 */
 785static void rcu_report_exp_done(void)
 786{
 787        wake_up(&sync_rcu_preempt_exp_wq);
 788}
 789
 790/*
 791 * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
  792 * is to rely on the fact that there is but one CPU, and that it is
 793 * illegal for a task to invoke synchronize_rcu_expedited() while in a
 794 * preemptible-RCU read-side critical section.  Therefore, any such
 795 * critical sections must correspond to blocked tasks, which must therefore
 796 * be on the ->blkd_tasks list.  So just record the current head of the
 797 * list in the ->exp_tasks pointer, and wait for all tasks including and
 798 * after the task pointed to by ->exp_tasks to drain.
 799 */
 800void synchronize_rcu_expedited(void)
 801{
 802        unsigned long flags;
 803        struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
 804        unsigned long snap;
 805
 806        barrier(); /* ensure prior action seen before grace period. */
 807
 808        WARN_ON_ONCE(rcu_preempt_running_reader());
 809
 810        /*
 811         * Acquire lock so that there is only one preemptible RCU grace
 812         * period in flight.  Of course, if someone does the expedited
 813         * grace period for us while we are acquiring the lock, just leave.
 814         */
 815        snap = sync_rcu_preempt_exp_count + 1;
 816        mutex_lock(&sync_rcu_preempt_exp_mutex);
 817        if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
 818                goto unlock_mb_ret; /* Others did our work for us. */
 819
 820        local_irq_save(flags);
 821
 822        /*
 823         * All RCU readers have to already be on blkd_tasks because
 824         * we cannot legally be executing in an RCU read-side critical
 825         * section.
 826         */
 827
 828        /* Snapshot current head of ->blkd_tasks list. */
 829        rpcp->exp_tasks = rpcp->blkd_tasks.next;
 830        if (rpcp->exp_tasks == &rpcp->blkd_tasks)
 831                rpcp->exp_tasks = NULL;
 832
 833        /* Wait for tail of ->blkd_tasks list to drain. */
 834        if (!rcu_preempted_readers_exp()) {
 835                local_irq_restore(flags);
 836        } else {
 837                rcu_initiate_boost();
 838                local_irq_restore(flags);
 839                wait_event(sync_rcu_preempt_exp_wq,
 840                           !rcu_preempted_readers_exp());
 841        }
 842
 843        /* Clean up and exit. */
 844        barrier(); /* ensure expedited GP seen before counter increment. */
 845        sync_rcu_preempt_exp_count++;
 846unlock_mb_ret:
 847        mutex_unlock(&sync_rcu_preempt_exp_mutex);
 848        barrier(); /* ensure subsequent action seen after grace period. */
 849}
 850EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 851
 852/*
 853 * Does preemptible RCU need the CPU to stay out of dynticks mode?
 854 */
 855int rcu_preempt_needs_cpu(void)
 856{
 857        return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
 858}
 859
 860#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
 861
 862#ifdef CONFIG_RCU_TRACE
 863
 864/*
 865 * Because preemptible RCU does not exist, it is not necessary to
 866 * dump out its statistics.
 867 */
 868static void show_tiny_preempt_stats(struct seq_file *m)
 869{
 870}
 871
 872#endif /* #ifdef CONFIG_RCU_TRACE */
 873
 874/*
 875 * Because preemptible RCU does not exist, it never has any callbacks
 876 * to check.
 877 */
 878static void rcu_preempt_check_callbacks(void)
 879{
 880}
 881
 882/*
 883 * Because preemptible RCU does not exist, it never has any callbacks
 884 * to remove.
 885 */
 886static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
 887{
 888}
 889
 890/*
 891 * Because preemptible RCU does not exist, it never has any callbacks
 892 * to process.
 893 */
 894static void rcu_preempt_process_callbacks(void)
 895{
 896}
 897
 898#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
 899
 900#ifdef CONFIG_RCU_BOOST
 901
 902/*
 903 * Wake up rcu_kthread() to process callbacks now eligible for invocation
 904 * or to boost readers.
 905 */
 906static void invoke_rcu_callbacks(void)
 907{
 908        have_rcu_kthread_work = 1;
 909        if (rcu_kthread_task != NULL)
 910                wake_up(&rcu_kthread_wq);
 911}
 912
 913#ifdef CONFIG_RCU_TRACE
 914
 915/*
 916 * Is the current CPU running the RCU-callbacks kthread?
 917 * Caller must have preemption disabled.
 918 */
 919static bool rcu_is_callbacks_kthread(void)
 920{
 921        return rcu_kthread_task == current;
 922}
 923
 924#endif /* #ifdef CONFIG_RCU_TRACE */
 925
 926/*
 927 * This kthread invokes RCU callbacks whose grace periods have
 928 * elapsed.  It is awakened as needed, and takes the place of the
 929 * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
 930 * This is a kthread, but it is never stopped, at least not until
 931 * the system goes down.
 932 */
 933static int rcu_kthread(void *arg)
 934{
 935        unsigned long work;
 936        unsigned long morework;
 937        unsigned long flags;
 938
 939        for (;;) {
 940                wait_event_interruptible(rcu_kthread_wq,
 941                                         have_rcu_kthread_work != 0);
 942                morework = rcu_boost();
 943                local_irq_save(flags);
 944                work = have_rcu_kthread_work;
 945                have_rcu_kthread_work = morework;
 946                local_irq_restore(flags);
 947                if (work)
 948                        rcu_process_callbacks(NULL);
 949                schedule_timeout_interruptible(1); /* Leave CPU for others. */
 950        }
 951
 952        return 0;  /* Not reached, but needed to shut gcc up. */
 953}
 954
 955/*
 956 * Spawn the kthread that invokes RCU callbacks.
 957 */
 958static int __init rcu_spawn_kthreads(void)
 959{
 960        struct sched_param sp;
 961
 962        rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
 963        sp.sched_priority = RCU_BOOST_PRIO;
 964        sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
 965        return 0;
 966}
 967early_initcall(rcu_spawn_kthreads);
 968
 969#else /* #ifdef CONFIG_RCU_BOOST */
 970
 971/* Hold off callback invocation until early_initcall() time. */
 972static int rcu_scheduler_fully_active __read_mostly;
 973
 974/*
 975 * Start up softirq processing of callbacks.
 976 */
 977void invoke_rcu_callbacks(void)
 978{
 979        if (rcu_scheduler_fully_active)
 980                raise_softirq(RCU_SOFTIRQ);
 981}
 982
 983#ifdef CONFIG_RCU_TRACE
 984
 985/*
 986 * There is no callback kthread, so this thread is never it.
 987 */
 988static bool rcu_is_callbacks_kthread(void)
 989{
 990        return false;
 991}
 992
 993#endif /* #ifdef CONFIG_RCU_TRACE */
 994
 995static int __init rcu_scheduler_really_started(void)
 996{
 997        rcu_scheduler_fully_active = 1;
 998        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 999        raise_softirq(RCU_SOFTIRQ);  /* Invoke any callbacks from early boot. */
1000        return 0;
1001}
1002early_initcall(rcu_scheduler_really_started);
1003
1004#endif /* #else #ifdef CONFIG_RCU_BOOST */
1005
1006#ifdef CONFIG_DEBUG_LOCK_ALLOC
1007#include <linux/kernel_stat.h>
1008
1009/*
1010 * During boot, we forgive RCU lockdep issues.  After this function is
1011 * invoked, we start taking RCU lockdep issues seriously.
1012 */
1013void __init rcu_scheduler_starting(void)
1014{
1015        WARN_ON(nr_context_switches() > 0);
1016        rcu_scheduler_active = 1;
1017}
1018
1019#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
1020
1021#ifdef CONFIG_RCU_TRACE
1022
1023#ifdef CONFIG_RCU_BOOST
1024
1025static void rcu_initiate_boost_trace(void)
1026{
1027        if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
1028                rcu_preempt_ctrlblk.n_balk_blkd_tasks++;
1029        else if (rcu_preempt_ctrlblk.gp_tasks == NULL &&
1030                 rcu_preempt_ctrlblk.exp_tasks == NULL)
1031                rcu_preempt_ctrlblk.n_balk_exp_gp_tasks++;
1032        else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
1033                rcu_preempt_ctrlblk.n_balk_boost_tasks++;
1034        else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
1035                rcu_preempt_ctrlblk.n_balk_notyet++;
1036        else
1037                rcu_preempt_ctrlblk.n_balk_nos++;
1038}
1039
1040#endif /* #ifdef CONFIG_RCU_BOOST */
1041
1042static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
1043{
1044        unsigned long flags;
1045
1046        local_irq_save(flags);
1047        rcp->qlen -= n;
1048        local_irq_restore(flags);
1049}
1050
1051/*
1052 * Dump statistics for TINY_RCU, such as they are.
1053 */
1054static int show_tiny_stats(struct seq_file *m, void *unused)
1055{
1056        show_tiny_preempt_stats(m);
1057        seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
1058        seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
1059        return 0;
1060}
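
/*
 * Example of what the "rcudata" debugfs file might contain given the
 * format strings above (values invented for illustration; the
 * rcu_preempt line appears only for TINY_PREEMPT_RCU, and the boost
 * statistics lines only when RCU_BOOST is also set):
 *
 *	rcu_preempt: qlen=3 gp=240 g240/p240/c239 tasks=TN.
 *	rcu_sched: qlen: 0
 *	rcu_bh: qlen: 0
 */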
1061
1062static int show_tiny_stats_open(struct inode *inode, struct file *file)
1063{
1064        return single_open(file, show_tiny_stats, NULL);
1065}
1066
1067static const struct file_operations show_tiny_stats_fops = {
1068        .owner = THIS_MODULE,
1069        .open = show_tiny_stats_open,
1070        .read = seq_read,
1071        .llseek = seq_lseek,
1072        .release = single_release,
1073};
1074
1075static struct dentry *rcudir;
1076
1077static int __init rcutiny_trace_init(void)
1078{
1079        struct dentry *retval;
1080
1081        rcudir = debugfs_create_dir("rcu", NULL);
1082        if (!rcudir)
1083                goto free_out;
1084        retval = debugfs_create_file("rcudata", 0444, rcudir,
1085                                     NULL, &show_tiny_stats_fops);
1086        if (!retval)
1087                goto free_out;
1088        return 0;
1089free_out:
1090        debugfs_remove_recursive(rcudir);
1091        return 1;
1092}
1093
1094static void __exit rcutiny_trace_cleanup(void)
1095{
1096        debugfs_remove_recursive(rcudir);
1097}
1098
1099module_init(rcutiny_trace_init);
1100module_exit(rcutiny_trace_cleanup);
1101
1102MODULE_AUTHOR("Paul E. McKenney");
1103MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
1104MODULE_LICENSE("GPL");
1105
1106static void check_cpu_stall_preempt(void)
1107{
1108#ifdef CONFIG_TINY_PREEMPT_RCU
1109        check_cpu_stall(&rcu_preempt_ctrlblk.rcb);
1110#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */
1111}
1112
1113#endif /* #ifdef CONFIG_RCU_TRACE */
1114