linux/kernel/rcutiny_plugin.h
/*
 * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
 * Internal non-public definitions that provide either classic
 * or preemptible semantics.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (c) 2010 Linaro
 *
 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */
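
/*
 * Editorial note (illustrative, not from the upstream file): RCU_TRACE()
 * compiles its argument in only when CONFIG_RCU_TRACE=y, so trace
 * counters cost nothing in non-traced builds.  For example, a statement
 * such as
 *
 *	RCU_TRACE(rcp->qlen++);
 *
 * expands to "rcp->qlen++;" with tracing enabled and to nothing
 * otherwise, which is also why traced-only fields such as ->qlen below
 * are themselves declared inside RCU_TRACE().
 */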

/* Global control variables for rcupdate callback mechanism. */
struct rcu_ctrlblk {
        struct rcu_head *rcucblist;     /* List of pending callbacks (CBs). */
        struct rcu_head **donetail;     /* ->next pointer of last "done" CB. */
        struct rcu_head **curtail;      /* ->next pointer of last CB. */
        RCU_TRACE(long qlen);           /* Number of pending CBs. */
};
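
/*
 * Illustrative sketch (editorial): the callback list is singly linked
 * and partitioned by the two tail pointers.  With callbacks A and B
 * already "done" and C still waiting for a grace period:
 *
 *	rcucblist -> A -> B -> C -> NULL
 *
 * donetail is &B->next and curtail is &C->next.  An empty list has both
 * tail pointers aimed back at &rcucblist, which is exactly what the
 * static initializers below set up.
 */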

/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
        .donetail       = &rcu_sched_ctrlblk.rcucblist,
        .curtail        = &rcu_sched_ctrlblk.rcucblist,
};

static struct rcu_ctrlblk rcu_bh_ctrlblk = {
        .donetail       = &rcu_bh_ctrlblk.rcucblist,
        .curtail        = &rcu_bh_ctrlblk.rcucblist,
};

#ifdef CONFIG_DEBUG_LOCK_ALLOC
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_TINY_PREEMPT_RCU

#include <linux/delay.h>

/* Global control variables for preemptible RCU. */
struct rcu_preempt_ctrlblk {
        struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
        struct rcu_head **nexttail;
                                /* Tasks blocked in a preemptible-RCU */
                                /*  read-side critical section while a */
                                /*  preemptible-RCU grace period is in */
                                /*  progress must wait for a later grace */
                                /*  period.  This pointer points to the */
                                /*  ->next pointer of the last callback that */
                                /*  must wait for a later grace period, or */
                                /*  to &->rcb.rcucblist if there is no */
                                /*  such callback. */
        struct list_head blkd_tasks;
                                /* Tasks blocked in RCU read-side critical */
                                /*  section.  Tasks are placed at the head */
                                /*  of this list and age towards the tail. */
        struct list_head *gp_tasks;
                                /* Pointer to the first task blocking the */
                                /*  current grace period, or NULL if there */
                                /*  is no such task. */
        struct list_head *exp_tasks;
                                /* Pointer to first task blocking the */
                                /*  current expedited grace period, or NULL */
                                /*  if there is no such task.  If there */
                                /*  is no current expedited grace period, */
                                /*  then there cannot be any such task. */
#ifdef CONFIG_RCU_BOOST
        struct list_head *boost_tasks;
                                /* Pointer to first task that needs to be */
                                /*  priority-boosted, or NULL if no priority */
                                /*  boosting is needed.  If there is no */
                                /*  current or expedited grace period, there */
                                /*  can be no such task. */
#endif /* #ifdef CONFIG_RCU_BOOST */
        u8 gpnum;               /* Current grace period. */
        u8 gpcpu;               /* Last grace period blocked by the CPU. */
        u8 completed;           /* Last grace period completed. */
                                /*  If all three are equal, RCU is idle. */
#ifdef CONFIG_RCU_BOOST
        s8 boosted_this_gp;     /* Has boosting already happened? */
        unsigned long boost_time; /* When to start boosting (jiffies). */
#endif /* #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_TRACE
        unsigned long n_grace_periods;
#ifdef CONFIG_RCU_BOOST
        unsigned long n_tasks_boosted;
        unsigned long n_exp_boosts;
        unsigned long n_normal_boosts;
        unsigned long n_normal_balk_blkd_tasks;
        unsigned long n_normal_balk_gp_tasks;
        unsigned long n_normal_balk_boost_tasks;
        unsigned long n_normal_balk_boosted;
        unsigned long n_normal_balk_notyet;
        unsigned long n_normal_balk_nos;
        unsigned long n_exp_balk_blkd_tasks;
        unsigned long n_exp_balk_nos;
#endif /* #ifdef CONFIG_RCU_BOOST */
#endif /* #ifdef CONFIG_RCU_TRACE */
};

static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
        .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
        .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
        .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
        .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
};
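
/*
 * Illustrative sketch (editorial): TINY_PREEMPT_RCU adds a third tail
 * pointer, so the single callback list has three segments:
 *
 *	rcucblist -> [done] -> [waiting for current GP] -> [next GP] -> NULL
 *	          ^         ^                           ^
 *	   (list head)   rcb.donetail ... rcb.curtail   nexttail
 *
 * Callbacks queued while readers are still blocked land past ->curtail
 * and must wait for a later grace period; rcu_preempt_cpu_qs() below
 * advances the tail pointers when the current grace period ends.
 */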

static int rcu_preempted_readers_exp(void);
static void rcu_report_exp_done(void);

/*
 * Return true if the CPU has not yet responded to the current grace period.
 */
static int rcu_cpu_blocking_cur_gp(void)
{
        return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
}

/*
 * Check for a running RCU reader.  Because there is only one CPU,
 * there can be but one running RCU reader at a time.  ;-)
 */
static int rcu_preempt_running_reader(void)
{
        return current->rcu_read_lock_nesting;
}

/*
 * Check for preempted RCU readers blocking any grace period.
 * If the caller needs a reliable answer, it must disable hard irqs.
 */
static int rcu_preempt_blocked_readers_any(void)
{
        return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
}

/*
 * Check for preempted RCU readers blocking the current grace period.
 * If the caller needs a reliable answer, it must disable hard irqs.
 */
static int rcu_preempt_blocked_readers_cgp(void)
{
        return rcu_preempt_ctrlblk.gp_tasks != NULL;
}

/*
 * Return true if another preemptible-RCU grace period is needed.
 */
static int rcu_preempt_needs_another_gp(void)
{
        return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
}

/*
 * Return true if a preemptible-RCU grace period is in progress.
 * The caller must disable hardirqs.
 */
static int rcu_preempt_gp_in_progress(void)
{
        return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
}
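
/*
 * Worked example (editorial): the three u8 counters track grace-period
 * state modulo 256.  Suppose gpnum == 5, gpcpu == 4, and completed == 4:
 * a grace period is in progress (completed != gpnum) and this CPU has
 * not yet passed through a quiescent state for it (gpcpu != gpnum).
 * Once gpcpu and completed both reach 5, RCU is idle again, matching
 * the "If all three are equal, RCU is idle" rule above.
 */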

/*
 * Advance a ->blkd_tasks-list pointer to the next entry, returning
 * NULL instead if at the end of the list.
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t)
{
        struct list_head *np;

        np = t->rcu_node_entry.next;
        if (np == &rcu_preempt_ctrlblk.blkd_tasks)
                np = NULL;
        return np;
}

#ifdef CONFIG_RCU_TRACE

#ifdef CONFIG_RCU_BOOST
static void rcu_initiate_boost_trace(void);
static void rcu_initiate_exp_boost_trace(void);
#endif /* #ifdef CONFIG_RCU_BOOST */

/*
 * Dump additional statistics for TINY_PREEMPT_RCU.
 */
static void show_tiny_preempt_stats(struct seq_file *m)
{
        seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n",
                   rcu_preempt_ctrlblk.rcb.qlen,
                   rcu_preempt_ctrlblk.n_grace_periods,
                   rcu_preempt_ctrlblk.gpnum,
                   rcu_preempt_ctrlblk.gpcpu,
                   rcu_preempt_ctrlblk.completed,
                   "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)],
                   "N."[!rcu_preempt_ctrlblk.gp_tasks],
                   "E."[!rcu_preempt_ctrlblk.exp_tasks]);
#ifdef CONFIG_RCU_BOOST
        seq_printf(m, "             ttb=%c btg=",
                   "B."[!rcu_preempt_ctrlblk.boost_tasks]);
        switch (rcu_preempt_ctrlblk.boosted_this_gp) {
        case -1:
                seq_puts(m, "exp");
                break;
        case 0:
                seq_puts(m, "no");
                break;
        case 1:
                seq_puts(m, "begun");
                break;
        case 2:
                seq_puts(m, "done");
                break;
        default:
                seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp);
        }
        seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n",
                   rcu_preempt_ctrlblk.n_tasks_boosted,
                   rcu_preempt_ctrlblk.n_exp_boosts,
                   rcu_preempt_ctrlblk.n_normal_boosts,
                   (int)(jiffies & 0xffff),
                   (int)(rcu_preempt_ctrlblk.boost_time & 0xffff));
        seq_printf(m, "             %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n",
                   "normal balk",
                   rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks,
                   rcu_preempt_ctrlblk.n_normal_balk_gp_tasks,
                   rcu_preempt_ctrlblk.n_normal_balk_boost_tasks,
                   rcu_preempt_ctrlblk.n_normal_balk_boosted,
                   rcu_preempt_ctrlblk.n_normal_balk_notyet,
                   rcu_preempt_ctrlblk.n_normal_balk_nos);
        seq_printf(m, "             exp balk: bt=%lu nos=%lu\n",
                   rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks,
                   rcu_preempt_ctrlblk.n_exp_balk_nos);
#endif /* #ifdef CONFIG_RCU_BOOST */
}
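
/*
 * Editorial note (illustrative): the "T."[expr] constructs above index a
 * two-character string literal with a 0-or-1 expression, a compact way
 * to emit one flag character.  For example,
 *
 *	"N."[!rcu_preempt_ctrlblk.gp_tasks]
 *
 * yields 'N' when gp_tasks is non-NULL (readers are blocking the current
 * grace period) and '.' otherwise.
 */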

#endif /* #ifdef CONFIG_RCU_TRACE */

#ifdef CONFIG_RCU_BOOST

#include "rtmutex_common.h"

/*
 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
 */
static int rcu_boost(void)
{
        unsigned long flags;
        struct rt_mutex mtx;
        struct list_head *np;
        struct task_struct *t;

        if (rcu_preempt_ctrlblk.boost_tasks == NULL)
                return 0;  /* Nothing to boost. */
        raw_local_irq_save(flags);
        rcu_preempt_ctrlblk.boosted_this_gp++;
        t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
                         rcu_node_entry);
        np = rcu_next_node_entry(t);
        rt_mutex_init_proxy_locked(&mtx, t);
        t->rcu_boost_mutex = &mtx;
        t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
        raw_local_irq_restore(flags);
        rt_mutex_lock(&mtx);
        RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++);
        rcu_preempt_ctrlblk.boosted_this_gp++;
        rt_mutex_unlock(&mtx);
        return rcu_preempt_ctrlblk.boost_tasks != NULL;
}
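
/*
 * Illustrative sketch (editorial) of the proxy-lock handoff above: the
 * booster creates an rt_mutex that is born already held by the blocked
 * reader t, then blocks on it itself:
 *
 *	rt_mutex_init_proxy_locked(&mtx, t);	(t "owns" mtx)
 *	rt_mutex_lock(&mtx);			(booster blocks, PI kicks in)
 *
 * Priority inheritance then raises t's priority to the booster's until
 * t leaves its RCU read-side critical section and releases mtx from
 * rcu_read_unlock_special().
 */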

/*
 * Check to see if it is now time to start boosting RCU readers blocking
 * the current grace period, and, if so, tell the rcu_kthread_task to
 * start boosting them.  If there is an expedited boost in progress,
 * we wait for it to complete.
 *
 * If there are no blocked readers blocking the current grace period,
 * return 0 to let the caller know, otherwise return 1.  Note that this
 * return value is independent of whether or not boosting was done.
 */
static int rcu_initiate_boost(void)
{
        if (!rcu_preempt_blocked_readers_cgp()) {
                RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++);
                return 0;
        }
        if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
            rcu_preempt_ctrlblk.boost_tasks == NULL &&
            rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
            ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
                rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
                invoke_rcu_kthread();
                RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++);
        } else
                RCU_TRACE(rcu_initiate_boost_trace());
        return 1;
}

/*
 * Initiate boosting for an expedited grace period.
 */
static void rcu_initiate_expedited_boost(void)
{
        unsigned long flags;

        raw_local_irq_save(flags);
        if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
                rcu_preempt_ctrlblk.boost_tasks =
                        rcu_preempt_ctrlblk.blkd_tasks.next;
                rcu_preempt_ctrlblk.boosted_this_gp = -1;
                invoke_rcu_kthread();
                RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++);
        } else
                RCU_TRACE(rcu_initiate_exp_boost_trace());
        raw_local_irq_restore(flags);
}
#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

/*
 * Do priority-boost accounting for the start of a new grace period.
 */
static void rcu_preempt_boost_start_gp(void)
{
        rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
        if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
                rcu_preempt_ctrlblk.boosted_this_gp = 0;
}
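
/*
 * Worked example (editorial; the config values are assumptions): with
 * CONFIG_RCU_BOOST_DELAY=500 and HZ=250, RCU_BOOST_DELAY_JIFFIES is
 * DIV_ROUND_UP(500 * 250, 1000) = 125 jiffies, so boosting begins only
 * if readers are still blocking the grace period half a second after
 * it starts.
 */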

#else /* #ifdef CONFIG_RCU_BOOST */

/*
 * If there is no RCU priority boosting, we don't boost.
 */
static int rcu_boost(void)
{
        return 0;
}

/*
 * If there is no RCU priority boosting, we don't initiate boosting,
 * but we do indicate whether there are blocked readers blocking the
 * current grace period.
 */
static int rcu_initiate_boost(void)
{
        return rcu_preempt_blocked_readers_cgp();
}

/*
 * If there is no RCU priority boosting, we don't initiate expedited boosting.
 */
static void rcu_initiate_expedited_boost(void)
{
}

/*
 * If there is no RCU priority boosting, nothing to do at grace-period start.
 */
static void rcu_preempt_boost_start_gp(void)
{
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */
/*
 * Record a preemptible-RCU quiescent state for the specified CPU.  Note
 * that this just means that the task currently running on the CPU is
 * in a quiescent state.  There might be any number of tasks blocked
 * while in an RCU read-side critical section.
 *
 * Unlike the other rcu_*_qs() functions, callers of this function
 * must disable irqs in order to protect the assignment to
 * ->rcu_read_unlock_special.
 *
 * Because this is a single-CPU implementation, the only way a grace
 * period can end is if the CPU is in a quiescent state.  The reason is
 * that a blocked preemptible-RCU reader can exit its critical section
 * only if the CPU is running it at the time.  Therefore, when the
 * last task blocking the current grace period exits its RCU read-side
 * critical section, neither the CPU nor the blocked tasks will be
 * stopping the current grace period.  (In contrast, SMP implementations
 * might have CPUs running in RCU read-side critical sections that
 * block later grace periods -- but this is not possible given only
 * one CPU.)
 */
static void rcu_preempt_cpu_qs(void)
{
        /* Record both CPU and task as having responded to current GP. */
        rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
        current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;

        /* If there is no GP, then there is nothing more to do. */
        if (!rcu_preempt_gp_in_progress())
                return;

        /*
         * Check up on boosting.  If there are still readers blocking
         * the current grace period, leave: the grace period cannot
         * end until they are done.
         */
        if (rcu_initiate_boost())
                return;

        /* Advance callbacks. */
        rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
        rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
        rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;

        /* If there are no blocked readers, next GP is done instantly. */
        if (!rcu_preempt_blocked_readers_any())
                rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;

        /* If there are done callbacks, cause them to be invoked. */
        if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
                invoke_rcu_kthread();
}
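
/*
 * Worked example (editorial) of the pointer advancement above: suppose
 * callback W awaits the current grace period and N was queued after it
 * began (so N sits between ->curtail and ->nexttail).  The first
 * assignment moves ->donetail to the old ->curtail, making W invocable;
 * the second moves ->curtail to ->nexttail, making N the wait set for
 * the grace period that rcu_preempt_start_gp() starts next.
 */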

/*
 * Start a new RCU grace period if warranted.  Hard irqs must be disabled.
 */
static void rcu_preempt_start_gp(void)
{
        if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {

                /* Official start of GP. */
                rcu_preempt_ctrlblk.gpnum++;
                RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++);

                /* Any blocked RCU readers block new GP. */
                if (rcu_preempt_blocked_readers_any())
                        rcu_preempt_ctrlblk.gp_tasks =
                                rcu_preempt_ctrlblk.blkd_tasks.next;

                /* Set up for RCU priority boosting. */
                rcu_preempt_boost_start_gp();

                /* If there is no running reader, CPU is done with GP. */
                if (!rcu_preempt_running_reader())
                        rcu_preempt_cpu_qs();
        }
}

/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from.  If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the ->blkd_tasks list.
 * Tasks are always enqueued at the head of that list; if this CPU has
 * not yet passed through a quiescent state for the current grace period
 * (as recorded by ->gpcpu), the new entry also blocks that grace period,
 * so we point ->gp_tasks at it.  The task will dequeue itself when it
 * exits the outermost enclosing RCU read-side critical section.
 * Therefore, the current grace period cannot be permitted to complete
 * until the ->gp_tasks pointer becomes NULL.
 *
 * Caller must disable preemption.
 */
void rcu_preempt_note_context_switch(void)
{
        struct task_struct *t = current;
        unsigned long flags;

        local_irq_save(flags); /* must exclude scheduler_tick(). */
        if (rcu_preempt_running_reader() &&
            (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {

                /* Possibly blocking in an RCU read-side critical section. */
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;

                /*
                 * If this CPU has already checked in, then this task
                 * will hold up the next grace period rather than the
                 * current grace period.  Queue the task accordingly.
                 * If the task is queued for the current grace period
                 * (i.e., this CPU has not yet passed through a quiescent
                 * state for the current grace period), then as long
                 * as that task remains queued, the current grace period
                 * cannot end.
                 */
                list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
                if (rcu_cpu_blocking_cur_gp())
                        rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
        }

        /*
         * Either we were not in an RCU read-side critical section to
         * begin with, or we have now recorded that critical section
         * globally.  Either way, we can now note a quiescent state
         * for this CPU.  Again, if we were in an RCU read-side critical
         * section, and if that critical section was blocking the current
         * grace period, then the fact that the task has been enqueued
         * means that the current grace period continues to be blocked.
         */
        rcu_preempt_cpu_qs();
        local_irq_restore(flags);
}
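
/*
 * Illustrative sketch (editorial): because tasks are always added at
 * the head, the list ages toward the tail.  With tasks t3 (newest)
 * through t1 (oldest) queued while the CPU still owes a quiescent state:
 *
 *	blkd_tasks -> t3 -> t2 -> t1
 *	              ^
 *	           gp_tasks
 *
 * every queued task, t3 through t1, must exit its read-side critical
 * section before the current grace period can end.
 */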

/*
 * Tiny-preemptible RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting; shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
        current->rcu_read_lock_nesting++;
        barrier();  /* needed if we ever invoke rcu_read_lock in rcutiny.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or the task having blocked during the RCU
 * read-side critical section.
 */
static void rcu_read_unlock_special(struct task_struct *t)
{
        int empty;
        int empty_exp;
        unsigned long flags;
        struct list_head *np;
        int special;

        /*
         * NMI handlers cannot block and cannot safely manipulate state.
         * They therefore cannot possibly be special, so just leave.
         */
        if (in_nmi())
                return;

        local_irq_save(flags);

        /*
         * If RCU core is waiting for this CPU to exit its critical
         * section, let it know that we have done so.
         */
        special = t->rcu_read_unlock_special;
        if (special & RCU_READ_UNLOCK_NEED_QS)
                rcu_preempt_cpu_qs();

        /* Hardware IRQ handlers cannot block. */
        if (in_irq()) {
                local_irq_restore(flags);
                return;
        }

        /* Clean up if blocked during RCU read-side critical section. */
        if (special & RCU_READ_UNLOCK_BLOCKED) {
                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;

                /*
                 * Remove this task from the ->blkd_tasks list and adjust
                 * any pointers that might have been referencing it.
                 */
                empty = !rcu_preempt_blocked_readers_cgp();
                empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
                np = rcu_next_node_entry(t);
                list_del(&t->rcu_node_entry);
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
                        rcu_preempt_ctrlblk.gp_tasks = np;
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
                        rcu_preempt_ctrlblk.exp_tasks = np;
#ifdef CONFIG_RCU_BOOST
                if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
                        rcu_preempt_ctrlblk.boost_tasks = np;
#endif /* #ifdef CONFIG_RCU_BOOST */
                INIT_LIST_HEAD(&t->rcu_node_entry);

                /*
                 * If this was the last task on the current list, and if
                 * we aren't waiting on the CPU, report the quiescent state
                 * and start a new grace period if needed.
                 */
                if (!empty && !rcu_preempt_blocked_readers_cgp()) {
                        rcu_preempt_cpu_qs();
                        rcu_preempt_start_gp();
                }

                /*
                 * If this was the last task on the expedited lists,
                 * then we need to wake up the waiting task.
                 */
                if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
                        rcu_report_exp_done();
        }
#ifdef CONFIG_RCU_BOOST
        /* Unboost self if we were boosted. */
        if (special & RCU_READ_UNLOCK_BOOSTED) {
                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
                rt_mutex_unlock(t->rcu_boost_mutex);
                t->rcu_boost_mutex = NULL;
        }
#endif /* #ifdef CONFIG_RCU_BOOST */
        local_irq_restore(flags);
}

/*
 * Tiny-preemptible RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
        struct task_struct *t = current;

        barrier();  /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
        --t->rcu_read_lock_nesting;
        barrier();  /* decrement before load of ->rcu_read_unlock_special */
        if (t->rcu_read_lock_nesting == 0 &&
            unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
                rcu_read_unlock_special(t);
#ifdef CONFIG_PROVE_LOCKING
        WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
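
/*
 * Usage sketch (editorial; gp and p are hypothetical): read-side
 * critical sections nest, and only the outermost rcu_read_unlock()
 * can end up in rcu_read_unlock_special():
 *
 *	rcu_read_lock();		(nesting 0 -> 1)
 *	rcu_read_lock();		(nesting 1 -> 2)
 *	p = rcu_dereference(gp);
 *	rcu_read_unlock();		(nesting 2 -> 1, no special work)
 *	rcu_read_unlock();		(nesting 1 -> 0, special work only
 *					 if we blocked or owe a QS)
 */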

/*
 * Check for a quiescent state from the current CPU.  When a task blocks,
 * the task is recorded in the rcu_preempt_ctrlblk structure, which is
 * checked elsewhere.  This is called from the scheduling-clock interrupt.
 *
 * Caller must disable hard irqs.
 */
static void rcu_preempt_check_callbacks(void)
{
        struct task_struct *t = current;

        if (rcu_preempt_gp_in_progress() &&
            (!rcu_preempt_running_reader() ||
             !rcu_cpu_blocking_cur_gp()))
                rcu_preempt_cpu_qs();
        if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
            rcu_preempt_ctrlblk.rcb.donetail)
                invoke_rcu_kthread();
        if (rcu_preempt_gp_in_progress() &&
            rcu_cpu_blocking_cur_gp() &&
            rcu_preempt_running_reader())
                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}

/*
 * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
 * update, so this is invoked from rcu_process_callbacks() to
 * handle that case.  Of course, it is invoked for all flavors of
 * RCU, but RCU callbacks can appear on only one of the lists, and
 * neither ->nexttail nor ->donetail can possibly be NULL, so there
 * is no need for an explicit check.
 */
static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
{
        if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
                rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
}

/*
 * Process callbacks for preemptible RCU.
 */
static void rcu_preempt_process_callbacks(void)
{
        rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
}

/*
 * Queue a preemptible-RCU callback for invocation after a grace period.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
        unsigned long flags;

        debug_rcu_head_queue(head);
        head->func = func;
        head->next = NULL;

        local_irq_save(flags);
        *rcu_preempt_ctrlblk.nexttail = head;
        rcu_preempt_ctrlblk.nexttail = &head->next;
        RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++);
        rcu_preempt_start_gp();  /* checks to see if GP needed. */
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
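
/*
 * Usage sketch (editorial; struct foo, foo_reclaim, gp, old_fp, and
 * new_fp are hypothetical): a typical updater queues a reclaim callback
 * instead of blocking for a grace period:
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rp)
 *	{
 *		kfree(container_of(rp, struct foo, rcu));
 *	}
 *
 *	rcu_assign_pointer(gp, new_fp);
 *	call_rcu(&old_fp->rcu, foo_reclaim);
 *
 * foo_reclaim() then runs from the RCU kthread once all pre-existing
 * readers have finished.
 */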

void rcu_barrier(void)
{
        struct rcu_synchronize rcu;

        init_rcu_head_on_stack(&rcu.head);
        init_completion(&rcu.completion);
        /* Will wake us up after RCU has finished. */
        call_rcu(&rcu.head, wakeme_after_rcu);
        /* Wait for it. */
        wait_for_completion(&rcu.completion);
        destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(rcu_barrier);

/*
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void synchronize_rcu(void)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        if (!rcu_scheduler_active)
                return;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

        WARN_ON_ONCE(rcu_preempt_running_reader());
        if (!rcu_preempt_blocked_readers_any())
                return;

        /* Once we get past the fastpath checks, same code as rcu_barrier(). */
        rcu_barrier();
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
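
/*
 * Usage sketch (editorial; gp_lock, gp, old_fp, and new_fp are
 * hypothetical): the blocking counterpart to call_rcu():
 *
 *	spin_lock(&gp_lock);
 *	old_fp = gp;
 *	rcu_assign_pointer(gp, new_fp);
 *	spin_unlock(&gp_lock);
 *	synchronize_rcu();		(wait for pre-existing readers)
 *	kfree(old_fp);
 */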

static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
static unsigned long sync_rcu_preempt_exp_count;
static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);

/*
 * Return non-zero if there are any tasks in RCU read-side critical
 * sections blocking the current preemptible-RCU expedited grace period.
 * If there is no preemptible-RCU expedited grace period currently in
 * progress, returns zero unconditionally.
 */
static int rcu_preempted_readers_exp(void)
{
        return rcu_preempt_ctrlblk.exp_tasks != NULL;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.
 */
static void rcu_report_exp_done(void)
{
        wake_up(&sync_rcu_preempt_exp_wq);
}

/*
 * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 * is to rely on the fact that there is but one CPU, and that it is
 * illegal for a task to invoke synchronize_rcu_expedited() while in a
 * preemptible-RCU read-side critical section.  Therefore, any such
 * critical sections must correspond to blocked tasks, which must therefore
 * be on the ->blkd_tasks list.  So just record the current head of the
 * list in the ->exp_tasks pointer, and wait for all tasks including and
 * after the task pointed to by ->exp_tasks to drain.
 */
void synchronize_rcu_expedited(void)
{
        unsigned long flags;
        struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
        unsigned long snap;

        barrier(); /* ensure prior action seen before grace period. */

        WARN_ON_ONCE(rcu_preempt_running_reader());

        /*
         * Acquire lock so that there is only one preemptible RCU grace
         * period in flight.  Of course, if someone does the expedited
         * grace period for us while we are acquiring the lock, just leave.
         */
        snap = sync_rcu_preempt_exp_count + 1;
        mutex_lock(&sync_rcu_preempt_exp_mutex);
        if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
                goto unlock_mb_ret; /* Others did our work for us. */

        local_irq_save(flags);

        /*
         * All RCU readers have to already be on ->blkd_tasks because
         * we cannot legally be executing in an RCU read-side critical
         * section.
         */

        /* Snapshot current head of ->blkd_tasks list. */
        rpcp->exp_tasks = rpcp->blkd_tasks.next;
        if (rpcp->exp_tasks == &rpcp->blkd_tasks)
                rpcp->exp_tasks = NULL;
        local_irq_restore(flags);

        /* Wait for tail of ->blkd_tasks list to drain. */
        if (rcu_preempted_readers_exp())
                rcu_initiate_expedited_boost();
        wait_event(sync_rcu_preempt_exp_wq,
                   !rcu_preempted_readers_exp());

        /* Clean up and exit. */
        barrier(); /* ensure expedited GP seen before counter increment. */
        sync_rcu_preempt_exp_count++;
unlock_mb_ret:
        mutex_unlock(&sync_rcu_preempt_exp_mutex);
        barrier(); /* ensure subsequent action seen after grace period. */
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
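
/*
 * Editorial note (illustrative): the snapshot idiom above is wraparound
 * safe because ULONG_CMP_LT() compares the modular difference of its
 * arguments against ULONG_MAX/2, in effect a signed comparison.  Taking
 * snap = count + 1 before acquiring the mutex means that if the counter
 * has already advanced past snap by the time we hold the lock, at least
 * one full expedited grace period elapsed in the interim, so we can
 * return immediately.
 */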

/*
 * Does preemptible RCU need the CPU to stay out of dynticks mode?
 */
int rcu_preempt_needs_cpu(void)
{
        if (!rcu_preempt_running_reader())
                rcu_preempt_cpu_qs();
        return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
}

/*
 * Check for a task exiting while in a preemptible-RCU read-side
 * critical section; clean up if so.  No need to issue warnings,
 * as debug_check_no_locks_held() already does this if lockdep
 * is enabled.
 */
void exit_rcu(void)
{
        struct task_struct *t = current;

        if (t->rcu_read_lock_nesting == 0)
                return;
        t->rcu_read_lock_nesting = 1;
        rcu_read_unlock();
}

#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */

#ifdef CONFIG_RCU_TRACE

/*
 * Because preemptible RCU does not exist, it is not necessary to
 * dump out its statistics.
 */
static void show_tiny_preempt_stats(struct seq_file *m)
{
}

#endif /* #ifdef CONFIG_RCU_TRACE */

/*
 * Because preemptible RCU does not exist, it is never necessary to
 * boost preempted RCU readers.
 */
static int rcu_boost(void)
{
        return 0;
}

/*
 * Because preemptible RCU does not exist, it never has any callbacks
 * to check.
 */
static void rcu_preempt_check_callbacks(void)
{
}

/*
 * Because preemptible RCU does not exist, it never has any callbacks
 * to remove.
 */
static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
{
}

/*
 * Because preemptible RCU does not exist, it never has any callbacks
 * to process.
 */
static void rcu_preempt_process_callbacks(void)
{
}

#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */

#ifdef CONFIG_DEBUG_LOCK_ALLOC
#include <linux/kernel_stat.h>

/*
 * During boot, we forgive RCU lockdep issues.  After this function is
 * invoked, we start taking RCU lockdep issues seriously.
 */
void __init rcu_scheduler_starting(void)
{
        WARN_ON(nr_context_switches() > 0);
        rcu_scheduler_active = 1;
}

#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

#ifdef CONFIG_RCU_BOOST
#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
#else /* #ifdef CONFIG_RCU_BOOST */
#define RCU_BOOST_PRIO 1
#endif /* #else #ifdef CONFIG_RCU_BOOST */

#ifdef CONFIG_RCU_TRACE

#ifdef CONFIG_RCU_BOOST

static void rcu_initiate_boost_trace(void)
{
        if (rcu_preempt_ctrlblk.gp_tasks == NULL)
                rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++;
        else if (rcu_preempt_ctrlblk.boost_tasks != NULL)
                rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++;
        else if (rcu_preempt_ctrlblk.boosted_this_gp != 0)
                rcu_preempt_ctrlblk.n_normal_balk_boosted++;
        else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time))
                rcu_preempt_ctrlblk.n_normal_balk_notyet++;
        else
                rcu_preempt_ctrlblk.n_normal_balk_nos++;
}

static void rcu_initiate_exp_boost_trace(void)
{
        if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks))
                rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++;
        else
                rcu_preempt_ctrlblk.n_exp_balk_nos++;
}

#endif /* #ifdef CONFIG_RCU_BOOST */

static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
{
        unsigned long flags;

        raw_local_irq_save(flags);
        rcp->qlen -= n;
        raw_local_irq_restore(flags);
}

/*
 * Dump statistics for TINY_RCU, such as they are.
 */
static int show_tiny_stats(struct seq_file *m, void *unused)
{
        show_tiny_preempt_stats(m);
        seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
        seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
        return 0;
}

static int show_tiny_stats_open(struct inode *inode, struct file *file)
{
        return single_open(file, show_tiny_stats, NULL);
}

static const struct file_operations show_tiny_stats_fops = {
        .owner = THIS_MODULE,
        .open = show_tiny_stats_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static struct dentry *rcudir;

static int __init rcutiny_trace_init(void)
{
        struct dentry *retval;

        rcudir = debugfs_create_dir("rcu", NULL);
        if (!rcudir)
                goto free_out;
        retval = debugfs_create_file("rcudata", 0444, rcudir,
                                     NULL, &show_tiny_stats_fops);
        if (!retval)
                goto free_out;
        return 0;
free_out:
        debugfs_remove_recursive(rcudir);
        return 1;
}

static void __exit rcutiny_trace_cleanup(void)
{
        debugfs_remove_recursive(rcudir);
}

module_init(rcutiny_trace_init);
module_exit(rcutiny_trace_cleanup);

MODULE_AUTHOR("Paul E. McKenney");
MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
MODULE_LICENSE("GPL");

#endif /* #ifdef CONFIG_RCU_TRACE */