#include "../locking/rtmutex_common.h"

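/*
 * Is the specified rcu_data structure's ->cblist in the RCU_NOCB offloaded
 * state?  The lockdep expression checks that the caller holds enough state
 * to make this read of the offloaded state safe.
 */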
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
	RCU_LOCKDEP_WARN(
		!(lockdep_is_held(&rcu_state.barrier_mutex) ||
		  (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
		  rcu_lockdep_is_held_nocb(rdp) ||
		  (rdp == this_cpu_ptr(&rcu_data) &&
		   !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
		  rcu_current_is_nocb_kthread(rdp)),
		"Unsafe read of RCU_NOCB offloaded state"
	);

	return rcu_segcblist_is_offloaded(&rdp->cblist);
}

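/*
 * Report any Kconfig options or boot parameters that deviate from the
 * RCU defaults.
 */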
static void __init rcu_bootup_announce_oddness(void)
{
	if (IS_ENABLED(CONFIG_RCU_TRACE))
		pr_info("\tRCU event tracing is enabled.\n");
	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
			RCU_FANOUT);
	if (rcu_fanout_exact)
		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		pr_info("\tRCU lockdep checking is enabled.\n");
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
	if (RCU_NUM_LVLS >= 4)
		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
	if (RCU_FANOUT_LEAF != 16)
		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			RCU_FANOUT_LEAF);
	if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
			rcu_fanout_leaf);
	if (nr_cpu_ids != NR_CPUS)
		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_BOOST
	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
		kthread_prio, CONFIG_RCU_BOOST_DELAY);
#endif
	if (blimit != DEFAULT_RCU_BLIMIT)
		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
	if (qhimark != DEFAULT_RCU_QHIMARK)
		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
	if (qlowmark != DEFAULT_RCU_QLOMARK)
		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
	if (qovld != DEFAULT_RCU_QOVLD)
		pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
	if (jiffies_till_first_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
	if (jiffies_till_next_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
	if (jiffies_till_sched_qs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
	if (rcu_kick_kthreads)
		pr_info("\tKick kthreads if too-long grace period.\n");
	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
	if (gp_preinit_delay)
		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
	if (gp_init_delay)
		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
	if (gp_cleanup_delay)
		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
	if (!use_softirq)
		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
		pr_info("\tRCU debug extended QS entry/exit.\n");
	rcupdate_announce_bootup_oddness();
}

#ifdef CONFIG_PREEMPT_RCU

static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static void rcu_read_unlock_special(struct task_struct *t);

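/*
 * Tell them what RCU they are running.
 */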
static void __init rcu_bootup_announce(void)
{
	pr_info("Preemptible hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

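/*
 * Bits describing the rcu_node state seen by rcu_preempt_ctxt_queue():
 * whether tasks are already queued blocking the current normal or expedited
 * grace period (->gp_tasks/->exp_tasks non-NULL), and whether this CPU has
 * yet to report a quiescent state for either of them.
 */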
#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1

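/*
 * Queue the just-preempted task at the proper position within the leaf
 * rcu_node structure's ->blkd_tasks list, so that it blocks only those
 * grace periods (normal and/or expedited) that this CPU was blocking at
 * the time of preemption, and update ->gp_tasks and ->exp_tasks as needed.
 * Called with the rcu_node's ->lock held, which this function releases.
 */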
static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
	__releases(rnp->lock)
{
	int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
			 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
			 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
	struct task_struct *t = current;

	raw_lockdep_assert_held_rcu_node(rnp);
	WARN_ON_ONCE(rdp->mynode != rnp);
	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
	WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
		     rdp->grpmask);

	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:
		list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
		list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:
		list_add(&t->rcu_node_entry, rnp->exp_tasks);
		break;

	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:
		list_add(&t->rcu_node_entry, rnp->gp_tasks);
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
		WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
	}
	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
		WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
		     !(rnp->qsmask & rdp->grpmask));
	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
		     !(rnp->expmask & rdp->grpmask));
	raw_spin_unlock_rcu_node(rnp);

	if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	else
		WARN_ON_ONCE(rdp->exp_deferred_qs);
}

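/*
 * Record a preemptible-RCU quiescent state for the specified CPU.  Note
 * that this just means that the task currently running on the CPU is not
 * in a quiescent state; the current grace period simply need not wait on
 * any RCU read-side critical section that starts later on this CPU.
 */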
static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
	if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
		trace_rcu_grace_period(TPS("rcu_preempt"),
				       __this_cpu_read(rcu_data.gp_seq),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
		barrier();
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
	}
}

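/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from.  If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the blkd_tasks list.
 * Either way, record a quiescent state for this CPU.
 */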
void rcu_note_context_switch(bool preempt)
{
	struct task_struct *t = current;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	trace_rcu_utilization(TPS("Start context switch"));
	lockdep_assert_irqs_disabled();
	WARN_ONCE(!preempt && rcu_preempt_depth() > 0, "Voluntary context switch within RCU read-side critical section!");
	if (rcu_preempt_depth() > 0 &&
	    !t->rcu_read_unlock_special.b.blocked) {

		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		t->rcu_read_unlock_special.b.blocked = true;
		t->rcu_blocked_node = rnp;

		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		trace_rcu_preempt_task(rcu_state.name,
				       t->pid,
				       (rnp->qsmask & rdp->grpmask)
				       ? rnp->gp_seq
				       : rcu_seq_snap(&rnp->gp_seq));
		rcu_preempt_ctxt_queue(rnp, rdp);
	} else {
		rcu_preempt_deferred_qs(t);
	}

	rcu_qs();
	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	rcu_tasks_qs(current, preempt);
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->gp_tasks) != NULL;
}

#define RCU_NEST_PMAX (INT_MAX / 2)

static void rcu_preempt_read_enter(void)
{
	WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
}

static int rcu_preempt_read_exit(void)
{
	int ret = READ_ONCE(current->rcu_read_lock_nesting) - 1;

	WRITE_ONCE(current->rcu_read_lock_nesting, ret);
	return ret;
}

static void rcu_preempt_depth_set(int val)
{
	WRITE_ONCE(current->rcu_read_lock_nesting, val);
}

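/*
 * Preemptible-RCU implementation for rcu_read_lock().  Just increment
 * the task's ->rcu_read_lock_nesting; shared state is updated only if
 * the task later blocks within its RCU read-side critical section.
 */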
void __rcu_read_lock(void)
{
	rcu_preempt_read_enter();
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
	barrier();
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

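/*
 * Preemptible-RCU implementation for rcu_read_unlock().  Decrement the
 * nesting count, and if the result is zero (outermost rcu_read_unlock())
 * and ->rcu_read_unlock_special is set, take the slow path, which reports
 * any deferred quiescent states and dequeues the task if it was preempted
 * within its critical section.
 */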
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	barrier();
	if (rcu_preempt_read_exit() == 0) {
		barrier();
		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
	}
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		int rrln = rcu_preempt_depth();

		WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

static struct list_head *rcu_next_node_entry(struct task_struct *t,
					     struct rcu_node *rnp)
{
	struct list_head *np;

	np = t->rcu_node_entry.next;
	if (np == &rnp->blkd_tasks)
		np = NULL;
	return np;
}

static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}

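/*
 * Report any deferred quiescent states for the current task/CPU, dequeue
 * the task from its rcu_node structure if it had been preempted within an
 * RCU read-side critical section, and restore interrupts using the
 * caller-supplied flags.
 */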
static void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
	bool empty_exp;
	bool empty_norm;
	bool empty_exp_now;
	struct list_head *np;
	bool drop_boost_mutex = false;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	union rcu_special special;

	special = t->rcu_read_unlock_special;
	rdp = this_cpu_ptr(&rcu_data);
	if (!special.s && !rdp->exp_deferred_qs) {
		local_irq_restore(flags);
		return;
	}
	t->rcu_read_unlock_special.s = 0;
	if (special.b.need_qs) {
		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
			rcu_report_qs_rdp(rdp);
			udelay(rcu_unlock_delay);
		} else {
			rcu_qs();
		}
	}

	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);

	if (special.b.blocked) {
		rnp = t->rcu_blocked_node;
		raw_spin_lock_rcu_node(rnp);
		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
			     (!empty_norm || rnp->qsmask));
		empty_exp = sync_rcu_exp_done(rnp);
		smp_mb();
		np = rcu_next_node_entry(t, rnp);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
						rnp->gp_seq, t->pid);
		if (&t->rcu_node_entry == rnp->gp_tasks)
			WRITE_ONCE(rnp->gp_tasks, np);
		if (&t->rcu_node_entry == rnp->exp_tasks)
			WRITE_ONCE(rnp->exp_tasks, np);
		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
			if (&t->rcu_node_entry == rnp->boost_tasks)
				WRITE_ONCE(rnp->boost_tasks, np);
		}

		empty_exp_now = sync_rcu_exp_done(rnp);
		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
							 rnp->gp_seq,
							 0, rnp->qsmask,
							 rnp->level,
							 rnp->grplo,
							 rnp->grphi,
							 !!rnp->gp_tasks);
			rcu_report_unblock_qs_rnp(rnp, flags);
		} else {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}

		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
			rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex);

		if (!empty_exp && empty_exp_now)
			rcu_report_exp_rnp(rnp, true);
	} else {
		local_irq_restore(flags);
	}
}

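/*
 * Is a deferred quiescent-state pending, and is this task not currently
 * within an RCU read-side critical section?
 */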
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
		READ_ONCE(t->rcu_read_unlock_special.s)) &&
	       rcu_preempt_depth() == 0;
}

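/*
 * Report a deferred quiescent state for the specified task, but only if
 * one is actually needed (see rcu_preempt_need_deferred_qs()).
 */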
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
	unsigned long flags;

	if (!rcu_preempt_need_deferred_qs(t))
		return;
	local_irq_save(flags);
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

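/*
 * Handler for the irq_work queued by rcu_read_unlock_special().  The
 * interrupt itself provides the opportunity to report the deferred
 * quiescent state, so all that remains here is to clear the pending flag.
 */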
static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
{
	struct rcu_data *rdp;

	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
	rdp->defer_qs_iw_pending = false;
}

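/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or the task having blocked during its RCU
 * read-side critical section.  If it is unsafe to report a quiescent
 * state immediately (interrupts, preemption, or bottom halves disabled),
 * defer it, possibly raising RCU_SOFTIRQ or queuing irq_work to expedite
 * matters.
 */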
static void rcu_read_unlock_special(struct task_struct *t)
{
	unsigned long flags;
	bool irqs_were_disabled;
	bool preempt_bh_were_disabled =
			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));

	if (in_nmi())
		return;

	local_irq_save(flags);
	irqs_were_disabled = irqs_disabled_flags(flags);
	if (preempt_bh_were_disabled || irqs_were_disabled) {
		bool expboost;
		struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
		struct rcu_node *rnp = rdp->mynode;

		expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
			   (rdp->grpmask & READ_ONCE(rnp->expmask)) ||
			   IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
			   (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
			    t->rcu_blocked_node);

		if (use_softirq && (in_irq() || (expboost && !irqs_were_disabled))) {
			raise_softirq_irqoff(RCU_SOFTIRQ);
		} else {
			set_tsk_need_resched(current);
			set_preempt_need_resched();
			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
			    expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
				init_irq_work(&rdp->defer_qs_iw, rcu_preempt_deferred_qs_handler);
				rdp->defer_qs_iw_pending = true;
				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
			}
		}
		local_irq_restore(flags);
		return;
	}
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

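/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty.  It is a serious bug to complete a grace
 * period that still has RCU readers blocked!  Also initialize ->gp_tasks
 * if there are blocked tasks for the new grace period.  The caller must
 * hold the specified rcu_node structure's ->lock.
 */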
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	struct task_struct *t;

	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
	raw_lockdep_assert_held_rcu_node(rnp);
	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
		dump_blkd_tasks(rnp, 10);
	if (rcu_preempt_has_tasks(rnp) &&
	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
		WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
		t = container_of(rnp->gp_tasks, struct task_struct,
				 rcu_node_entry);
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
						rnp->gp_seq, t->pid);
	}
	WARN_ON_ONCE(rnp->qsmask);
}

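/*
 * Check for a quiescent state from the current CPU, including voluntary
 * context switches for Tasks RCU.  When a quiescent state cannot be
 * reported directly (for example, because the current task is still in an
 * RCU read-side critical section), flags are set so that it is reported
 * later.  The caller must have disabled interrupts.
 */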
static void rcu_flavor_sched_clock_irq(int user)
{
	struct task_struct *t = current;

	lockdep_assert_irqs_disabled();
	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_note_voluntary_context_switch(current);
	}
	if (rcu_preempt_depth() > 0 ||
	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
		if (rcu_preempt_need_deferred_qs(t)) {
			set_tsk_need_resched(t);
			set_preempt_need_resched();
		}
	} else if (rcu_preempt_need_deferred_qs(t)) {
		rcu_preempt_deferred_qs(t);
		return;
	} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
		rcu_qs();
		return;
	}

	if (rcu_preempt_depth() > 0 &&
	    __this_cpu_read(rcu_data.core_needs_qs) &&
	    __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
	    !t->rcu_read_unlock_special.b.need_qs &&
	    time_after(jiffies, rcu_state.gp_start + HZ))
		t->rcu_read_unlock_special.b.need_qs = true;
}

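/*
 * Check for a task exiting while in a preemptible-RCU read-side critical
 * section and clean up if so, so that the exiting task does not stall
 * grace periods.
 */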
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (unlikely(!list_empty(&current->rcu_node_entry))) {
		rcu_preempt_depth_set(1);
		barrier();
		WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
	} else if (unlikely(rcu_preempt_depth())) {
		rcu_preempt_depth_set(1);
	} else {
		return;
	}
	__rcu_read_unlock();
	rcu_preempt_deferred_qs(current);
}

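/*
 * Dump the blocked-tasks state, but limit the list dump to the specified
 * number of elements.  This is used to diagnose grace-period stalls and
 * similar problems.
 */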
static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	int cpu;
	int i;
	struct list_head *lhp;
	bool onl;
	struct rcu_data *rdp;
	struct rcu_node *rnp1;

	raw_lockdep_assert_held_rcu_node(rnp);
	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
		__func__, rnp->grplo, rnp->grphi, rnp->level,
		(long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
		pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
		__func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
		READ_ONCE(rnp->exp_tasks));
	pr_info("%s: ->blkd_tasks", __func__);
	i = 0;
	list_for_each(lhp, &rnp->blkd_tasks) {
		pr_cont(" %p", lhp);
		if (++i >= ncheck)
			break;
	}
	pr_cont("\n");
	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
			cpu, ".o"[onl],
			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
	}
}

#else /* #ifdef CONFIG_PREEMPT_RCU */

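/*
 * If strict grace periods are enabled, and if the calling
 * __rcu_read_unlock() marks the beginning of a quiescent state, report
 * that quiescent state immediately.
 */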
void rcu_read_unlock_strict(void)
{
	struct rcu_data *rdp;

	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
	    irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
		return;
	rdp = this_cpu_ptr(&rcu_data);
	rcu_report_qs_rdp(rdp);
	udelay(rcu_unlock_delay);
}
EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);

static void __init rcu_bootup_announce(void)
{
	pr_info("Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
		return;
	trace_rcu_grace_period(TPS("rcu_sched"),
			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		return;
	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
}

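/*
 * Register an urgently needed quiescent state.  If there is an
 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
 * dyntick-idle quiescent state.
 */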
void rcu_all_qs(void)
{
	unsigned long flags;

	if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
		return;
	preempt_disable();
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
		preempt_enable();
		return;
	}
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
		local_irq_save(flags);
		rcu_momentary_dyntick_idle();
		local_irq_restore(flags);
	}
	rcu_qs();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(rcu_all_qs);

void rcu_note_context_switch(bool preempt)
{
	trace_rcu_utilization(TPS("Start context switch"));
	rcu_qs();
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
		goto out;
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
		rcu_momentary_dyntick_idle();
	rcu_tasks_qs(current, preempt);
out:
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return 0;
}

static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return false;
}

static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return false;
}
static void rcu_preempt_deferred_qs(struct task_struct *t) { }

static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}

static void rcu_flavor_sched_clock_irq(int user)
{
	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_qs();
	}
}

void exit_rcu(void)
{
}

static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

static void rcu_cpu_kthread_setup(unsigned int cpu)
{
#ifdef CONFIG_RCU_BOOST
	struct sched_param sp;

	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif
}

#ifdef CONFIG_RCU_BOOST

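/*
 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
 * or ->boost_tasks.  Return nonzero if there are still tasks needing to
 * be boosted.  Note that interrupts must be enabled: boosting the task
 * can block.
 */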
static int rcu_boost(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;
	struct list_head *tb;

	if (READ_ONCE(rnp->exp_tasks) == NULL &&
	    READ_ONCE(rnp->boost_tasks) == NULL)
		return 0;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);

	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
	}

	if (rnp->exp_tasks != NULL)
		tb = rnp->exp_tasks;
	else
		tb = rnp->boost_tasks;

	t = container_of(tb, struct task_struct, rcu_node_entry);
	rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

	rt_mutex_lock(&rnp->boost_mtx);
	rt_mutex_unlock(&rnp->boost_mtx);
	rnp->n_boosts++;

	return READ_ONCE(rnp->exp_tasks) != NULL ||
	       READ_ONCE(rnp->boost_tasks) != NULL;
}

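/*
 * Priority-boosting kthread, one per leaf rcu_node.
 */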
static int rcu_boost_kthread(void *arg)
{
	struct rcu_node *rnp = (struct rcu_node *)arg;
	int spincnt = 0;
	int more2boost;

	trace_rcu_utilization(TPS("Start boost kthread@init"));
	for (;;) {
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
		rcu_wait(READ_ONCE(rnp->boost_tasks) ||
			 READ_ONCE(rnp->exp_tasks));
		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
		more2boost = rcu_boost(rnp);
		if (more2boost)
			spincnt++;
		else
			spincnt = 0;
		if (spincnt > 10) {
			WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
			trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
			schedule_timeout_idle(2);
			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
			spincnt = 0;
		}
	}

	trace_rcu_utilization(TPS("End boost kthread@notreached"));
	return 0;
}

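/*
 * Check to see if it is time to start boosting RCU readers.  If so, wake
 * up the per-rcu_node kthread, which does the actual boosting.  The
 * caller must hold rnp->lock, which this function releases.
 */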
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	if (rnp->exp_tasks != NULL ||
	    (rnp->gp_tasks != NULL &&
	     rnp->boost_tasks == NULL &&
	     rnp->qsmask == 0 &&
	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
		if (rnp->exp_tasks == NULL)
			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		rcu_wake_cond(rnp->boost_kthread_task,
			      READ_ONCE(rnp->boost_kthread_status));
	} else {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}

static bool rcu_is_callbacks_kthread(void)
{
	return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
}

#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}

static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
	unsigned long flags;
	int rnp_index = rnp - rcu_get_root();
	struct sched_param sp;
	struct task_struct *t;

	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active)
		return;

	rcu_state.boost = 1;

	t = kthread_create(rcu_boost_kthread, (void *)rnp,
			   "rcub/%d", rnp_index);
	if (WARN_ON_ONCE(IS_ERR(t)))
		return;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->boost_kthread_task = t;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	wake_up_process(t);
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
	struct task_struct *t = rnp->boost_kthread_task;
	unsigned long mask = rcu_rnp_online_cpus(rnp);
	cpumask_var_t cm;
	int cpu;

	if (!t)
		return;
	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
		return;
	for_each_leaf_node_possible_cpu(rnp, cpu)
		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
		    cpu != outgoingcpu)
			cpumask_set_cpu(cpu, cm);
	if (cpumask_weight(cm) == 0)
		cpumask_setall(cm);
	set_cpus_allowed_ptr(t, cm);
	free_cpumask_var(cm);
}

static void __init rcu_spawn_boost_kthreads(void)
{
	struct rcu_node *rnp;

	rcu_for_each_leaf_node(rnp)
		if (rcu_rnp_online_cpus(rnp))
			rcu_spawn_one_boost_kthread(rnp);
}

#else /* #ifdef CONFIG_RCU_BOOST */

static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

static bool rcu_is_callbacks_kthread(void)
{
	return false;
}

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}

static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}

static void __init rcu_spawn_boost_kthreads(void)
{
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */

#if !defined(CONFIG_RCU_FAST_NO_HZ)

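/*
 * Check to see if any future non-offloaded RCU-related work will need
 * to be done by the current CPU, even if none need be done immediately,
 * returning 1 if so.  Because this is not CONFIG_RCU_FAST_NO_HZ, there
 * is no attempt to defer the wakeup, so *nextevt is simply KTIME_MAX.
 */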
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	*nextevt = KTIME_MAX;
	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
	       !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
}

static void rcu_cleanup_after_idle(void)
{
}

static void rcu_prepare_for_idle(void)
{
}

#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */

#define RCU_IDLE_GP_DELAY 4

static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);

static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
	bool cbs_ready = false;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	if (jiffies == rdp->last_advance_all)
		return false;
	rdp->last_advance_all = jiffies;

	rnp = rdp->mynode;

	if ((rcu_seq_completed_gp(rdp->gp_seq,
				  rcu_seq_current(&rnp->gp_seq)) ||
	     unlikely(READ_ONCE(rdp->gpwrap))) &&
	    rcu_segcblist_pend_cbs(&rdp->cblist))
		note_gp_changes(rdp);

	if (rcu_segcblist_ready_cbs(&rdp->cblist))
		cbs_ready = true;
	return cbs_ready;
}

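/*
 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
 * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
 * caller what timeout to program via *nextevt.
 */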
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	unsigned long dj;

	lockdep_assert_irqs_disabled();

	if (rcu_segcblist_empty(&rdp->cblist) ||
	    rcu_rdp_is_offloaded(rdp)) {
		*nextevt = KTIME_MAX;
		return 0;
	}

	if (rcu_try_advance_all_cbs()) {
		invoke_rcu_core();
		return 1;
	}
	rdp->last_accelerate = jiffies;

	dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
	*nextevt = basemono + dj * TICK_NSEC;
	return 0;
}

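/*
 * Prepare a CPU for idle from an RCU perspective.  The first major task
 * is to sense whether nohz mode has been enabled or disabled via sysfs.
 * The second major task is to accelerate (that is, assign grace-period
 * numbers to) any recently arrived callbacks.
 *
 * The caller must have disabled interrupts.
 */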
static void rcu_prepare_for_idle(void)
{
	bool needwake;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;
	int tne;

	lockdep_assert_irqs_disabled();
	if (rcu_rdp_is_offloaded(rdp))
		return;

	tne = READ_ONCE(tick_nohz_active);
	if (tne != rdp->tick_nohz_enabled_snap) {
		if (!rcu_segcblist_empty(&rdp->cblist))
			invoke_rcu_core();
		rdp->tick_nohz_enabled_snap = tne;
		return;
	}
	if (!tne)
		return;

	if (rdp->last_accelerate == jiffies)
		return;
	rdp->last_accelerate = jiffies;
	if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		needwake = rcu_accelerate_cbs(rnp, rdp);
		raw_spin_unlock_rcu_node(rnp);
		if (needwake)
			rcu_gp_kthread_wake();
	}
}

static void rcu_cleanup_after_idle(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	lockdep_assert_irqs_disabled();
	if (rcu_rdp_is_offloaded(rdp))
		return;
	if (rcu_try_advance_all_cbs())
		invoke_rcu_core();
}

#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */

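/*
 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
 * grace-period kthread will do force_quiescent_state() processing?
 * The idea is to avoid waking up RCU core processing on such a CPU
 * unless the grace period has extended for too long.
 */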
static bool rcu_nohz_full_cpu(void)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_cpu(smp_processor_id()) &&
	    (!rcu_gp_in_progress() ||
	     time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
		return true;
#endif
	return false;
}

static void rcu_bind_gp_kthread(void)
{
	if (!tick_nohz_full_enabled())
		return;
	housekeeping_affine(current, HK_FLAG_RCU);
}

static void noinstr rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif
}

static void noinstr rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif
}

static void rcu_dynticks_task_trace_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = true;
#endif
}

static void rcu_dynticks_task_trace_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = false;
#endif
}