#include "../locking/rtmutex_common.h"

#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask;
static bool __read_mostly rcu_nocb_poll;
#endif

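/*
 * Report any RCU-related configuration that differs from the build-time
 * defaults, so that unusual settings show up in the boot-time console log.
 */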
static void __init rcu_bootup_announce_oddness(void)
{
	if (IS_ENABLED(CONFIG_RCU_TRACE))
		pr_info("\tRCU event tracing is enabled.\n");
	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d.\n",
			RCU_FANOUT);
	if (rcu_fanout_exact)
		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		pr_info("\tRCU lockdep checking is enabled.\n");
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
		pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
	if (RCU_NUM_LVLS >= 4)
		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
	if (RCU_FANOUT_LEAF != 16)
		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			RCU_FANOUT_LEAF);
	if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n",
			rcu_fanout_leaf);
	if (nr_cpu_ids != NR_CPUS)
		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids);
#ifdef CONFIG_RCU_BOOST
	pr_info("\tRCU priority boosting: priority %d delay %d ms.\n",
		kthread_prio, CONFIG_RCU_BOOST_DELAY);
#endif
	if (blimit != DEFAULT_RCU_BLIMIT)
		pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
	if (qhimark != DEFAULT_RCU_QHIMARK)
		pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
	if (qlowmark != DEFAULT_RCU_QLOMARK)
		pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
	if (qovld != DEFAULT_RCU_QOVLD)
		pr_info("\tBoot-time adjustment of callback overload level to %ld.\n", qovld);
	if (jiffies_till_first_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
	if (jiffies_till_next_fqs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
	if (jiffies_till_sched_qs != ULONG_MAX)
		pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
	if (rcu_kick_kthreads)
		pr_info("\tKick kthreads if too-long grace period.\n");
	if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
		pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
	if (gp_preinit_delay)
		pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
	if (gp_init_delay)
		pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
	if (gp_cleanup_delay)
		pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
	if (!use_softirq)
		pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
	if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
		pr_info("\tRCU debug extended QS entry/exit.\n");
	rcupdate_announce_bootup_oddness();
}

#ifdef CONFIG_PREEMPT_RCU

static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static void rcu_read_unlock_special(struct task_struct *t);

static void __init rcu_bootup_announce(void)
{
	pr_info("Preemptible hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1

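/*
 * Queue the current task on rnp->blkd_tasks according to which grace
 * periods (normal and/or expedited) it is blocking, as encoded by the
 * RCU_*_TASKS and RCU_*_BLKD bits above.  Invoked with the rcu_node's
 * ->lock held, which this function releases.
 */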
static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
	__releases(rnp->lock)
{
	int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
			 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
			 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
	struct task_struct *t = current;

	raw_lockdep_assert_held_rcu_node(rnp);
	WARN_ON_ONCE(rdp->mynode != rnp);
	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
	WARN_ON_ONCE(rnp->qsmaskinitnext & ~rnp->qsmaskinit & rnp->qsmask &
		     rdp->grpmask);

	switch (blkd_state) {
	case 0:
	case RCU_EXP_TASKS:
	case RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:

		list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_BLKD:
	case RCU_GP_BLKD:
	case RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:

		list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case RCU_EXP_TASKS + RCU_EXP_BLKD:
	case RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_EXP_BLKD:

		list_add(&t->rcu_node_entry, rnp->exp_tasks);
		break;

	case RCU_GP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:

		list_add(&t->rcu_node_entry, rnp->gp_tasks);
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
		WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
	}
	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
		WRITE_ONCE(rnp->exp_tasks, &t->rcu_node_entry);
	WARN_ON_ONCE(!(blkd_state & RCU_GP_BLKD) !=
		     !(rnp->qsmask & rdp->grpmask));
	WARN_ON_ONCE(!(blkd_state & RCU_EXP_BLKD) !=
		     !(rnp->expmask & rdp->grpmask));
	raw_spin_unlock_rcu_node(rnp);

	if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	else
		WARN_ON_ONCE(rdp->exp_deferred_qs);
}

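/*
 * Record a preemptible-RCU quiescent state for the specified CPU.  Note
 * that this just means that the task currently running on the CPU is not
 * in an RCU read-side critical section.  Caller must disable preemption.
 */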
static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
	if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
		trace_rcu_grace_period(TPS("rcu_preempt"),
				       __this_cpu_read(rcu_data.gp_seq),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
		barrier();
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
	}
}

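/*
 * We have entered the scheduler, so the current task might soon be
 * context-switched away from.  If it is in an RCU read-side critical
 * section, queue it on its leaf rcu_node's ->blkd_tasks list so that the
 * grace period keeps waiting for it; otherwise record a quiescent state
 * for this CPU.  Caller must disable interrupts.
 */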
void rcu_note_context_switch(bool preempt)
{
	struct task_struct *t = current;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	trace_rcu_utilization(TPS("Start context switch"));
	lockdep_assert_irqs_disabled();
	WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
	if (rcu_preempt_depth() > 0 &&
	    !t->rcu_read_unlock_special.b.blocked) {

		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		t->rcu_read_unlock_special.b.blocked = true;
		t->rcu_blocked_node = rnp;

		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		trace_rcu_preempt_task(rcu_state.name,
				       t->pid,
				       (rnp->qsmask & rdp->grpmask)
				       ? rnp->gp_seq
				       : rcu_seq_snap(&rnp->gp_seq));
		rcu_preempt_ctxt_queue(rnp, rdp);
	} else {
		rcu_preempt_deferred_qs(t);
	}

	rcu_qs();
	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);
	rcu_tasks_qs(current, preempt);
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->gp_tasks) != NULL;
}

#define RCU_NEST_PMAX (INT_MAX / 2)

static void rcu_preempt_read_enter(void)
{
	current->rcu_read_lock_nesting++;
}

static int rcu_preempt_read_exit(void)
{
	return --current->rcu_read_lock_nesting;
}

static void rcu_preempt_depth_set(int val)
{
	current->rcu_read_lock_nesting = val;
}

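/*
 * Preemptible-RCU implementation for rcu_read_lock().  Just increment
 * the nesting depth; the barrier() keeps the critical section's accesses
 * from leaking out ahead of it.
 */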
void __rcu_read_lock(void)
{
	rcu_preempt_read_enter();
	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
		WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
		WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
	barrier();
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

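/*
 * Preemptible-RCU implementation for rcu_read_unlock().  Decrement the
 * nesting depth, and if the result is zero and ->rcu_read_unlock_special
 * is set (for example, because the reader was preempted), invoke
 * rcu_read_unlock_special() to clean up after the now-ended reader.
 */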
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	if (rcu_preempt_read_exit() == 0) {
		barrier();
		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
			rcu_read_unlock_special(t);
	}
	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
		int rrln = rcu_preempt_depth();

		WARN_ON_ONCE(rrln < 0 || rrln > RCU_NEST_PMAX);
	}
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

static struct list_head *rcu_next_node_entry(struct task_struct *t,
					     struct rcu_node *rnp)
{
	struct list_head *np;

	np = t->rcu_node_entry.next;
	if (np == &rnp->blkd_tasks)
		np = NULL;
	return np;
}

static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}

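/*
 * Report a deferred quiescent state, and if the blocked reader was
 * queued on an rcu_node's ->blkd_tasks list, dequeue it and report any
 * normal or expedited quiescent states that this unblocking makes
 * possible.  Restores interrupt state from flags, either directly or
 * via lock release.
 */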
static void
rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
{
	bool empty_exp;
	bool empty_norm;
	bool empty_exp_now;
	struct list_head *np;
	bool drop_boost_mutex = false;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	union rcu_special special;

	special = t->rcu_read_unlock_special;
	rdp = this_cpu_ptr(&rcu_data);
	if (!special.s && !rdp->exp_deferred_qs) {
		local_irq_restore(flags);
		return;
	}
	t->rcu_read_unlock_special.s = 0;
	if (special.b.need_qs) {
		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
			rcu_report_qs_rdp(rdp);
			udelay(rcu_unlock_delay);
		} else {
			rcu_qs();
		}
	}

	if (rdp->exp_deferred_qs)
		rcu_report_exp_rdp(rdp);

	if (special.b.blocked) {

		rnp = t->rcu_blocked_node;
		raw_spin_lock_rcu_node(rnp);
		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
		WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
			     (!empty_norm || rnp->qsmask));
		empty_exp = sync_rcu_exp_done(rnp);
		smp_mb();
		np = rcu_next_node_entry(t, rnp);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
						rnp->gp_seq, t->pid);
		if (&t->rcu_node_entry == rnp->gp_tasks)
			WRITE_ONCE(rnp->gp_tasks, np);
		if (&t->rcu_node_entry == rnp->exp_tasks)
			WRITE_ONCE(rnp->exp_tasks, np);
		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
			if (&t->rcu_node_entry == rnp->boost_tasks)
				WRITE_ONCE(rnp->boost_tasks, np);
		}

		empty_exp_now = sync_rcu_exp_done(rnp);
		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
							 rnp->gp_seq,
							 0, rnp->qsmask,
							 rnp->level,
							 rnp->grplo,
							 rnp->grphi,
							 !!rnp->gp_tasks);
			rcu_report_unblock_qs_rnp(rnp, flags);
		} else {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}

		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
			rt_mutex_futex_unlock(&rnp->boost_mtx);

		if (!empty_exp && empty_exp_now)
			rcu_report_exp_rnp(rnp, true);
	} else {
		local_irq_restore(flags);
	}
}

static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
		READ_ONCE(t->rcu_read_unlock_special.s)) &&
	       rcu_preempt_depth() == 0;
}

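/*
 * Report a deferred quiescent state if needed and safe to do so.
 * As with rcu_preempt_need_deferred_qs(), "safe" here means only that
 * the task is not currently within an RCU read-side critical section.
 */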
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
	unsigned long flags;

	if (!rcu_preempt_need_deferred_qs(t))
		return;
	local_irq_save(flags);
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
{
	struct rcu_data *rdp;

	rdp = container_of(iwp, struct rcu_data, defer_qs_iw);
	rdp->defer_qs_iw_pending = false;
}

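/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or the task having blocked during the
 * RCU read-side critical section.
 */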
static void rcu_read_unlock_special(struct task_struct *t)
{
	unsigned long flags;
	bool preempt_bh_were_disabled =
			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
	bool irqs_were_disabled;

	if (in_nmi())
		return;

	local_irq_save(flags);
	irqs_were_disabled = irqs_disabled_flags(flags);
	if (preempt_bh_were_disabled || irqs_were_disabled) {
		bool exp;
		struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
		struct rcu_node *rnp = rdp->mynode;

		exp = (t->rcu_blocked_node &&
		       READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
		      (rdp->grpmask & READ_ONCE(rnp->expmask));

		if (use_softirq && (in_irq() || (exp && !irqs_were_disabled))) {
			raise_softirq_irqoff(RCU_SOFTIRQ);
		} else {
			set_tsk_need_resched(current);
			set_preempt_need_resched();
			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
			    !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) {
				init_irq_work(&rdp->defer_qs_iw,
					      rcu_preempt_deferred_qs_handler);
				rdp->defer_qs_iw_pending = true;
				irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
			}
		}
		local_irq_restore(flags);
		return;
	}
	rcu_preempt_deferred_qs_irqrestore(t, flags);
}

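/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty.  It is a serious bug to complete a grace
 * period that still has RCU readers blocked!  Also initialize the
 * ->gp_tasks pointer for the new grace period if tasks remain queued.
 */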
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	struct task_struct *t;

	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
	raw_lockdep_assert_held_rcu_node(rnp);
	if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
		dump_blkd_tasks(rnp, 10);
	if (rcu_preempt_has_tasks(rnp) &&
	    (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
		WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
		t = container_of(rnp->gp_tasks, struct task_struct,
				 rcu_node_entry);
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
						rnp->gp_seq, t->pid);
	}
	WARN_ON_ONCE(rnp->qsmask);
}

static void rcu_flavor_sched_clock_irq(int user)
{
	struct task_struct *t = current;

	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_note_voluntary_context_switch(current);
	}
	if (rcu_preempt_depth() > 0 ||
	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
		if (rcu_preempt_need_deferred_qs(t)) {
			set_tsk_need_resched(t);
			set_preempt_need_resched();
		}
	} else if (rcu_preempt_need_deferred_qs(t)) {
		rcu_preempt_deferred_qs(t);
		return;
	} else if (!WARN_ON_ONCE(rcu_preempt_depth())) {
		rcu_qs();
		return;
	}

	if (rcu_preempt_depth() > 0 &&
	    __this_cpu_read(rcu_data.core_needs_qs) &&
	    __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
	    !t->rcu_read_unlock_special.b.need_qs &&
	    time_after(jiffies, rcu_state.gp_start + HZ))
		t->rcu_read_unlock_special.b.need_qs = true;
}

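/*
 * Perform RCU-related cleanup for a task that is exiting while possibly
 * still within an RCU read-side critical section, so that the exiting
 * task cannot indefinitely block a grace period.
 */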
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (unlikely(!list_empty(&current->rcu_node_entry))) {
		rcu_preempt_depth_set(1);
		barrier();
		WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
	} else if (unlikely(rcu_preempt_depth())) {
		rcu_preempt_depth_set(1);
	} else {
		return;
	}
	__rcu_read_unlock();
	rcu_preempt_deferred_qs(current);
}

static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	int cpu;
	int i;
	struct list_head *lhp;
	bool onl;
	struct rcu_data *rdp;
	struct rcu_node *rnp1;

	raw_lockdep_assert_held_rcu_node(rnp);
	pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
		__func__, rnp->grplo, rnp->grphi, rnp->level,
		(long)READ_ONCE(rnp->gp_seq), (long)rnp->completedqs);
	for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
		pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
			__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
	pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
		__func__, READ_ONCE(rnp->gp_tasks), data_race(rnp->boost_tasks),
		READ_ONCE(rnp->exp_tasks));
	pr_info("%s: ->blkd_tasks", __func__);
	i = 0;
	list_for_each(lhp, &rnp->blkd_tasks) {
		pr_cont(" %p", lhp);
		if (++i >= ncheck)
			break;
	}
	pr_cont("\n");
	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
		pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
			cpu, ".o"[onl],
			(long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
			(long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
	}
}

#else /* #ifdef CONFIG_PREEMPT_RCU */

void rcu_read_unlock_strict(void)
{
	struct rcu_data *rdp;

	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
	    irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
		return;
	rdp = this_cpu_ptr(&rcu_data);
	rcu_report_qs_rdp(rdp);
	udelay(rcu_unlock_delay);
}
EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);

static void __init rcu_bootup_announce(void)
{
	pr_info("Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

static void rcu_qs(void)
{
	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
		return;
	trace_rcu_grace_period(TPS("rcu_sched"),
			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		return;
	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
}

void rcu_all_qs(void)
{
	unsigned long flags;

	if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
		return;
	preempt_disable();
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
		preempt_enable();
		return;
	}
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
		local_irq_save(flags);
		rcu_momentary_dyntick_idle();
		local_irq_restore(flags);
	}
	rcu_qs();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(rcu_all_qs);

void rcu_note_context_switch(bool preempt)
{
	trace_rcu_utilization(TPS("Start context switch"));
	rcu_qs();
	if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
		goto out;
	this_cpu_write(rcu_data.rcu_urgent_qs, false);
	if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
		rcu_momentary_dyntick_idle();
	rcu_tasks_qs(current, preempt);
out:
	trace_rcu_utilization(TPS("End context switch"));
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);

static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return 0;
}

static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return false;
}

static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
	return false;
}
static void rcu_preempt_deferred_qs(struct task_struct *t) { }

static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}

static void rcu_flavor_sched_clock_irq(int user)
{
	if (user || rcu_is_cpu_rrupt_from_idle()) {
		rcu_qs();
	}
}

void exit_rcu(void)
{
}

static void
dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
{
	WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

static void rcu_cpu_kthread_setup(unsigned int cpu)
{
#ifdef CONFIG_RCU_BOOST
	struct sched_param sp;

	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */
}

#ifdef CONFIG_RCU_BOOST

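/*
 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
 * or ->boost_tasks, advancing the pointer to the next task in the
 * ->blkd_tasks list.  Returns nonzero if there are more tasks needing
 * to be boosted.
 */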
static int rcu_boost(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;
	struct list_head *tb;

	if (READ_ONCE(rnp->exp_tasks) == NULL &&
	    READ_ONCE(rnp->boost_tasks) == NULL)
		return 0;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);

	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
	}

	if (rnp->exp_tasks != NULL)
		tb = rnp->exp_tasks;
	else
		tb = rnp->boost_tasks;

	t = container_of(tb, struct task_struct, rcu_node_entry);
	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

	rt_mutex_lock(&rnp->boost_mtx);
	rt_mutex_unlock(&rnp->boost_mtx);

	return READ_ONCE(rnp->exp_tasks) != NULL ||
	       READ_ONCE(rnp->boost_tasks) != NULL;
}

static int rcu_boost_kthread(void *arg)
{
	struct rcu_node *rnp = (struct rcu_node *)arg;
	int spincnt = 0;
	int more2boost;

	trace_rcu_utilization(TPS("Start boost kthread@init"));
	for (;;) {
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
		rcu_wait(READ_ONCE(rnp->boost_tasks) ||
			 READ_ONCE(rnp->exp_tasks));
		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
		WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
		more2boost = rcu_boost(rnp);
		if (more2boost)
			spincnt++;
		else
			spincnt = 0;
		if (spincnt > 10) {
			WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_YIELDING);
			trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
			schedule_timeout_idle(2);
			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
			spincnt = 0;
		}
	}

	trace_rcu_utilization(TPS("End boost kthread@notreached"));
	return 0;
}

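/*
 * Check to see if it is time to start boosting RCU readers blocking the
 * current grace period, and, if so, tell the per-rcu_node kthread to
 * start boosting them.  Called with the rcu_node's ->lock held, which
 * this function releases.
 */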
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	if (rnp->exp_tasks != NULL ||
	    (rnp->gp_tasks != NULL &&
	     rnp->boost_tasks == NULL &&
	     rnp->qsmask == 0 &&
	     (!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
		if (rnp->exp_tasks == NULL)
			WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		rcu_wake_cond(rnp->boost_kthread_task,
			      READ_ONCE(rnp->boost_kthread_status));
	} else {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}

static bool rcu_is_callbacks_kthread(void)
{
	return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
}

#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}

static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
{
	int rnp_index = rnp - rcu_get_root();
	unsigned long flags;
	struct sched_param sp;
	struct task_struct *t;

	if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
		return;

	if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
		return;

	rcu_state.boost = 1;

	if (rnp->boost_kthread_task != NULL)
		return;

	t = kthread_create(rcu_boost_kthread, (void *)rnp,
			   "rcub/%d", rnp_index);
	if (WARN_ON_ONCE(IS_ERR(t)))
		return;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->boost_kthread_task = t;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	wake_up_process(t);
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
	struct task_struct *t = rnp->boost_kthread_task;
	unsigned long mask = rcu_rnp_online_cpus(rnp);
	cpumask_var_t cm;
	int cpu;

	if (!t)
		return;
	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
		return;
	for_each_leaf_node_possible_cpu(rnp, cpu)
		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
		    cpu != outgoingcpu)
			cpumask_set_cpu(cpu, cm);
	if (cpumask_weight(cm) == 0)
		cpumask_setall(cm);
	set_cpus_allowed_ptr(t, cm);
	free_cpumask_var(cm);
}

static void __init rcu_spawn_boost_kthreads(void)
{
	struct rcu_node *rnp;

	rcu_for_each_leaf_node(rnp)
		rcu_spawn_one_boost_kthread(rnp);
}

static void rcu_prepare_kthreads(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_node *rnp = rdp->mynode;

	if (rcu_scheduler_fully_active)
		rcu_spawn_one_boost_kthread(rnp);
}

#else /* #ifdef CONFIG_RCU_BOOST */

static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

static bool rcu_is_callbacks_kthread(void)
{
	return false;
}

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}

static void __init rcu_spawn_boost_kthreads(void)
{
}

static void rcu_prepare_kthreads(int cpu)
{
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */

#if !defined(CONFIG_RCU_FAST_NO_HZ)

int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	*nextevt = KTIME_MAX;
	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
	       !rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist);
}

static void rcu_cleanup_after_idle(void)
{
}

static void rcu_prepare_for_idle(void)
{
}

#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */

#define RCU_IDLE_GP_DELAY 4

static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);

static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
	bool cbs_ready = false;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;

	if (jiffies == rdp->last_advance_all)
		return false;
	rdp->last_advance_all = jiffies;

	rnp = rdp->mynode;

	if ((rcu_seq_completed_gp(rdp->gp_seq,
				  rcu_seq_current(&rnp->gp_seq)) ||
	     unlikely(READ_ONCE(rdp->gpwrap))) &&
	    rcu_segcblist_pend_cbs(&rdp->cblist))
		note_gp_changes(rdp);

	if (rcu_segcblist_ready_cbs(&rdp->cblist))
		cbs_ready = true;
	return cbs_ready;
}

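/*
 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
 * to invoke, in which case invoke_rcu_core() is called and nonzero is
 * returned so that the caller retries later.  Otherwise schedule a
 * wakeup roughly rcu_idle_gp_delay jiffies out so that any needed
 * grace-period work gets done in a timely manner.
 */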
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	unsigned long dj;

	lockdep_assert_irqs_disabled();

	if (rcu_segcblist_empty(&rdp->cblist) ||
	    rcu_segcblist_is_offloaded(&this_cpu_ptr(&rcu_data)->cblist)) {
		*nextevt = KTIME_MAX;
		return 0;
	}

	if (rcu_try_advance_all_cbs()) {
		invoke_rcu_core();
		return 1;
	}
	rdp->last_accelerate = jiffies;

	dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
	*nextevt = basemono + dj * TICK_NSEC;
	return 0;
}

static void rcu_prepare_for_idle(void)
{
	bool needwake;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp;
	int tne;

	lockdep_assert_irqs_disabled();
	if (rcu_segcblist_is_offloaded(&rdp->cblist))
		return;

	tne = READ_ONCE(tick_nohz_active);
	if (tne != rdp->tick_nohz_enabled_snap) {
		if (!rcu_segcblist_empty(&rdp->cblist))
			invoke_rcu_core();
		rdp->tick_nohz_enabled_snap = tne;
		return;
	}
	if (!tne)
		return;

	if (rdp->last_accelerate == jiffies)
		return;
	rdp->last_accelerate = jiffies;
	if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		needwake = rcu_accelerate_cbs(rnp, rdp);
		raw_spin_unlock_rcu_node(rnp);
		if (needwake)
			rcu_gp_kthread_wake();
	}
}

static void rcu_cleanup_after_idle(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	lockdep_assert_irqs_disabled();
	if (rcu_segcblist_is_offloaded(&rdp->cblist))
		return;
	if (rcu_try_advance_all_cbs())
		invoke_rcu_core();
}

#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */

#ifdef CONFIG_RCU_NOCB_CPU

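/*
 * Parse the boot-time rcu_nocbs= CPU list.  CPUs in this mask have their
 * RCU callbacks offloaded to "rcuo" kthreads instead of being invoked
 * from softirq context on the CPU that posted the callback.
 */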
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	if (!strcasecmp(str, "all"))
		cpumask_setall(rcu_nocb_mask);
	else
		if (cpulist_parse(str, rcu_nocb_mask)) {
			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
			cpumask_setall(rcu_nocb_mask);
		}
	return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = true;
	return 0;
}
early_param("rcu_nocb_poll", parse_rcu_nocb_poll);

int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
module_param(nocb_nobypass_lim_per_jiffy, int, 0);

static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	__acquires(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	if (raw_spin_trylock(&rdp->nocb_bypass_lock))
		return;
	atomic_inc(&rdp->nocb_lock_contended);
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	smp_mb__after_atomic();
	raw_spin_lock(&rdp->nocb_bypass_lock);
	smp_mb__before_atomic();
	atomic_dec(&rdp->nocb_lock_contended);
}

static void rcu_nocb_wait_contended(struct rcu_data *rdp)
{
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
		cpu_relax();
}

static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	return raw_spin_trylock(&rdp->nocb_bypass_lock);
}

static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
	__releases(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	raw_spin_unlock(&rdp->nocb_bypass_lock);
}

static void rcu_nocb_lock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (!rcu_segcblist_is_offloaded(&rdp->cblist))
		return;
	raw_spin_lock(&rdp->nocb_lock);
}

static void rcu_nocb_unlock(struct rcu_data *rdp)
{
	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock(&rdp->nocb_lock);
	}
}

static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags)
{
	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	} else {
		local_irq_restore(flags);
	}
}

static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (rcu_segcblist_is_offloaded(&rdp->cblist))
		lockdep_assert_held(&rdp->nocb_lock);
}

static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}

bool rcu_is_nocb_cpu(int cpu)
{
	if (cpumask_available(rcu_nocb_mask))
		return cpumask_test_cpu(cpu, rcu_nocb_mask);
	return false;
}

static void wake_nocb_gp(struct rcu_data *rdp, bool force,
			 unsigned long flags)
	__releases(rdp->nocb_lock)
{
	bool needwake = false;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	lockdep_assert_held(&rdp->nocb_lock);
	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("AlreadyAwake"));
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return;
	}
	del_timer(&rdp->nocb_timer);
	rcu_nocb_unlock_irqrestore(rdp, flags);
	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
		needwake = true;
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
	}
	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
	if (needwake)
		wake_up_process(rdp_gp->nocb_gp_kthread);
}

static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
			       const char *reason)
{
	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
		mod_timer(&rdp->nocb_timer, jiffies + 1);
	if (rdp->nocb_defer_wakeup < waketype)
		WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
}

static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				     unsigned long j)
{
	struct rcu_cblist rcl;

	WARN_ON_ONCE(!rcu_segcblist_is_offloaded(&rdp->cblist));
	rcu_lockdep_assert_cblist_protected(rdp);
	lockdep_assert_held(&rdp->nocb_bypass_lock);
	if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
		raw_spin_unlock(&rdp->nocb_bypass_lock);
		return false;
	}
	if (rhp)
		rcu_segcblist_inc_len(&rdp->cblist);
	rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
	WRITE_ONCE(rdp->nocb_bypass_first, j);
	rcu_nocb_bypass_unlock(rdp);
	return true;
}

static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				  unsigned long j)
{
	if (!rcu_segcblist_is_offloaded(&rdp->cblist))
		return true;
	rcu_lockdep_assert_cblist_protected(rdp);
	rcu_nocb_bypass_lock(rdp);
	return rcu_nocb_do_flush_bypass(rdp, rhp, j);
}

static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
{
	rcu_lockdep_assert_cblist_protected(rdp);
	if (!rcu_segcblist_is_offloaded(&rdp->cblist) ||
	    !rcu_nocb_bypass_trylock(rdp))
		return;
	WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j));
}

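/*
 * See whether the callback can simply be enqueued on the ->nocb_bypass
 * cblist, avoiding contention on ->nocb_lock.  Returns false if the
 * caller must queue the callback itself (with ->nocb_lock held on
 * offloaded CPUs), and true if the callback was consumed here.
 * *was_alldone tells the caller whether the main list was empty.
 */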
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				bool *was_alldone, unsigned long flags)
{
	unsigned long c;
	unsigned long cur_gp_seq;
	unsigned long j = jiffies;
	long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);

	if (!rcu_segcblist_is_offloaded(&rdp->cblist)) {
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}
	lockdep_assert_irqs_disabled();

	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
		rcu_nocb_lock(rdp);
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}

	if (j == rdp->nocb_nobypass_last) {
		c = rdp->nocb_nobypass_count + 1;
	} else {
		WRITE_ONCE(rdp->nocb_nobypass_last, j);
		c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
		if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
				 nocb_nobypass_lim_per_jiffy))
			c = 0;
		else if (c > nocb_nobypass_lim_per_jiffy)
			c = nocb_nobypass_lim_per_jiffy;
	}
	WRITE_ONCE(rdp->nocb_nobypass_count, c);

	if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		if (*was_alldone)
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstQ"));
		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j));
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		return false;
	}

	if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) ||
	    ncbs >= qhimark) {
		rcu_nocb_lock(rdp);
		if (!rcu_nocb_flush_bypass(rdp, rhp, j)) {
			*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
			if (*was_alldone)
				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
						    TPS("FirstQ"));
			WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
			return false;
		}
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return true;
	}

	rcu_nocb_wait_contended(rdp);
	rcu_nocb_bypass_lock(rdp);
	ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	rcu_segcblist_inc_len(&rdp->cblist);
	rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
	if (!ncbs) {
		WRITE_ONCE(rdp->nocb_bypass_first, j);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
	}
	rcu_nocb_bypass_unlock(rdp);
	smp_mb();
	if (ncbs) {
		local_irq_restore(flags);
	} else {
		rcu_nocb_lock(rdp);
		if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQwake"));
			__call_rcu_nocb_wake(rdp, true, flags);
		} else {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQnoWake"));
			rcu_nocb_unlock_irqrestore(rdp, flags);
		}
	}
	return true;
}

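/*
 * Awaken the no-CBs grace-period kthread if needed, either immediately
 * or, if interrupts are disabled, via a deferred-wakeup timer.  Called
 * with ->nocb_lock held, which this function releases.
 */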
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
				 unsigned long flags)
	__releases(rdp->nocb_lock)
{
	unsigned long cur_gp_seq;
	unsigned long j;
	long len;
	struct task_struct *t;

	t = READ_ONCE(rdp->nocb_gp_kthread);
	if (rcu_nocb_poll || !t) {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("WakeNotPoll"));
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return;
	}

	len = rcu_segcblist_n_cbs(&rdp->cblist);
	if (was_alldone) {
		rdp->qlen_last_fqs_check = len;
		if (!irqs_disabled_flags(flags)) {
			wake_nocb_gp(rdp, false, flags);
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("WakeEmpty"));
		} else {
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
					   TPS("WakeEmptyIsDeferred"));
			rcu_nocb_unlock_irqrestore(rdp, flags);
		}
	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
		rdp->qlen_last_fqs_check = len;
		j = jiffies;
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}
		smp_mb();
		if ((rdp->nocb_cb_sleep ||
		     !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
		    !timer_pending(&rdp->nocb_bypass_timer))
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
					   TPS("WakeOvfIsDeferred"));
		rcu_nocb_unlock_irqrestore(rdp, flags);
	} else {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
		rcu_nocb_unlock_irqrestore(rdp, flags);
	}
	return;
}

static void do_nocb_bypass_wakeup_timer(struct timer_list *t)
{
	unsigned long flags;
	struct rcu_data *rdp = from_timer(rdp, t, nocb_bypass_timer);

	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
	rcu_nocb_lock_irqsave(rdp, flags);
	smp_mb__after_spinlock();
	__call_rcu_nocb_wake(rdp, true, flags);
}

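/*
 * No-CBs GP kthreads come here to wait for additional callbacks to show
 * up or for grace periods to end.  Scans each CB rcu_data in the group,
 * flushing stale bypass lists, advancing callbacks, and waking the
 * corresponding CB kthreads when their callbacks are ready to invoke.
 */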
static void nocb_gp_wait(struct rcu_data *my_rdp)
{
	bool bypass = false;
	long bypass_ncbs;
	int __maybe_unused cpu = my_rdp->cpu;
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool gotcbs = false;
	unsigned long j = jiffies;
	bool needwait_gp = false;
	bool needwake;
	bool needwake_gp;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	unsigned long wait_gp_seq = 0;
	bool wasempty = false;

	WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
		rcu_nocb_lock_irqsave(rdp, flags);
		bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
		if (bypass_ncbs &&
		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
		     bypass_ncbs > 2 * qhimark)) {
			(void)rcu_nocb_try_flush_bypass(rdp, j);
			bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
		} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
			rcu_nocb_unlock_irqrestore(rdp, flags);
			continue;
		}
		if (bypass_ncbs) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("Bypass"));
			bypass = true;
		}
		rnp = rdp->mynode;
		if (bypass) {
			WRITE_ONCE(my_rdp->nocb_defer_wakeup,
				   RCU_NOCB_WAKE_NOT);
			del_timer(&my_rdp->nocb_timer);
		}

		needwake_gp = false;
		if (!rcu_segcblist_restempty(&rdp->cblist,
					     RCU_NEXT_READY_TAIL) ||
		    (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		     rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
			raw_spin_lock_rcu_node(rnp);
			needwake_gp = rcu_advance_cbs(rnp, rdp);
			wasempty = rcu_segcblist_restempty(&rdp->cblist,
							   RCU_NEXT_READY_TAIL);
			raw_spin_unlock_rcu_node(rnp);
		}

		WARN_ON_ONCE(wasempty &&
			     !rcu_segcblist_restempty(&rdp->cblist,
						      RCU_NEXT_READY_TAIL));
		if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
			if (!needwait_gp ||
			    ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
				wait_gp_seq = cur_gp_seq;
			needwait_gp = true;
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("NeedWaitGP"));
		}
		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
			needwake = rdp->nocb_cb_sleep;
			WRITE_ONCE(rdp->nocb_cb_sleep, false);
			smp_mb();
		} else {
			needwake = false;
		}
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake) {
			swake_up_one(&rdp->nocb_cb_wq);
			gotcbs = true;
		}
		if (needwake_gp)
			rcu_gp_kthread_wake();
	}

	my_rdp->nocb_gp_bypass = bypass;
	my_rdp->nocb_gp_gp = needwait_gp;
	my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;
	if (bypass && !rcu_nocb_poll) {
		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
		mod_timer(&my_rdp->nocb_bypass_timer, j + 2);
		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	}
	if (rcu_nocb_poll) {
		if (gotcbs)
			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
		schedule_timeout_idle(1);
	} else if (!needwait_gp) {
		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
				!READ_ONCE(my_rdp->nocb_gp_sleep));
		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
	} else {
		rnp = my_rdp->mynode;
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
		swait_event_interruptible_exclusive(
			rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
			rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
			!READ_ONCE(my_rdp->nocb_gp_sleep));
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
	}
	if (!rcu_nocb_poll) {
		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
		if (bypass)
			del_timer(&my_rdp->nocb_bypass_timer);
		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	}
	my_rdp->nocb_gp_seq = -1;
	WARN_ON(signal_pending(current));
}

static int rcu_nocb_gp_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
		nocb_gp_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

static void nocb_cb_wait(struct rcu_data *rdp)
{
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool needwake_gp = false;
	struct rcu_node *rnp = rdp->mynode;

	local_irq_save(flags);
	rcu_momentary_dyntick_idle();
	local_irq_restore(flags);
	local_bh_disable();
	rcu_do_batch(rdp);
	local_bh_enable();
	lockdep_assert_irqs_enabled();
	rcu_nocb_lock_irqsave(rdp, flags);
	if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
	    rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
	    raw_spin_trylock_rcu_node(rnp)) {
		needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
		raw_spin_unlock_rcu_node(rnp);
	}
	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake_gp)
			rcu_gp_kthread_wake();
		return;
	}

	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
	WRITE_ONCE(rdp->nocb_cb_sleep, true);
	rcu_nocb_unlock_irqrestore(rdp, flags);
	if (needwake_gp)
		rcu_gp_kthread_wake();
	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
				 !READ_ONCE(rdp->nocb_cb_sleep));
	if (!smp_load_acquire(&rdp->nocb_cb_sleep)) {
		return;
	}
	WARN_ON(signal_pending(current));
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}

static int rcu_nocb_cb_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		nocb_cb_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
	return READ_ONCE(rdp->nocb_defer_wakeup);
}

static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
{
	unsigned long flags;
	int ndw;

	rcu_nocb_lock_irqsave(rdp, flags);
	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
		rcu_nocb_unlock_irqrestore(rdp, flags);
		return;
	}
	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
	wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
}

static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
{
	struct rcu_data *rdp = from_timer(rdp, t, nocb_timer);

	do_nocb_deferred_wakeup_common(rdp);
}

static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	if (rcu_nocb_need_deferred_wakeup(rdp))
		do_nocb_deferred_wakeup_common(rdp);
}

void __init rcu_init_nohz(void)
{
	int cpu;
	bool need_rcu_nocb_mask = false;
	struct rcu_data *rdp;

#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
		need_rcu_nocb_mask = true;
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (!cpumask_available(rcu_nocb_mask) && need_rcu_nocb_mask) {
		if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
			pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
			return;
		}
	}
	if (!cpumask_available(rcu_nocb_mask))
		return;

#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running)
		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
		pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
			    rcu_nocb_mask);
	}
	if (cpumask_empty(rcu_nocb_mask))
		pr_info("\tOffload RCU callbacks from CPUs: (none).\n");
	else
		pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
			cpumask_pr_args(rcu_nocb_mask));
	if (rcu_nocb_poll)
		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");

	for_each_cpu(cpu, rcu_nocb_mask) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (rcu_segcblist_empty(&rdp->cblist))
			rcu_segcblist_init(&rdp->cblist);
		rcu_segcblist_offload(&rdp->cblist);
	}
	rcu_organize_nocb_kthreads();
}

static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
	init_swait_queue_head(&rdp->nocb_cb_wq);
	init_swait_queue_head(&rdp->nocb_gp_wq);
	raw_spin_lock_init(&rdp->nocb_lock);
	raw_spin_lock_init(&rdp->nocb_bypass_lock);
	raw_spin_lock_init(&rdp->nocb_gp_lock);
	timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0);
	timer_setup(&rdp->nocb_bypass_timer, do_nocb_bypass_wakeup_timer, 0);
	rcu_cblist_init(&rdp->nocb_bypass);
}

static void rcu_spawn_one_nocb_kthread(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_data *rdp_gp;
	struct task_struct *t;

	if (!rcu_is_nocb_cpu(cpu) || rdp->nocb_cb_kthread)
		return;

	rdp_gp = rdp->nocb_gp_rdp;
	if (!rdp_gp->nocb_gp_kthread) {
		t = kthread_run(rcu_nocb_gp_kthread, rdp_gp,
				"rcuog/%d", rdp_gp->cpu);
		if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__))
			return;
		WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
	}

	t = kthread_run(rcu_nocb_cb_kthread, rdp,
			"rcuo%c/%d", rcu_state.abbr, cpu);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
		return;
	WRITE_ONCE(rdp->nocb_cb_kthread, t);
	WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
}

static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
	if (rcu_scheduler_fully_active)
		rcu_spawn_one_nocb_kthread(cpu);
}

static void __init rcu_spawn_nocb_kthreads(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		rcu_spawn_cpu_nocb_kthread(cpu);
}

static int rcu_nocb_gp_stride = -1;
module_param(rcu_nocb_gp_stride, int, 0444);

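/*
 * Divide the no-CBs CPUs into groups of roughly sqrt(nr_cpu_ids) CPUs
 * (or rcu_nocb_gp_stride, if specified), with the first CPU in each
 * group acting as GP kthread leader for the CB CPUs that follow it in
 * rcu_nocb_mask.
 */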
static void __init rcu_organize_nocb_kthreads(void)
{
	int cpu;
	bool firsttime = true;
	bool gotnocbs = false;
	bool gotnocbscbs = true;
	int ls = rcu_nocb_gp_stride;
	int nl = 0;
	struct rcu_data *rdp;
	struct rcu_data *rdp_gp = NULL;
	struct rcu_data *rdp_prev = NULL;

	if (!cpumask_available(rcu_nocb_mask))
		return;
	if (ls == -1) {
		ls = nr_cpu_ids / int_sqrt(nr_cpu_ids);
		rcu_nocb_gp_stride = ls;
	}

	for_each_cpu(cpu, rcu_nocb_mask) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (rdp->cpu >= nl) {
			gotnocbs = true;
			nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
			rdp->nocb_gp_rdp = rdp;
			rdp_gp = rdp;
			if (dump_tree) {
				if (!firsttime)
					pr_cont("%s\n", gotnocbscbs
						? "" : " (self only)");
				gotnocbscbs = false;
				firsttime = false;
				pr_alert("%s: No-CB GP kthread CPU %d:",
					 __func__, cpu);
			}
		} else {
			gotnocbscbs = true;
			rdp->nocb_gp_rdp = rdp_gp;
			rdp_prev->nocb_next_cb_rdp = rdp;
			if (dump_tree)
				pr_cont(" %d", cpu);
		}
		rdp_prev = rdp;
	}
	if (gotnocbs && dump_tree)
		pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
}

void rcu_bind_current_to_nocb(void)
{
	if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask))
		WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask));
}
EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);

static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
{
	struct rcu_node *rnp = rdp->mynode;

	pr_info("nocb GP %d %c%c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu\n",
		rdp->cpu,
		"kK"[!!rdp->nocb_gp_kthread],
		"lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)],
		"dD"[!!rdp->nocb_defer_wakeup],
		"tT"[timer_pending(&rdp->nocb_timer)],
		"bB"[timer_pending(&rdp->nocb_bypass_timer)],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[swait_active(&rdp->nocb_gp_wq)],
		".W"[swait_active(&rnp->nocb_gp_wq[0])],
		".W"[swait_active(&rnp->nocb_gp_wq[1])],
		".B"[!!rdp->nocb_gp_bypass],
		".G"[!!rdp->nocb_gp_gp],
		(long)rdp->nocb_gp_seq,
		rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops));
}

static void show_rcu_nocb_state(struct rcu_data *rdp)
{
	struct rcu_segcblist *rsclp = &rdp->cblist;
	bool waslocked;
	bool wastimer;
	bool wassleep;

	if (rdp->nocb_gp_rdp == rdp)
		show_rcu_nocb_gp_state(rdp);

	pr_info("   CB %d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%c%c%c q%ld\n",
		rdp->cpu, rdp->nocb_gp_rdp->cpu,
		"kK"[!!rdp->nocb_cb_kthread],
		"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
		"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
		"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
		"sS"[!!rdp->nocb_cb_sleep],
		".W"[swait_active(&rdp->nocb_cb_wq)],
		jiffies - rdp->nocb_bypass_first,
		jiffies - rdp->nocb_nobypass_last,
		rdp->nocb_nobypass_count,
		".D"[rcu_segcblist_ready_cbs(rsclp)],
		".W"[!rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)],
		".R"[!rcu_segcblist_restempty(rsclp, RCU_WAIT_TAIL)],
		".N"[!rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL)],
		".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
		rcu_segcblist_n_cbs(&rdp->cblist));

	if (rdp->nocb_gp_rdp == rdp)
		return;

	waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock);
	wastimer = timer_pending(&rdp->nocb_bypass_timer);
	wassleep = swait_active(&rdp->nocb_gp_wq);
	if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep)
		return;

	pr_info("   nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n",
		"lL"[waslocked],
		"dD"[!!rdp->nocb_defer_wakeup],
		"tT"[wastimer],
		"sS"[!!rdp->nocb_gp_sleep],
		".W"[wassleep]);
}

#else /* #ifdef CONFIG_RCU_NOCB_CPU */

static void rcu_nocb_lock(struct rcu_data *rdp)
{
}

static void rcu_nocb_unlock(struct rcu_data *rdp)
{
}

static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags)
{
	local_irq_restore(flags);
}

static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
}

static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return NULL;
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}

static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				  unsigned long j)
{
	return true;
}

static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				bool *was_alldone, unsigned long flags)
{
	return false;
}

static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
				 unsigned long flags)
{
	WARN_ON_ONCE(1);
}

static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}

static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
	return false;
}

static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
}

static void rcu_spawn_cpu_nocb_kthread(int cpu)
{
}

static void __init rcu_spawn_nocb_kthreads(void)
{
}

static void show_rcu_nocb_state(struct rcu_data *rdp)
{
}

#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

static bool rcu_nohz_full_cpu(void)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_cpu(smp_processor_id()) &&
	    (!rcu_gp_in_progress() ||
	     time_before(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
		return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
	return false;
}

static void rcu_bind_gp_kthread(void)
{
	if (!tick_nohz_full_enabled())
		return;
	housekeeping_affine(current, HK_FLAG_RCU);
}

static void noinstr rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

static void noinstr rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

static void rcu_dynticks_task_trace_enter(void)
{
#ifdef CONFIG_TASKS_RCU_TRACE
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = true;
#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
}

static void rcu_dynticks_task_trace_exit(void)
{
#ifdef CONFIG_TASKS_RCU_TRACE
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = false;
#endif /* #ifdef CONFIG_TASKS_RCU_TRACE */
}