// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 *
 * For detailed explanation of the Read-Copy Update mechanism see
 * Documentation/RCU.
 */
#define pr_fmt(fmt) "rcu: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate_wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
#include <linux/trace_events.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
#include <linux/jiffies.h>
#include <linux/sched/isolation.h>
#include <linux/sched/clock.h>
#include "../time/tick-internal.h"

#include "tree.h"
#include "rcu.h"

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."

/* Data structures. */

/*
 * Steal a bit from the bottom of ->dynticks for idle entry/exit
 * control.  Initially this is for TLB flushing.
 */
#define RCU_DYNTICK_CTRL_MASK 0x1
#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
#ifndef rcu_eqs_special_exit
#define rcu_eqs_special_exit() do { } while (0)
#endif

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
	.dynticks_nesting = 1,
	.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
};
struct rcu_state rcu_state = {
	.level = { &rcu_state.node[0] },
	.gp_state = RCU_GP_IDLE,
	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
	.name = RCU_NAME,
	.abbr = RCU_ABBR,
	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
	.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
};

/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
static bool use_softirq = 1;
module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */
static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
module_param(rcu_fanout_leaf, int, 0444);
int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
/* Number of rcu_node structures at the specified level. */
int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total number of rcu_node structures. */

/*
 * The rcu_scheduler_active variable starts out as RCU_SCHEDULER_INACTIVE,
 * during which time RCU may assume that there is but one task and can
 * optimize synchronize_rcu() accordingly.  It transitions to
 * RCU_SCHEDULER_INIT just before the first task is spawned, and finally
 * to RCU_SCHEDULER_RUNNING once RCU is fully initialized, including all
 * of its kthreads having been spawned.
 */
int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);

/*
 * The rcu_scheduler_fully_active variable transitions from zero to one
 * during early_initcall() processing, once the scheduler is capable of
 * creating new tasks.  RCU processing that needs kthreads (for example,
 * RCU priority boosting) must therefore be delayed until after this
 * transition.
 */
static int rcu_scheduler_fully_active __read_mostly;

static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
			      unsigned long gps, unsigned long flags);
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);

/* rcuc/rcub kthread realtime priority */
static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
module_param(kthread_prio, int, 0444);

/* Delay in jiffies for grace-period initialization delays, debug only. */
static int gp_preinit_delay;
module_param(gp_preinit_delay, int, 0444);
static int gp_init_delay;
module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);

/* Retrieve RCU kthreads priority for rcutorture. */
int rcu_get_gp_kthreads_prio(void)
{
	return kthread_prio;
}
EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);

/*
 * Number of grace periods between delays, normalized by the duration of
 * the delay.  The longer the delay, the more the grace periods between
 * each delay.  This normalization means that, for non-zero delays, the
 * overall slowdown of grace periods is constant regardless of the
 * duration of the delay.
 */
#define PER_RCU_NODE_PERIOD 3	/* Number of grace periods between delays. */

/*
 * Compute the mask of online CPUs for the specified rcu_node structure.
 * This will not be stable unless the rcu_node structure's ->lock is
 * held, but the bit corresponding to the current CPU will be stable
 * in most contexts.
 */
unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
{
	return READ_ONCE(rnp->qsmaskinitnext);
}

/*
 * Return true if an RCU grace period is in progress.  This may be
 * invoked without holding the root rcu_node structure's ->lock, but
 * of course the result is then subject to change.
 */
static int rcu_gp_in_progress(void)
{
	return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
}

/*
 * Return the number of callbacks queued on the specified CPU.
 * Handles both the nocbs and normal cases.
 */
static long rcu_get_n_cbs_cpu(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	if (rcu_segcblist_is_enabled(&rdp->cblist))
		return rcu_segcblist_n_cbs(&rdp->cblist);
	return 0;
}

void rcu_softirq_qs(void)
{
	rcu_qs();
	rcu_preempt_deferred_qs(current);
}

/*
 * Record entry into an extended quiescent state.  This is only to be
 * called when not already in an extended quiescent state.
 */
static void rcu_dynticks_eqs_enter(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	int seq;

	/*
	 * CPUs seeing atomic_add_return() must see prior RCU read-side
	 * critical sections, and we also must force ordering with the
	 * next idle sojourn.
	 */
	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
	/* Better be in an extended quiescent state! */
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
		     (seq & RCU_DYNTICK_CTRL_CTR));
	/* Better not have special action (TLB flush) pending! */
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
		     (seq & RCU_DYNTICK_CTRL_MASK));
}

/*
 * Record exit from an extended quiescent state.  This is only to be
 * called from an extended quiescent state.
 */
static void rcu_dynticks_eqs_exit(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	int seq;

	/*
	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
	 * and we also must force ordering with the next RCU read-side
	 * critical section.
	 */
	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
		     !(seq & RCU_DYNTICK_CTRL_CTR));
	if (seq & RCU_DYNTICK_CTRL_MASK) {
		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
		smp_mb__after_atomic(); /* Clear mask before any special action. */
		/* Prefer duplicate flushes to losing a flush. */
		rcu_eqs_special_exit();
	}
}

/*
 * Reset the current CPU's ->dynticks counter to indicate that the
 * newly onlined CPU is no longer in an extended quiescent state.
 * This will either leave the counter unchanged, or increment it
 * to the next non-quiescent value.
 *
 * The non-atomic test/increment sequence works because the upper bits
 * of the ->dynticks counter are manipulated only by the corresponding
 * CPU, or when the corresponding CPU is offline.
 */
static void rcu_dynticks_eqs_online(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
		return;
	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
}

/*
 * Is the current CPU in an extended quiescent state?
 *
 * No ordering, as we are sampling CPU-local information.
 */
bool rcu_dynticks_curr_cpu_in_eqs(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

	return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
}

/*
 * Snapshot the ->dynticks counter with full ordering so as to allow
 * stable comparison of this counter with past and future snapshots.
 */
int rcu_dynticks_snap(struct rcu_data *rdp)
{
	int snap = atomic_add_return(0, &rdp->dynticks);

	return snap & ~RCU_DYNTICK_CTRL_MASK;
}

/*
 * Return true if the snapshot returned from rcu_dynticks_snap()
 * indicates that RCU is in an extended quiescent state.
 */
static bool rcu_dynticks_in_eqs(int snap)
{
	return !(snap & RCU_DYNTICK_CTRL_CTR);
}

/*
 * Return true if the CPU corresponding to the specified rcu_data
 * structure has spent some time in an extended quiescent state since
 * rcu_dynticks_snap() returned the specified snapshot.
 */
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
{
	return snap != rcu_dynticks_snap(rdp);
}

/*
 * Set the special (bottom) bit of the specified CPU so that it
 * will take special action (such as flushing its TLB) on the
 * next exit from an extended quiescent state.  Returns true if
 * the bit was successfully set, or false if the CPU was not in
 * an extended quiescent state.
 */
bool rcu_eqs_special_set(int cpu)
{
	int old;
	int new;
	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);

	do {
		old = atomic_read(&rdp->dynticks);
		if (old & RCU_DYNTICK_CTRL_CTR)
			return false;
		new = old | RCU_DYNTICK_CTRL_MASK;
	} while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
	return true;
}

/*
 * Let the RCU core know that this CPU has gone through the scheduler,
 * which is a quiescent state.  This is called when the need for a
 * quiescent state is urgent, so we burn an atomic operation and full
 * memory barriers to let the RCU core know about it, regardless of
 * what this CPU might (or might not) do in the near future.
 *
 * We inform the RCU core by emulating a zero-duration dyntick-idle period.
 *
 * The caller must have disabled interrupts and must not be idle.
 */
static void __maybe_unused rcu_momentary_dyntick_idle(void)
{
	int special;

	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
				    &this_cpu_ptr(&rcu_data)->dynticks);
	/* It is illegal to call this from idle state. */
	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
	rcu_preempt_deferred_qs(current);
}

/**
 * rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
 *
 * If the current CPU is idle and running at a first-level (not nested)
 * interrupt from idle, return true.  The caller must have at least
 * disabled preemption.
 */
static int rcu_is_cpu_rrupt_from_idle(void)
{
	/* Called only from within the scheduling-clock interrupt. */
	lockdep_assert_in_irq();

	/* Check for counter underflows. */
	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
			 "RCU dynticks_nesting counter underflow!");
	RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
			 "RCU dynticks_nmi_nesting counter underflow/zero!");

	/* Are we at first interrupt nesting level? */
	if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
		return false;

	/* Does the CPU appear to be idle from an RCU standpoint? */
	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}

#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch ... */
#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
static long qlowmark = DEFAULT_RCU_QLOMARK;

module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);

static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
module_param(rcu_divisor, int, 0644);

/* Force an exit from rcu_do_batch() after 3 milliseconds, if needed. */
static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
module_param(rcu_resched_ns, long, 0644);

/*
 * How long the grace period must be before we start recruiting
 * quiescent-state help from rcu_note_context_switch().
 */
static ulong jiffies_till_sched_qs = ULONG_MAX;
module_param(jiffies_till_sched_qs, ulong, 0444);
static ulong jiffies_to_sched_qs; /* See adjust_jiffies_till_sched_qs(). */
module_param(jiffies_to_sched_qs, ulong, 0444); /* Displayed but not set. */
435
436
437
438
439
440
441
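/*
 * Compute jiffies_to_sched_qs: if the jiffies_till_sched_qs module
 * parameter was set, honor it; otherwise derive a value from the
 * first/next force-quiescent-state intervals, bounded below so that
 * the grace-period kthread has time to detect idle CPUs before any
 * active measures are taken to force quiescent states.
 */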
442static void adjust_jiffies_till_sched_qs(void)
443{
444 unsigned long j;
445
446
447 if (jiffies_till_sched_qs != ULONG_MAX) {
448 WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
449 return;
450 }
451
452 j = READ_ONCE(jiffies_till_first_fqs) +
453 2 * READ_ONCE(jiffies_till_next_fqs);
454 if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
455 j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
456 pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
457 WRITE_ONCE(jiffies_to_sched_qs, j);
458}
459
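/*
 * The module-parameter setters below clamp jiffies_till_first_fqs and
 * jiffies_till_next_fqs to at most HZ (and the latter to at least one
 * jiffy), then recompute jiffies_to_sched_qs accordingly.
 */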
460static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
461{
462 ulong j;
463 int ret = kstrtoul(val, 0, &j);
464
465 if (!ret) {
466 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
467 adjust_jiffies_till_sched_qs();
468 }
469 return ret;
470}
471
472static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
473{
474 ulong j;
475 int ret = kstrtoul(val, 0, &j);
476
477 if (!ret) {
478 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
479 adjust_jiffies_till_sched_qs();
480 }
481 return ret;
482}
483
484static struct kernel_param_ops first_fqs_jiffies_ops = {
485 .set = param_set_first_fqs_jiffies,
486 .get = param_get_ulong,
487};
488
489static struct kernel_param_ops next_fqs_jiffies_ops = {
490 .set = param_set_next_fqs_jiffies,
491 .get = param_get_ulong,
492};
493
494module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
495module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
496module_param(rcu_kick_kthreads, bool, 0644);
497
498static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
499static int rcu_pending(void);
500
501
502
503
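/*
 * Return the number of RCU GPs completed thus far for debug & stats.
 */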
504unsigned long rcu_get_gp_seq(void)
505{
506 return READ_ONCE(rcu_state.gp_seq);
507}
508EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
509
510
511
512
513
514
515
516unsigned long rcu_exp_batches_completed(void)
517{
518 return rcu_state.expedited_sequence;
519}
520EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
521
522
523
524
525static struct rcu_node *rcu_get_root(void)
526{
527 return &rcu_state.node[0];
528}
529
530
531
532
533static const char *gp_state_getname(short gs)
534{
535 if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names))
536 return "???";
537 return gp_state_names[gs];
538}
539
540
541
542
543void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
544 unsigned long *gp_seq)
545{
546 switch (test_type) {
547 case RCU_FLAVOR:
548 *flags = READ_ONCE(rcu_state.gp_flags);
549 *gp_seq = rcu_seq_current(&rcu_state.gp_seq);
550 break;
551 default:
552 break;
553 }
554}
555EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
556
557
558
559
560
561
562
563
564
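/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */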
565static void rcu_eqs_enter(bool user)
566{
567 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
568
569 WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE);
570 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
571 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
572 rdp->dynticks_nesting == 0);
573 if (rdp->dynticks_nesting != 1) {
574 rdp->dynticks_nesting--;
575 return;
576 }
577
578 lockdep_assert_irqs_disabled();
579 trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks);
580 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
581 rdp = this_cpu_ptr(&rcu_data);
582 do_nocb_deferred_wakeup(rdp);
583 rcu_prepare_for_idle();
584 rcu_preempt_deferred_qs(current);
585 WRITE_ONCE(rdp->dynticks_nesting, 0);
586 rcu_dynticks_eqs_enter();
587 rcu_dynticks_task_enter();
588}
589
590
591
592
593
594
595
596
597
598
599
600
601void rcu_idle_enter(void)
602{
603 lockdep_assert_irqs_disabled();
604 rcu_eqs_enter(false);
605}
606
607#ifdef CONFIG_NO_HZ_FULL
608
609
610
611
612
613
614
615
616
617
618
619void rcu_user_enter(void)
620{
621 lockdep_assert_irqs_disabled();
622 rcu_eqs_enter(true);
623}
624#endif
625
626
627
628
629
630
631
632
633
634
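/*
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 *
 * If you add or remove a call to rcu_nmi_exit_common(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */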
635static __always_inline void rcu_nmi_exit_common(bool irq)
636{
637 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
638
639
640
641
642
643
644 WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0);
645 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
646
647
648
649
650
651 if (rdp->dynticks_nmi_nesting != 1) {
652 trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
653 WRITE_ONCE(rdp->dynticks_nmi_nesting,
654 rdp->dynticks_nmi_nesting - 2);
655 return;
656 }
657
658
659 trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks);
660 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
661
662 if (irq)
663 rcu_prepare_for_idle();
664
665 rcu_dynticks_eqs_enter();
666
667 if (irq)
668 rcu_dynticks_task_enter();
669}
670
671
672
673
674
675
676
677void rcu_nmi_exit(void)
678{
679 rcu_nmi_exit_common(false);
680}
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701void rcu_irq_exit(void)
702{
703 lockdep_assert_irqs_disabled();
704 rcu_nmi_exit_common(true);
705}
706
707
708
709
710
711
712
713void rcu_irq_exit_irqson(void)
714{
715 unsigned long flags;
716
717 local_irq_save(flags);
718 rcu_irq_exit();
719 local_irq_restore(flags);
720}
721
722
723
724
725
726
727
728
729
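/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->dynticks_nmi_nesting field to DYNTICK_IRQ_NONIDLE to
 * allow for the possibility of usermode upcalls messing up our count of
 * interrupt nesting level during the busy period that is just now starting.
 */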
730static void rcu_eqs_exit(bool user)
731{
732 struct rcu_data *rdp;
733 long oldval;
734
735 lockdep_assert_irqs_disabled();
736 rdp = this_cpu_ptr(&rcu_data);
737 oldval = rdp->dynticks_nesting;
738 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
739 if (oldval) {
740 rdp->dynticks_nesting++;
741 return;
742 }
743 rcu_dynticks_task_exit();
744 rcu_dynticks_eqs_exit();
745 rcu_cleanup_after_idle();
746 trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks);
747 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
748 WRITE_ONCE(rdp->dynticks_nesting, 1);
749 WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
750 WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
751}
752
753
754
755
756
757
758
759
760
761
762void rcu_idle_exit(void)
763{
764 unsigned long flags;
765
766 local_irq_save(flags);
767 rcu_eqs_exit(false);
768 local_irq_restore(flags);
769}
770
771#ifdef CONFIG_NO_HZ_FULL
772
773
774
775
776
777
778
779
780
781void rcu_user_exit(void)
782{
783 rcu_eqs_exit(1);
784}
785#endif
786
787
788
789
790
791
792
793
794
795
796
797
798
799
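/*
 * rcu_nmi_enter_common - inform RCU of entry to NMI context
 * @irq: Is this call from rcu_irq_enter?
 *
 * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
 * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active.  This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int.  (You will probably
 * run out of stack space first.)
 */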
800static __always_inline void rcu_nmi_enter_common(bool irq)
801{
802 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
803 long incby = 2;
804
805
806 WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0);
807
808
809
810
811
812
813
814
815
816 if (rcu_dynticks_curr_cpu_in_eqs()) {
817
818 if (irq)
819 rcu_dynticks_task_exit();
820
821 rcu_dynticks_eqs_exit();
822
823 if (irq)
824 rcu_cleanup_after_idle();
825
826 incby = 1;
827 }
828 trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
829 rdp->dynticks_nmi_nesting,
830 rdp->dynticks_nmi_nesting + incby, rdp->dynticks);
831 WRITE_ONCE(rdp->dynticks_nmi_nesting,
832 rdp->dynticks_nmi_nesting + incby);
833 barrier();
834}
835
836
837
838
839void rcu_nmi_enter(void)
840{
841 rcu_nmi_enter_common(false);
842}
843NOKPROBE_SYMBOL(rcu_nmi_enter);
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867void rcu_irq_enter(void)
868{
869 lockdep_assert_irqs_disabled();
870 rcu_nmi_enter_common(true);
871}
872
873
874
875
876
877
878
879void rcu_irq_enter_irqson(void)
880{
881 unsigned long flags;
882
883 local_irq_save(flags);
884 rcu_irq_enter();
885 local_irq_restore(flags);
886}
887
888
889
890
891
892
893
894
895
896bool notrace rcu_is_watching(void)
897{
898 bool ret;
899
900 preempt_disable_notrace();
901 ret = !rcu_dynticks_curr_cpu_in_eqs();
902 preempt_enable_notrace();
903 return ret;
904}
905EXPORT_SYMBOL_GPL(rcu_is_watching);
906
907
908
909
910
911
912
913
914void rcu_request_urgent_qs_task(struct task_struct *t)
915{
916 int cpu;
917
918 barrier();
919 cpu = task_cpu(t);
920 if (!task_curr(t))
921 return;
922 smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
923}
924
925#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940bool rcu_lockdep_current_cpu_online(void)
941{
942 struct rcu_data *rdp;
943 struct rcu_node *rnp;
944 bool ret = false;
945
946 if (in_nmi() || !rcu_scheduler_fully_active)
947 return true;
948 preempt_disable();
949 rdp = this_cpu_ptr(&rcu_data);
950 rnp = rdp->mynode;
951 if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
952 ret = true;
953 preempt_enable();
954 return ret;
955}
956EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
957
958#endif
959
960
961
962
963
964
965
966
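/*
 * When trying to report a quiescent state on behalf of some other CPU,
 * it is our responsibility to check for and handle potential overflow
 * of the rcu_node ->gp_seq counter with respect to the rcu_data counters.
 * After all, the CPU might be in deep idle state, and thus executing no
 * code whatsoever.
 */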
967static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
968{
969 raw_lockdep_assert_held_rcu_node(rnp);
970 if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
971 rnp->gp_seq))
972 WRITE_ONCE(rdp->gpwrap, true);
973 if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
974 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
975}
976
977
978
979
980
981
982static int dyntick_save_progress_counter(struct rcu_data *rdp)
983{
984 rdp->dynticks_snap = rcu_dynticks_snap(rdp);
985 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
986 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
987 rcu_gpnum_ovf(rdp->mynode, rdp);
988 return 1;
989 }
990 return 0;
991}
992
993
994
995
996
997
998
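/*
 * Return true if the specified CPU has passed through a quiescent state
 * by virtue of being in or having passed through a dynticks idle state
 * since the last call to dyntick_save_progress_counter() for this same
 * CPU, or by virtue of having been offline.
 */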
999static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1000{
1001 unsigned long jtsq;
1002 bool *rnhqp;
1003 bool *ruqp;
1004 struct rcu_node *rnp = rdp->mynode;
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014 if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) {
1015 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1016 rcu_gpnum_ovf(rnp, rdp);
1017 return 1;
1018 }
1019
1020
1021 if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
1022 time_after(jiffies, rcu_state.gp_start + HZ)) {
1023 bool onl;
1024 struct rcu_node *rnp1;
1025
1026 WARN_ON(1);
1027 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
1028 __func__, rnp->grplo, rnp->grphi, rnp->level,
1029 (long)rnp->gp_seq, (long)rnp->completedqs);
1030 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
1031 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
1032 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
1033 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
1034 pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
1035 __func__, rdp->cpu, ".o"[onl],
1036 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
1037 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
1038 return 1;
1039 }
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 jtsq = READ_ONCE(jiffies_to_sched_qs);
1053 ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);
1054 rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
1055 if (!READ_ONCE(*rnhqp) &&
1056 (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
1057 time_after(jiffies, rcu_state.jiffies_resched))) {
1058 WRITE_ONCE(*rnhqp, true);
1059
1060 smp_store_release(ruqp, true);
1061 } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
1062 WRITE_ONCE(*ruqp, true);
1063 }
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073 if (tick_nohz_full_cpu(rdp->cpu) &&
1074 time_after(jiffies,
1075 READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
1076 resched_cpu(rdp->cpu);
1077 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1078 }
1079
1080
1081
1082
1083
1084
1085
1086
1087 if (time_after(jiffies, rcu_state.jiffies_resched)) {
1088 if (time_after(jiffies,
1089 READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
1090 resched_cpu(rdp->cpu);
1091 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1092 }
1093 if (IS_ENABLED(CONFIG_IRQ_WORK) &&
1094 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
1095 (rnp->ffmask & rdp->grpmask)) {
1096 init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
1097 rdp->rcu_iw_pending = true;
1098 rdp->rcu_iw_gp_seq = rnp->gp_seq;
1099 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
1100 }
1101 }
1102
1103 return 0;
1104}
1105
1106
1107static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1108 unsigned long gp_seq_req, const char *s)
1109{
1110 trace_rcu_future_grace_period(rcu_state.name, rnp->gp_seq, gp_seq_req,
1111 rnp->level, rnp->grplo, rnp->grphi, s);
1112}
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
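/*
 * rcu_start_this_gp - Request the start of a particular grace period
 * @rnp_start: The leaf node of the CPU from which to start.
 * @rdp: The rcu_data corresponding to the CPU from which to start.
 * @gp_seq_req: The gp_seq of the grace period to start.
 *
 * Start the specified grace period, as needed to handle newly arrived
 * callbacks.  The required future grace periods are recorded in each
 * rcu_node structure's ->gp_seq_needed field.  Returns true if there
 * is reason to awaken the grace-period kthread.
 *
 * The caller must hold the specified rcu_node structure's ->lock, which
 * is why the caller is responsible for waking the grace-period kthread.
 */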
1130static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
1131 unsigned long gp_seq_req)
1132{
1133 bool ret = false;
1134 struct rcu_node *rnp;
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145 raw_lockdep_assert_held_rcu_node(rnp_start);
1146 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req, TPS("Startleaf"));
1147 for (rnp = rnp_start; 1; rnp = rnp->parent) {
1148 if (rnp != rnp_start)
1149 raw_spin_lock_rcu_node(rnp);
1150 if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) ||
1151 rcu_seq_started(&rnp->gp_seq, gp_seq_req) ||
1152 (rnp != rnp_start &&
1153 rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) {
1154 trace_rcu_this_gp(rnp, rdp, gp_seq_req,
1155 TPS("Prestarted"));
1156 goto unlock_out;
1157 }
1158 rnp->gp_seq_needed = gp_seq_req;
1159 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
1160
1161
1162
1163
1164
1165
1166 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req,
1167 TPS("Startedleaf"));
1168 goto unlock_out;
1169 }
1170 if (rnp != rnp_start && rnp->parent != NULL)
1171 raw_spin_unlock_rcu_node(rnp);
1172 if (!rnp->parent)
1173 break;
1174 }
1175
1176
1177 if (rcu_gp_in_progress()) {
1178 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot"));
1179 goto unlock_out;
1180 }
1181 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot"));
1182 WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT);
1183 rcu_state.gp_req_activity = jiffies;
1184 if (!rcu_state.gp_kthread) {
1185 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
1186 goto unlock_out;
1187 }
1188 trace_rcu_grace_period(rcu_state.name, READ_ONCE(rcu_state.gp_seq), TPS("newreq"));
1189 ret = true;
1190unlock_out:
1191
1192 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) {
1193 rnp_start->gp_seq_needed = rnp->gp_seq_needed;
1194 rdp->gp_seq_needed = rnp->gp_seq_needed;
1195 }
1196 if (rnp != rnp_start)
1197 raw_spin_unlock_rcu_node(rnp);
1198 return ret;
1199}
1200
1201
1202
1203
1204
1205static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
1206{
1207 bool needmore;
1208 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1209
1210 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
1211 if (!needmore)
1212 rnp->gp_seq_needed = rnp->gp_seq;
1213 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq,
1214 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1215 return needmore;
1216}
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232static void rcu_gp_kthread_wake(void)
1233{
1234 if ((current == rcu_state.gp_kthread &&
1235 !in_irq() && !in_serving_softirq()) ||
1236 !READ_ONCE(rcu_state.gp_flags) ||
1237 !rcu_state.gp_kthread)
1238 return;
1239 WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
1240 WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
1241 swake_up_one(&rcu_state.gp_wq);
1242}
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
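/*
 * If there is room, assign a ->gp_seq number to any callbacks on this
 * CPU that have not already been assigned.  Also accelerate any callbacks
 * that were previously assigned a ->gp_seq number that has since proven
 * to be too conservative.  Returns true if the grace-period kthread
 * should be awakened.
 *
 * The caller must hold rnp->lock with interrupts disabled.
 */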
1256static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1257{
1258 unsigned long gp_seq_req;
1259 bool ret = false;
1260
1261 rcu_lockdep_assert_cblist_protected(rdp);
1262 raw_lockdep_assert_held_rcu_node(rnp);
1263
1264
1265 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1266 return false;
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);
1279 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
1280 ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
1281
1282
1283 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1284 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccWaitCB"));
1285 else
1286 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccReadyCB"));
1287 return ret;
1288}
1289
1290
1291
1292
1293
1294
1295
1296
1297static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
1298 struct rcu_data *rdp)
1299{
1300 unsigned long c;
1301 bool needwake;
1302
1303 rcu_lockdep_assert_cblist_protected(rdp);
1304 c = rcu_seq_snap(&rcu_state.gp_seq);
1305 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
1306
1307 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1308 return;
1309 }
1310 raw_spin_lock_rcu_node(rnp);
1311 needwake = rcu_accelerate_cbs(rnp, rdp);
1312 raw_spin_unlock_rcu_node(rnp);
1313 if (needwake)
1314 rcu_gp_kthread_wake();
1315}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1328{
1329 rcu_lockdep_assert_cblist_protected(rdp);
1330 raw_lockdep_assert_held_rcu_node(rnp);
1331
1332
1333 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1334 return false;
1335
1336
1337
1338
1339
1340 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
1341
1342
1343 return rcu_accelerate_cbs(rnp, rdp);
1344}
1345
1346
1347
1348
1349
1350static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
1351 struct rcu_data *rdp)
1352{
1353 rcu_lockdep_assert_cblist_protected(rdp);
1354 if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
1355 !raw_spin_trylock_rcu_node(rnp))
1356 return;
1357 WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
1358 raw_spin_unlock_rcu_node(rnp);
1359}
1360
1361
1362
1363
1364
1365
1366
1367static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
1368{
1369 bool ret = false;
1370 bool need_gp;
1371 const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
1372 rcu_segcblist_is_offloaded(&rdp->cblist);
1373
1374 raw_lockdep_assert_held_rcu_node(rnp);
1375
1376 if (rdp->gp_seq == rnp->gp_seq)
1377 return false;
1378
1379
1380 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
1381 unlikely(READ_ONCE(rdp->gpwrap))) {
1382 if (!offloaded)
1383 ret = rcu_advance_cbs(rnp, rdp);
1384 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
1385 } else {
1386 if (!offloaded)
1387 ret = rcu_accelerate_cbs(rnp, rdp);
1388 }
1389
1390
1391 if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
1392 unlikely(READ_ONCE(rdp->gpwrap))) {
1393
1394
1395
1396
1397
1398 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
1399 need_gp = !!(rnp->qsmask & rdp->grpmask);
1400 rdp->cpu_no_qs.b.norm = need_gp;
1401 rdp->core_needs_qs = need_gp;
1402 zero_cpu_stall_ticks(rdp);
1403 }
1404 rdp->gp_seq = rnp->gp_seq;
1405 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
1406 rdp->gp_seq_needed = rnp->gp_seq_needed;
1407 WRITE_ONCE(rdp->gpwrap, false);
1408 rcu_gpnum_ovf(rnp, rdp);
1409 return ret;
1410}
1411
1412static void note_gp_changes(struct rcu_data *rdp)
1413{
1414 unsigned long flags;
1415 bool needwake;
1416 struct rcu_node *rnp;
1417
1418 local_irq_save(flags);
1419 rnp = rdp->mynode;
1420 if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) &&
1421 !unlikely(READ_ONCE(rdp->gpwrap))) ||
1422 !raw_spin_trylock_rcu_node(rnp)) {
1423 local_irq_restore(flags);
1424 return;
1425 }
1426 needwake = __note_gp_changes(rnp, rdp);
1427 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1428 if (needwake)
1429 rcu_gp_kthread_wake();
1430}
1431
1432static void rcu_gp_slow(int delay)
1433{
1434 if (delay > 0 &&
1435 !(rcu_seq_ctr(rcu_state.gp_seq) %
1436 (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
1437 schedule_timeout_uninterruptible(delay);
1438}
1439
1440
1441
1442
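/*
 * Initialize a new grace period.  Return false if no grace period required.
 */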
1443static bool rcu_gp_init(void)
1444{
1445 unsigned long flags;
1446 unsigned long oldmask;
1447 unsigned long mask;
1448 struct rcu_data *rdp;
1449 struct rcu_node *rnp = rcu_get_root();
1450
1451 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1452 raw_spin_lock_irq_rcu_node(rnp);
1453 if (!READ_ONCE(rcu_state.gp_flags)) {
1454
1455 raw_spin_unlock_irq_rcu_node(rnp);
1456 return false;
1457 }
1458 WRITE_ONCE(rcu_state.gp_flags, 0);
1459
1460 if (WARN_ON_ONCE(rcu_gp_in_progress())) {
1461
1462
1463
1464
1465 raw_spin_unlock_irq_rcu_node(rnp);
1466 return false;
1467 }
1468
1469
1470 record_gp_stall_check_time();
1471
1472 rcu_seq_start(&rcu_state.gp_seq);
1473 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
1474 raw_spin_unlock_irq_rcu_node(rnp);
1475
1476
1477
1478
1479
1480
1481
1482 rcu_state.gp_state = RCU_GP_ONOFF;
1483 rcu_for_each_leaf_node(rnp) {
1484 raw_spin_lock(&rcu_state.ofl_lock);
1485 raw_spin_lock_irq_rcu_node(rnp);
1486 if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
1487 !rnp->wait_blkd_tasks) {
1488
1489 raw_spin_unlock_irq_rcu_node(rnp);
1490 raw_spin_unlock(&rcu_state.ofl_lock);
1491 continue;
1492 }
1493
1494
1495 oldmask = rnp->qsmaskinit;
1496 rnp->qsmaskinit = rnp->qsmaskinitnext;
1497
1498
1499 if (!oldmask != !rnp->qsmaskinit) {
1500 if (!oldmask) {
1501 if (!rnp->wait_blkd_tasks)
1502 rcu_init_new_rnp(rnp);
1503 } else if (rcu_preempt_has_tasks(rnp)) {
1504 rnp->wait_blkd_tasks = true;
1505 } else {
1506 rcu_cleanup_dead_rnp(rnp);
1507 }
1508 }
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518 if (rnp->wait_blkd_tasks &&
1519 (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
1520 rnp->wait_blkd_tasks = false;
1521 if (!rnp->qsmaskinit)
1522 rcu_cleanup_dead_rnp(rnp);
1523 }
1524
1525 raw_spin_unlock_irq_rcu_node(rnp);
1526 raw_spin_unlock(&rcu_state.ofl_lock);
1527 }
1528 rcu_gp_slow(gp_preinit_delay);
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542 rcu_state.gp_state = RCU_GP_INIT;
1543 rcu_for_each_node_breadth_first(rnp) {
1544 rcu_gp_slow(gp_init_delay);
1545 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1546 rdp = this_cpu_ptr(&rcu_data);
1547 rcu_preempt_check_blocked_tasks(rnp);
1548 rnp->qsmask = rnp->qsmaskinit;
1549 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq);
1550 if (rnp == rdp->mynode)
1551 (void)__note_gp_changes(rnp, rdp);
1552 rcu_preempt_boost_start_gp(rnp);
1553 trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq,
1554 rnp->level, rnp->grplo,
1555 rnp->grphi, rnp->qsmask);
1556
1557 mask = rnp->qsmask & ~rnp->qsmaskinitnext;
1558 rnp->rcu_gp_init_mask = mask;
1559 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
1560 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
1561 else
1562 raw_spin_unlock_irq_rcu_node(rnp);
1563 cond_resched_tasks_rcu_qs();
1564 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1565 }
1566
1567 return true;
1568}
1569
1570
1571
1572
1573
1574static bool rcu_gp_fqs_check_wake(int *gfp)
1575{
1576 struct rcu_node *rnp = rcu_get_root();
1577
1578
1579 *gfp = READ_ONCE(rcu_state.gp_flags);
1580 if (*gfp & RCU_GP_FLAG_FQS)
1581 return true;
1582
1583
1584 if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
1585 return true;
1586
1587 return false;
1588}
1589
1590
1591
1592
1593static void rcu_gp_fqs(bool first_time)
1594{
1595 struct rcu_node *rnp = rcu_get_root();
1596
1597 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1598 rcu_state.n_force_qs++;
1599 if (first_time) {
1600
1601 force_qs_rnp(dyntick_save_progress_counter);
1602 } else {
1603
1604 force_qs_rnp(rcu_implicit_dynticks_qs);
1605 }
1606
1607 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
1608 raw_spin_lock_irq_rcu_node(rnp);
1609 WRITE_ONCE(rcu_state.gp_flags,
1610 READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS);
1611 raw_spin_unlock_irq_rcu_node(rnp);
1612 }
1613}
1614
1615
1616
1617
1618static void rcu_gp_fqs_loop(void)
1619{
1620 bool first_gp_fqs;
1621 int gf;
1622 unsigned long j;
1623 int ret;
1624 struct rcu_node *rnp = rcu_get_root();
1625
1626 first_gp_fqs = true;
1627 j = READ_ONCE(jiffies_till_first_fqs);
1628 ret = 0;
1629 for (;;) {
1630 if (!ret) {
1631 rcu_state.jiffies_force_qs = jiffies + j;
1632 WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
1633 jiffies + (j ? 3 * j : 2));
1634 }
1635 trace_rcu_grace_period(rcu_state.name,
1636 READ_ONCE(rcu_state.gp_seq),
1637 TPS("fqswait"));
1638 rcu_state.gp_state = RCU_GP_WAIT_FQS;
1639 ret = swait_event_idle_timeout_exclusive(
1640 rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
1641 rcu_state.gp_state = RCU_GP_DOING_FQS;
1642
1643
1644 if (!READ_ONCE(rnp->qsmask) &&
1645 !rcu_preempt_blocked_readers_cgp(rnp))
1646 break;
1647
1648 if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
1649 (gf & RCU_GP_FLAG_FQS)) {
1650 trace_rcu_grace_period(rcu_state.name,
1651 READ_ONCE(rcu_state.gp_seq),
1652 TPS("fqsstart"));
1653 rcu_gp_fqs(first_gp_fqs);
1654 first_gp_fqs = false;
1655 trace_rcu_grace_period(rcu_state.name,
1656 READ_ONCE(rcu_state.gp_seq),
1657 TPS("fqsend"));
1658 cond_resched_tasks_rcu_qs();
1659 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1660 ret = 0;
1661 j = READ_ONCE(jiffies_till_next_fqs);
1662 } else {
1663
1664 cond_resched_tasks_rcu_qs();
1665 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1666 WARN_ON(signal_pending(current));
1667 trace_rcu_grace_period(rcu_state.name,
1668 READ_ONCE(rcu_state.gp_seq),
1669 TPS("fqswaitsig"));
1670 ret = 1;
1671 j = jiffies;
1672 if (time_after(jiffies, rcu_state.jiffies_force_qs))
1673 j = 1;
1674 else
1675 j = rcu_state.jiffies_force_qs - j;
1676 }
1677 }
1678}
1679
1680
1681
1682
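/*
 * Clean up after the old grace period.
 */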
1683static void rcu_gp_cleanup(void)
1684{
1685 unsigned long gp_duration;
1686 bool needgp = false;
1687 unsigned long new_gp_seq;
1688 bool offloaded;
1689 struct rcu_data *rdp;
1690 struct rcu_node *rnp = rcu_get_root();
1691 struct swait_queue_head *sq;
1692
1693 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1694 raw_spin_lock_irq_rcu_node(rnp);
1695 rcu_state.gp_end = jiffies;
1696 gp_duration = rcu_state.gp_end - rcu_state.gp_start;
1697 if (gp_duration > rcu_state.gp_max)
1698 rcu_state.gp_max = gp_duration;
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708 raw_spin_unlock_irq_rcu_node(rnp);
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719 new_gp_seq = rcu_state.gp_seq;
1720 rcu_seq_end(&new_gp_seq);
1721 rcu_for_each_node_breadth_first(rnp) {
1722 raw_spin_lock_irq_rcu_node(rnp);
1723 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
1724 dump_blkd_tasks(rnp, 10);
1725 WARN_ON_ONCE(rnp->qsmask);
1726 WRITE_ONCE(rnp->gp_seq, new_gp_seq);
1727 rdp = this_cpu_ptr(&rcu_data);
1728 if (rnp == rdp->mynode)
1729 needgp = __note_gp_changes(rnp, rdp) || needgp;
1730
1731 needgp = rcu_future_gp_cleanup(rnp) || needgp;
1732 sq = rcu_nocb_gp_get(rnp);
1733 raw_spin_unlock_irq_rcu_node(rnp);
1734 rcu_nocb_gp_cleanup(sq);
1735 cond_resched_tasks_rcu_qs();
1736 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1737 rcu_gp_slow(gp_cleanup_delay);
1738 }
1739 rnp = rcu_get_root();
1740 raw_spin_lock_irq_rcu_node(rnp);
1741
1742
1743 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
1744 rcu_seq_end(&rcu_state.gp_seq);
1745 rcu_state.gp_state = RCU_GP_IDLE;
1746
1747 rdp = this_cpu_ptr(&rcu_data);
1748 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
1749 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
1750 TPS("CleanupMore"));
1751 needgp = true;
1752 }
1753
1754 offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
1755 rcu_segcblist_is_offloaded(&rdp->cblist);
1756 if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
1757 WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
1758 rcu_state.gp_req_activity = jiffies;
1759 trace_rcu_grace_period(rcu_state.name,
1760 READ_ONCE(rcu_state.gp_seq),
1761 TPS("newreq"));
1762 } else {
1763 WRITE_ONCE(rcu_state.gp_flags,
1764 rcu_state.gp_flags & RCU_GP_FLAG_INIT);
1765 }
1766 raw_spin_unlock_irq_rcu_node(rnp);
1767}
1768
1769
1770
1771
1772static int __noreturn rcu_gp_kthread(void *unused)
1773{
1774 rcu_bind_gp_kthread();
1775 for (;;) {
1776
1777
1778 for (;;) {
1779 trace_rcu_grace_period(rcu_state.name,
1780 READ_ONCE(rcu_state.gp_seq),
1781 TPS("reqwait"));
1782 rcu_state.gp_state = RCU_GP_WAIT_GPS;
1783 swait_event_idle_exclusive(rcu_state.gp_wq,
1784 READ_ONCE(rcu_state.gp_flags) &
1785 RCU_GP_FLAG_INIT);
1786 rcu_state.gp_state = RCU_GP_DONE_GPS;
1787
1788 if (rcu_gp_init())
1789 break;
1790 cond_resched_tasks_rcu_qs();
1791 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1792 WARN_ON(signal_pending(current));
1793 trace_rcu_grace_period(rcu_state.name,
1794 READ_ONCE(rcu_state.gp_seq),
1795 TPS("reqwaitsig"));
1796 }
1797
1798
1799 rcu_gp_fqs_loop();
1800
1801
1802 rcu_state.gp_state = RCU_GP_CLEANUP;
1803 rcu_gp_cleanup();
1804 rcu_state.gp_state = RCU_GP_CLEANED;
1805 }
1806}
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817static void rcu_report_qs_rsp(unsigned long flags)
1818 __releases(rcu_get_root()->lock)
1819{
1820 raw_lockdep_assert_held_rcu_node(rcu_get_root());
1821 WARN_ON_ONCE(!rcu_gp_in_progress());
1822 WRITE_ONCE(rcu_state.gp_flags,
1823 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
1824 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags);
1825 rcu_gp_kthread_wake();
1826}
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
1843 unsigned long gps, unsigned long flags)
1844 __releases(rnp->lock)
1845{
1846 unsigned long oldmask = 0;
1847 struct rcu_node *rnp_c;
1848
1849 raw_lockdep_assert_held_rcu_node(rnp);
1850
1851
1852 for (;;) {
1853 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) {
1854
1855
1856
1857
1858
1859 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1860 return;
1861 }
1862 WARN_ON_ONCE(oldmask);
1863 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
1864 rcu_preempt_blocked_readers_cgp(rnp));
1865 rnp->qsmask &= ~mask;
1866 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq,
1867 mask, rnp->qsmask, rnp->level,
1868 rnp->grplo, rnp->grphi,
1869 !!rnp->gp_tasks);
1870 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1871
1872
1873 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1874 return;
1875 }
1876 rnp->completedqs = rnp->gp_seq;
1877 mask = rnp->grpmask;
1878 if (rnp->parent == NULL) {
1879
1880
1881
1882 break;
1883 }
1884 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1885 rnp_c = rnp;
1886 rnp = rnp->parent;
1887 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1888 oldmask = rnp_c->qsmask;
1889 }
1890
1891
1892
1893
1894
1895
1896 rcu_report_qs_rsp(flags);
1897}
1898
1899
1900
1901
1902
1903
1904
1905
1906static void __maybe_unused
1907rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
1908 __releases(rnp->lock)
1909{
1910 unsigned long gps;
1911 unsigned long mask;
1912 struct rcu_node *rnp_p;
1913
1914 raw_lockdep_assert_held_rcu_node(rnp);
1915 if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPTION)) ||
1916 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
1917 rnp->qsmask != 0) {
1918 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1919 return;
1920 }
1921
1922 rnp->completedqs = rnp->gp_seq;
1923 rnp_p = rnp->parent;
1924 if (rnp_p == NULL) {
1925
1926
1927
1928
1929 rcu_report_qs_rsp(flags);
1930 return;
1931 }
1932
1933
1934 gps = rnp->gp_seq;
1935 mask = rnp->grpmask;
1936 raw_spin_unlock_rcu_node(rnp);
1937 raw_spin_lock_rcu_node(rnp_p);
1938 rcu_report_qs_rnp(mask, rnp_p, gps, flags);
1939}
1940
1941
1942
1943
1944
1945static void
1946rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
1947{
1948 unsigned long flags;
1949 unsigned long mask;
1950 bool needwake = false;
1951 const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
1952 rcu_segcblist_is_offloaded(&rdp->cblist);
1953 struct rcu_node *rnp;
1954
1955 rnp = rdp->mynode;
1956 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1957 if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
1958 rdp->gpwrap) {
1959
1960
1961
1962
1963
1964
1965
1966 rdp->cpu_no_qs.b.norm = true;
1967 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1968 return;
1969 }
1970 mask = rdp->grpmask;
1971 rdp->core_needs_qs = false;
1972 if ((rnp->qsmask & mask) == 0) {
1973 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1974 } else {
1975
1976
1977
1978
1979 if (!offloaded)
1980 needwake = rcu_accelerate_cbs(rnp, rdp);
1981
1982 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
1983
1984 if (needwake)
1985 rcu_gp_kthread_wake();
1986 }
1987}
1988
1989
1990
1991
1992
1993
1994
1995static void
1996rcu_check_quiescent_state(struct rcu_data *rdp)
1997{
1998
1999 note_gp_changes(rdp);
2000
2001
2002
2003
2004
2005 if (!rdp->core_needs_qs)
2006 return;
2007
2008
2009
2010
2011
2012 if (rdp->cpu_no_qs.b.norm)
2013 return;
2014
2015
2016
2017
2018
2019 rcu_report_qs_rdp(rdp->cpu, rdp);
2020}
2021
2022
2023
2024
2025
2026int rcutree_dying_cpu(unsigned int cpu)
2027{
2028 bool blkd;
2029 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
2030 struct rcu_node *rnp = rdp->mynode;
2031
2032 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2033 return 0;
2034
2035 blkd = !!(rnp->qsmask & rdp->grpmask);
2036 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq,
2037 blkd ? TPS("cpuofl") : TPS("cpuofl-bgp"));
2038 return 0;
2039}
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2059{
2060 long mask;
2061 struct rcu_node *rnp = rnp_leaf;
2062
2063 raw_lockdep_assert_held_rcu_node(rnp_leaf);
2064 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2065 WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
2066 WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
2067 return;
2068 for (;;) {
2069 mask = rnp->grpmask;
2070 rnp = rnp->parent;
2071 if (!rnp)
2072 break;
2073 raw_spin_lock_rcu_node(rnp);
2074 rnp->qsmaskinit &= ~mask;
2075
2076 WARN_ON_ONCE(rnp->qsmask);
2077 if (rnp->qsmaskinit) {
2078 raw_spin_unlock_rcu_node(rnp);
2079
2080 return;
2081 }
2082 raw_spin_unlock_rcu_node(rnp);
2083 }
2084}
2085
2086
2087
2088
2089
2090
2091
2092int rcutree_dead_cpu(unsigned int cpu)
2093{
2094 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2095 struct rcu_node *rnp = rdp->mynode;
2096
2097 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2098 return 0;
2099
2100
2101 rcu_boost_kthread_setaffinity(rnp, -1);
2102
2103 do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));
2104 return 0;
2105}
2106
2107
2108
2109
2110
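/*
 * Invoke any RCU callbacks that have made it to the end of their grace
 * period.  Throttle as specified by rdp->blimit.
 */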
2111static void rcu_do_batch(struct rcu_data *rdp)
2112{
2113 unsigned long flags;
2114 const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
2115 rcu_segcblist_is_offloaded(&rdp->cblist);
2116 struct rcu_head *rhp;
2117 struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
2118 long bl, count;
2119 long pending, tlimit = 0;
2120
2121
2122 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
2123 trace_rcu_batch_start(rcu_state.name,
2124 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2125 rcu_segcblist_n_cbs(&rdp->cblist), 0);
2126 trace_rcu_batch_end(rcu_state.name, 0,
2127 !rcu_segcblist_empty(&rdp->cblist),
2128 need_resched(), is_idle_task(current),
2129 rcu_is_callbacks_kthread());
2130 return;
2131 }
2132
2133
2134
2135
2136
2137
2138 local_irq_save(flags);
2139 rcu_nocb_lock(rdp);
2140 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
2141 pending = rcu_segcblist_n_cbs(&rdp->cblist);
2142 bl = max(rdp->blimit, pending >> rcu_divisor);
2143 if (unlikely(bl > 100))
2144 tlimit = local_clock() + rcu_resched_ns;
2145 trace_rcu_batch_start(rcu_state.name,
2146 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2147 rcu_segcblist_n_cbs(&rdp->cblist), bl);
2148 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
2149 if (offloaded)
2150 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2151 rcu_nocb_unlock_irqrestore(rdp, flags);
2152
2153
2154 rhp = rcu_cblist_dequeue(&rcl);
2155 for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
2156 debug_rcu_head_unqueue(rhp);
2157 if (__rcu_reclaim(rcu_state.name, rhp))
2158 rcu_cblist_dequeued_lazy(&rcl);
2159
2160
2161
2162
2163 if (-rcl.len >= bl && !offloaded &&
2164 (need_resched() ||
2165 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
2166 break;
2167 if (unlikely(tlimit)) {
2168
2169 if (likely((-rcl.len & 31) || local_clock() < tlimit))
2170 continue;
2171
2172 break;
2173 }
2174 if (offloaded) {
2175 WARN_ON_ONCE(in_serving_softirq());
2176 local_bh_enable();
2177 lockdep_assert_irqs_enabled();
2178 cond_resched_tasks_rcu_qs();
2179 lockdep_assert_irqs_enabled();
2180 local_bh_disable();
2181 }
2182 }
2183
2184 local_irq_save(flags);
2185 rcu_nocb_lock(rdp);
2186 count = -rcl.len;
2187 trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
2188 is_idle_task(current), rcu_is_callbacks_kthread());
2189
2190
2191 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
2192 smp_mb();
2193 rcu_segcblist_insert_count(&rdp->cblist, &rcl);
2194
2195
2196 count = rcu_segcblist_n_cbs(&rdp->cblist);
2197 if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
2198 rdp->blimit = blimit;
2199
2200
2201 if (count == 0 && rdp->qlen_last_fqs_check != 0) {
2202 rdp->qlen_last_fqs_check = 0;
2203 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2204 } else if (count < rdp->qlen_last_fqs_check - qhimark)
2205 rdp->qlen_last_fqs_check = count;
2206
2207
2208
2209
2210
2211 WARN_ON_ONCE(count == 0 && !rcu_segcblist_empty(&rdp->cblist));
2212 WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
2213 count != 0 && rcu_segcblist_empty(&rdp->cblist));
2214
2215 rcu_nocb_unlock_irqrestore(rdp, flags);
2216
2217
2218 if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
2219 invoke_rcu_core();
2220}
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230void rcu_sched_clock_irq(int user)
2231{
2232 trace_rcu_utilization(TPS("Start scheduler-tick"));
2233 raw_cpu_inc(rcu_data.ticks_this_gp);
2234
2235 if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
2236
2237 if (!rcu_is_cpu_rrupt_from_idle() && !user) {
2238 set_tsk_need_resched(current);
2239 set_preempt_need_resched();
2240 }
2241 __this_cpu_write(rcu_data.rcu_urgent_qs, false);
2242 }
2243 rcu_flavor_sched_clock_irq(user);
2244 if (rcu_pending())
2245 invoke_rcu_core();
2246
2247 trace_rcu_utilization(TPS("End scheduler-tick"));
2248}
2249
2250
2251
2252
2253
2254
2255
2256
2257static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
2258{
2259 int cpu;
2260 unsigned long flags;
2261 unsigned long mask;
2262 struct rcu_node *rnp;
2263
2264 rcu_for_each_leaf_node(rnp) {
2265 cond_resched_tasks_rcu_qs();
2266 mask = 0;
2267 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2268 if (rnp->qsmask == 0) {
2269 if (!IS_ENABLED(CONFIG_PREEMPTION) ||
2270 rcu_preempt_blocked_readers_cgp(rnp)) {
2271
2272
2273
2274
2275
2276 rcu_initiate_boost(rnp, flags);
2277
2278 continue;
2279 }
2280 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2281 continue;
2282 }
2283 for_each_leaf_node_possible_cpu(rnp, cpu) {
2284 unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
2285 if ((rnp->qsmask & bit) != 0) {
2286 if (f(per_cpu_ptr(&rcu_data, cpu)))
2287 mask |= bit;
2288 }
2289 }
2290 if (mask != 0) {
2291
2292 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2293 } else {
2294
2295 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2296 }
2297 }
2298}
2299
2300
2301
2302
2303
2304void rcu_force_quiescent_state(void)
2305{
2306 unsigned long flags;
2307 bool ret;
2308 struct rcu_node *rnp;
2309 struct rcu_node *rnp_old = NULL;
2310
2311
2312 rnp = __this_cpu_read(rcu_data.mynode);
2313 for (; rnp != NULL; rnp = rnp->parent) {
2314 ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
2315 !raw_spin_trylock(&rnp->fqslock);
2316 if (rnp_old != NULL)
2317 raw_spin_unlock(&rnp_old->fqslock);
2318 if (ret)
2319 return;
2320 rnp_old = rnp;
2321 }
2322
2323
2324
2325 raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
2326 raw_spin_unlock(&rnp_old->fqslock);
2327 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
2328 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2329 return;
2330 }
2331 WRITE_ONCE(rcu_state.gp_flags,
2332 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
2333 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2334 rcu_gp_kthread_wake();
2335}
2336EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
2337
2338
2339static __latent_entropy void rcu_core(void)
2340{
2341 unsigned long flags;
2342 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
2343 struct rcu_node *rnp = rdp->mynode;
2344 const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
2345 rcu_segcblist_is_offloaded(&rdp->cblist);
2346
2347 if (cpu_is_offline(smp_processor_id()))
2348 return;
2349 trace_rcu_utilization(TPS("Start RCU core"));
2350 WARN_ON_ONCE(!rdp->beenonline);
2351
2352
2353 if (!(preempt_count() & PREEMPT_MASK)) {
2354 rcu_preempt_deferred_qs(current);
2355 } else if (rcu_preempt_need_deferred_qs(current)) {
2356 set_tsk_need_resched(current);
2357 set_preempt_need_resched();
2358 }
2359
2360
2361 rcu_check_quiescent_state(rdp);
2362
2363
2364 if (!rcu_gp_in_progress() &&
2365 rcu_segcblist_is_enabled(&rdp->cblist) && !offloaded) {
2366 local_irq_save(flags);
2367 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
2368 rcu_accelerate_cbs_unlocked(rnp, rdp);
2369 local_irq_restore(flags);
2370 }
2371
2372 rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
2373
2374
2375 if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist) &&
2376 likely(READ_ONCE(rcu_scheduler_fully_active)))
2377 rcu_do_batch(rdp);
2378
2379
2380 do_nocb_deferred_wakeup(rdp);
2381 trace_rcu_utilization(TPS("End RCU core"));
2382}
2383
2384static void rcu_core_si(struct softirq_action *h)
2385{
2386 rcu_core();
2387}
2388
2389static void rcu_wake_cond(struct task_struct *t, int status)
2390{
2391
2392
2393
2394
2395 if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
2396 wake_up_process(t);
2397}
2398
2399static void invoke_rcu_core_kthread(void)
2400{
2401 struct task_struct *t;
2402 unsigned long flags;
2403
2404 local_irq_save(flags);
2405 __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
2406 t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
2407 if (t != NULL && t != current)
2408 rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
2409 local_irq_restore(flags);
2410}
2411
2412
2413
2414
2415static void invoke_rcu_core(void)
2416{
2417 if (!cpu_online(smp_processor_id()))
2418 return;
2419 if (use_softirq)
2420 raise_softirq(RCU_SOFTIRQ);
2421 else
2422 invoke_rcu_core_kthread();
2423}
2424
2425static void rcu_cpu_kthread_park(unsigned int cpu)
2426{
2427 per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
2428}
2429
2430static int rcu_cpu_kthread_should_run(unsigned int cpu)
2431{
2432 return __this_cpu_read(rcu_data.rcu_cpu_has_work);
2433}
2434
2435
2436
2437
2438
2439
2440static void rcu_cpu_kthread(unsigned int cpu)
2441{
2442 unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
2443 char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
2444 int spincnt;
2445
2446 for (spincnt = 0; spincnt < 10; spincnt++) {
2447 trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
2448 local_bh_disable();
2449 *statusp = RCU_KTHREAD_RUNNING;
2450 local_irq_disable();
2451 work = *workp;
2452 *workp = 0;
2453 local_irq_enable();
2454 if (work)
2455 rcu_core();
2456 local_bh_enable();
2457 if (*workp == 0) {
2458 trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
2459 *statusp = RCU_KTHREAD_WAITING;
2460 return;
2461 }
2462 }
2463 *statusp = RCU_KTHREAD_YIELDING;
2464 trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
2465 schedule_timeout_interruptible(2);
2466 trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
2467 *statusp = RCU_KTHREAD_WAITING;
2468}
2469
2470static struct smp_hotplug_thread rcu_cpu_thread_spec = {
2471 .store = &rcu_data.rcu_cpu_kthread_task,
2472 .thread_should_run = rcu_cpu_kthread_should_run,
2473 .thread_fn = rcu_cpu_kthread,
2474 .thread_comm = "rcuc/%u",
2475 .setup = rcu_cpu_kthread_setup,
2476 .park = rcu_cpu_kthread_park,
2477};
2478
2479
2480
2481
2482static int __init rcu_spawn_core_kthreads(void)
2483{
2484 int cpu;
2485
2486 for_each_possible_cpu(cpu)
2487 per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
2488 if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
2489 return 0;
2490 WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
2491 "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
2492 return 0;
2493}
2494early_initcall(rcu_spawn_core_kthreads);
2495
2496
2497
2498
2499static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
2500 unsigned long flags)
2501{
2502
2503
2504
2505
2506 if (!rcu_is_watching())
2507 invoke_rcu_core();
2508
2509
2510 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2511 return;
2512
2513
2514
2515
2516
2517
2518
2519
2520 if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
2521 rdp->qlen_last_fqs_check + qhimark)) {
2522
2523
2524 note_gp_changes(rdp);
2525
2526
2527 if (!rcu_gp_in_progress()) {
2528 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
2529 } else {
2530
2531 rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
2532 if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
2533 rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
2534 rcu_force_quiescent_state();
2535 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2536 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2537 }
2538 }
2539}
2540
2541
2542
2543
2544static void rcu_leak_callback(struct rcu_head *rhp)
2545{
2546}
2547
2548
2549
2550
2551
2552
2553
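/*
 * Core call_rcu() processing: enqueue the callback on this CPU's
 * segmented callback list (or hand it to the no-CBs machinery) and,
 * if needed, nudge RCU toward starting or forcing a grace period.
 */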
2554static void
2555__call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy)
2556{
2557 unsigned long flags;
2558 struct rcu_data *rdp;
2559 bool was_alldone;
2560
2561
2562 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
2563
2564 if (debug_rcu_head_queue(head)) {
2565
2566
2567
2568
2569
2570 WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pS()!!!\n",
2571 head, head->func);
2572 WRITE_ONCE(head->func, rcu_leak_callback);
2573 return;
2574 }
2575 head->func = func;
2576 head->next = NULL;
2577 local_irq_save(flags);
2578 rdp = this_cpu_ptr(&rcu_data);
2579
2580
2581 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
2582
2583 WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE);
2584 WARN_ON_ONCE(!rcu_is_watching());
2585
2586
2587 if (rcu_segcblist_empty(&rdp->cblist))
2588 rcu_segcblist_init(&rdp->cblist);
2589 }
2590 if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
2591 return;
2592
2593 rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
2594 if (__is_kfree_rcu_offset((unsigned long)func))
2595 trace_rcu_kfree_callback(rcu_state.name, head,
2596 (unsigned long)func,
2597 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2598 rcu_segcblist_n_cbs(&rdp->cblist));
2599 else
2600 trace_rcu_callback(rcu_state.name, head,
2601 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2602 rcu_segcblist_n_cbs(&rdp->cblist));
2603
2604
2605 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
2606 unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
2607 __call_rcu_nocb_wake(rdp, was_alldone, flags);
2608 } else {
2609 __call_rcu_core(rdp, head, flags);
2610 local_irq_restore(flags);
2611 }
2612}
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649void call_rcu(struct rcu_head *head, rcu_callback_t func)
2650{
2651 __call_rcu(head, func, 0);
2652}
2653EXPORT_SYMBOL_GPL(call_rcu);
2654
2655
2656
2657
2658
2659
2660
2661
2662void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
2663{
2664 __call_rcu(head, func, 1);
2665}
2666EXPORT_SYMBOL_GPL(kfree_call_rcu);
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680static int rcu_blocking_is_gp(void)
2681{
2682 int ret;
2683
2684 if (IS_ENABLED(CONFIG_PREEMPTION))
2685 return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
2686 might_sleep();
2687 preempt_disable();
2688 ret = num_online_cpus() <= 1;
2689 preempt_enable();
2690 return ret;
2691}
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
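/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  Note, however, that
 * upon return from synchronize_rcu(), the caller might well be executing
 * concurrently with new RCU read-side critical sections that began while
 * synchronize_rcu() was waiting.
 */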
2726void synchronize_rcu(void)
2727{
2728 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
2729 lock_is_held(&rcu_lock_map) ||
2730 lock_is_held(&rcu_sched_lock_map),
2731 "Illegal synchronize_rcu() in RCU read-side critical section");
2732 if (rcu_blocking_is_gp())
2733 return;
2734 if (rcu_gp_is_expedited())
2735 synchronize_rcu_expedited();
2736 else
2737 wait_rcu_gp(call_rcu);
2738}
2739EXPORT_SYMBOL_GPL(synchronize_rcu);
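
/*
 * Illustrative usage sketch (hypothetical names): the classical synchronous
 * update pattern makes the old version unreachable, waits for a grace
 * period, and only then frees it.
 *
 *	struct foo __rcu *global_foo;		// updates serialized by foo_mutex
 *
 *	static void update_foo(struct foo *new_fp)
 *	{
 *		struct foo *old_fp;
 *
 *		old_fp = rcu_dereference_protected(global_foo,
 *						   lockdep_is_held(&foo_mutex));
 *		rcu_assign_pointer(global_foo, new_fp);
 *		synchronize_rcu();		// wait out pre-existing readers
 *		kfree(old_fp);
 *	}
 */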

/**
 * get_state_synchronize_rcu - Snapshot current RCU state
 *
 * Returns a cookie that is used by a later call to cond_synchronize_rcu()
 * to determine whether or not a full grace period has elapsed in the
 * meantime.
 */
unsigned long get_state_synchronize_rcu(void)
{
	/*
	 * Any prior manipulation of RCU-protected data must happen
	 * before the load from ->gp_seq.
	 */
	smp_mb();
	return rcu_seq_snap(&rcu_state.gp_seq);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);

/**
 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
 * @oldstate: return value from earlier call to get_state_synchronize_rcu()
 *
 * If a full RCU grace period has elapsed since the earlier call to
 * get_state_synchronize_rcu(), just return.  Otherwise, invoke
 * synchronize_rcu() to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.  But
 * counter wrap is harmless: if the counter wraps, we have waited for
 * more than two billion grace periods (and way more on a 64-bit
 * system!), so waiting for one additional grace period is fine.
 */
void cond_synchronize_rcu(unsigned long oldstate)
{
	if (!rcu_seq_done(&rcu_state.gp_seq, oldstate))
		synchronize_rcu();
	else
		smp_mb(); /* Order against post-wait accesses. */
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
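
/*
 * Illustrative usage sketch (hypothetical names): a caller can snapshot the
 * grace-period state, do unrelated work, and then wait only if no grace
 * period happened to elapse on its own in the meantime.
 *
 *	static void retire_foo_maybe_cheaply(struct foo *old_fp)
 *	{
 *		unsigned long cookie = get_state_synchronize_rcu();
 *
 *		do_other_lengthy_work();	// hypothetical helper
 *		cond_synchronize_rcu(cookie);	// often a no-op by now
 *		kfree(old_fp);
 *	}
 */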

/*
 * Check to see if there is any immediate RCU-related work to be done by
 * the current CPU, returning 1 if so and zero otherwise.  The checks are
 * in order of increasing expense, but the CPU-stall check must come
 * first, else it might never get a chance to run.
 */
static int rcu_pending(void)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp = rdp->mynode;

	/* Check for CPU stalls, if enabled. */
	check_cpu_stall(rdp);

	/* Does this CPU need a deferred NOCB wakeup? */
	if (rcu_nocb_need_deferred_wakeup(rdp))
		return 1;

	/* Is this a nohz_full CPU that should leave RCU alone? */
	if (rcu_nohz_full_cpu())
		return 0;

	/* Is the RCU core waiting for a quiescent state from this CPU? */
	if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm)
		return 1;

	/* Does this CPU have callbacks ready to invoke? */
	if (rcu_segcblist_ready_cbs(&rdp->cblist))
		return 1;

	/* Has RCU gone idle with this CPU needing another grace period? */
	if (!rcu_gp_in_progress() &&
	    rcu_segcblist_is_enabled(&rdp->cblist) &&
	    (!IS_ENABLED(CONFIG_RCU_NOCB_CPU) ||
	     !rcu_segcblist_is_offloaded(&rdp->cblist)) &&
	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
		return 1;

	/* Has a grace period completed or started since we last checked? */
	if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq ||
	    unlikely(READ_ONCE(rdp->gpwrap))) /* outside lock */
		return 1;

	/* Nothing to do. */
	return 0;
}

/*
 * Helper function for rcu_barrier() tracing.  If tracing is disabled,
 * the compiler is expected to optimize this away.
 */
static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)
{
	trace_rcu_barrier(rcu_state.name, s, cpu,
			  atomic_read(&rcu_state.barrier_cpu_count), done);
}

/*
 * RCU callback function for rcu_barrier().  If we are last, wake
 * up the task executing rcu_barrier().
 */
static void rcu_barrier_callback(struct rcu_head *rhp)
{
	if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
		rcu_barrier_trace(TPS("LastCB"), -1,
				  rcu_state.barrier_sequence);
		complete(&rcu_state.barrier_completion);
	} else {
		rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
	}
}

/*
 * Called with preemption disabled, and from cross-cpu IRQ context.
 */
static void rcu_barrier_func(void *unused)
{
	struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);

	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
	rdp->barrier_head.func = rcu_barrier_callback;
	debug_rcu_head_queue(&rdp->barrier_head);
	rcu_nocb_lock(rdp);
	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
		atomic_inc(&rcu_state.barrier_cpu_count);
	} else {
		debug_rcu_head_unqueue(&rdp->barrier_head);
		rcu_barrier_trace(TPS("IRQNQ"), -1,
				  rcu_state.barrier_sequence);
	}
	rcu_nocb_unlock(rdp);
}

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 *
 * Note that this primitive does not necessarily wait for a grace period
 * to complete.  For example, if there are no RCU callbacks queued anywhere
 * in the system, then rcu_barrier() is within its rights to return
 * immediately, without waiting for anything, let alone a grace period.
 */
void rcu_barrier(void)
{
	int cpu;
	struct rcu_data *rdp;
	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);

	rcu_barrier_trace(TPS("Begin"), -1, s);

	/* Take mutex to serialize concurrent rcu_barrier() requests. */
	mutex_lock(&rcu_state.barrier_mutex);

	/* Did someone else do our work for us? */
	if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
		rcu_barrier_trace(TPS("EarlyExit"), -1,
				  rcu_state.barrier_sequence);
		smp_mb(); /* Order against the caller's subsequent code. */
		mutex_unlock(&rcu_state.barrier_mutex);
		return;
	}

	/* Mark the start of the barrier operation. */
	rcu_seq_start(&rcu_state.barrier_sequence);
	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);

	/*
	 * Initialize the count to one rather than to zero in order to
	 * avoid a too-soon return to zero in case of a short grace period
	 * (or preemption of this task).  Exclude CPU-hotplug operations
	 * while scanning the CPUs' callback lists.
	 */
	init_completion(&rcu_state.barrier_completion);
	atomic_set(&rcu_state.barrier_cpu_count, 1);
	get_online_cpus();

	/*
	 * Force each CPU with callbacks to register a new callback.
	 * When that callback is invoked, we will know that all of the
	 * corresponding CPU's preceding callbacks have been invoked.
	 */
	for_each_possible_cpu(cpu) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (!cpu_online(cpu) &&
		    !rcu_segcblist_is_offloaded(&rdp->cblist))
			continue;
		if (rcu_segcblist_n_cbs(&rdp->cblist)) {
			rcu_barrier_trace(TPS("OnlineQ"), cpu,
					  rcu_state.barrier_sequence);
			smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
		} else {
			rcu_barrier_trace(TPS("OnlineNQ"), cpu,
					  rcu_state.barrier_sequence);
		}
	}
	put_online_cpus();

	/*
	 * Now that we have an rcu_barrier_callback() callback on each
	 * CPU, and thus each counted, remove the initial count.
	 */
	if (atomic_dec_and_test(&rcu_state.barrier_cpu_count))
		complete(&rcu_state.barrier_completion);

	/* Wait for all rcu_barrier_callback() callbacks to be invoked. */
	wait_for_completion(&rcu_state.barrier_completion);

	/* Mark the end of the barrier operation. */
	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
	rcu_seq_end(&rcu_state.barrier_sequence);

	/* Other rcu_barrier() invocations can now safely proceed. */
	mutex_unlock(&rcu_state.barrier_mutex);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
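
/*
 * Illustrative usage sketch (hypothetical module): code that queued
 * callbacks with call_rcu() must invoke rcu_barrier() before unloading,
 * so that no callback runs after its function's text has been freed.
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_foo_everywhere();	// stop queueing new callbacks
 *		rcu_barrier();			// wait for already-queued ones
 *	}
 *	module_exit(foo_exit);
 */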

/*
 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the
 * first CPU in a given leaf rcu_node structure coming online.  The caller
 * must hold the corresponding leaf rcu_node ->lock with interrupts
 * disabled.
 */
static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
{
	long mask;
	long oldmask;
	struct rcu_node *rnp = rnp_leaf;

	raw_lockdep_assert_held_rcu_node(rnp_leaf);
	WARN_ON_ONCE(rnp->wait_blkd_tasks);
	for (;;) {
		mask = rnp->grpmask;
		rnp = rnp->parent;
		if (rnp == NULL)
			return;
		raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
		oldmask = rnp->qsmaskinit;
		rnp->qsmaskinit |= mask;
		raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
		if (oldmask)
			return;
	}
}

/*
 * Do boot-time initialization of a CPU's per-CPU RCU data.
 */
static void __init
rcu_boot_init_percpu_data(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	/* Set up local state, ensuring consistent view of global state. */
	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
	rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
	rdp->cpu = cpu;
	rcu_boot_init_nocb_percpu_data(rdp);
}

/*
 * Invoked early in the CPU-online process, when pretty much all services
 * are available.  The incoming CPU is not yet present.
 *
 * Initializes a CPU's per-CPU RCU data.  Note that this work must be
 * deferred until the CPU actually comes online; anything that can be
 * done at boot time is done in rcu_boot_init_percpu_data().
 */
int rcutree_prepare_cpu(unsigned int cpu)
{
	unsigned long flags;
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_node *rnp = rcu_get_root();

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rdp->qlen_last_fqs_check = 0;
	rdp->n_force_qs_snap = rcu_state.n_force_qs;
	rdp->blimit = blimit;
	if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
	    !rcu_segcblist_is_offloaded(&rdp->cblist))
		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
	rdp->dynticks_nesting = 1;	/* CPU not up, no tearing. */
	rcu_dynticks_eqs_online();
	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */

	/*
	 * Add CPU to the leaf rcu_node pending-online bitmask.  Any needed
	 * propagation up the rcu_node tree will happen at the beginning
	 * of the next grace period.
	 */
	rnp = rdp->mynode;
	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
	rdp->beenonline = true;	 /* We have now been online. */
	rdp->gp_seq = rnp->gp_seq;
	rdp->gp_seq_needed = rnp->gp_seq;
	rdp->cpu_no_qs.b.norm = true;
	rdp->core_needs_qs = false;
	rdp->rcu_iw_pending = false;
	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	rcu_prepare_kthreads(cpu);
	rcu_spawn_cpu_nocb_kthread(cpu);

	return 0;
}
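
/*
 * Illustrative sketch only: the actual wiring lives in the CPU-hotplug
 * core, not here.  Hooks such as rcutree_prepare_cpu() run as cpuhp
 * states during CPU bring-up, roughly equivalent to a registration of
 * the following form (state and teardown names below are approximate):
 *
 *	cpuhp_setup_state_nocalls(CPUHP_RCUTREE_PREP, "RCU/tree:prepare",
 *				  rcutree_prepare_cpu, rcutree_dead_cpu);
 */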

/*
 * Update RCU priority-boost kthread affinity for CPU-hotplug changes.
 */
static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
}

/*
 * Near the end of the CPU-online process.  Pretty much all services
 * are enabled, and the CPU is now very much alive.
 */
int rcutree_online_cpu(unsigned int cpu)
{
	unsigned long flags;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->ffmask |= rdp->grpmask;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return 0; /* Too early in boot for scheduler work. */
	sync_sched_exp_online_cleanup(cpu);
	rcutree_affinity_setting(cpu, -1);
	return 0;
}

/*
 * Near the beginning of the CPU-offline process.  The CPU is still very
 * much alive with pretty much all services enabled.
 */
int rcutree_offline_cpu(unsigned int cpu)
{
	unsigned long flags;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->ffmask &= ~rdp->grpmask;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

	rcutree_affinity_setting(cpu, cpu);
	return 0;
}

static DEFINE_PER_CPU(int, rcu_cpu_started);

/*
 * Mark the specified CPU as being online so that subsequent grace periods
 * (both expedited and normal) will wait on it.  Note that this means that
 * incoming CPUs are not allowed to use RCU read-side critical sections
 * until this function is called.  Failing to observe this restriction
 * will result in lockdep splats.
 *
 * Note that this function is special in that it is invoked directly
 * from the incoming CPU rather than from the cpuhp_step mechanism,
 * because this function must be invoked at a precise location.
 */
void rcu_cpu_starting(unsigned int cpu)
{
	unsigned long flags;
	unsigned long mask;
	int nbits;
	unsigned long oldmask;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	if (per_cpu(rcu_cpu_started, cpu))
		return;

	per_cpu(rcu_cpu_started, cpu) = 1;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	mask = rdp->grpmask;
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->qsmaskinitnext |= mask;
	oldmask = rnp->expmaskinitnext;
	rnp->expmaskinitnext |= mask;
	oldmask ^= rnp->expmaskinitnext;
	nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
	/* Allow lockless access for expedited grace periods. */
	smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits);
	rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
	rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
	rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
	if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
		/* Report QS -- rcu_report_qs_rnp() releases rnp->lock. */
		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
	} else {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
	smp_mb(); /* Ensure RCU read-side usable by rcu_report_dead(). */
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * The outgoing CPU has no further need of RCU, so remove it from the
 * rcu_node tree's ->qsmaskinitnext bit masks.
 *
 * Note that this function is special in that it is invoked directly
 * from the outgoing CPU rather than from the cpuhp_step mechanism,
 * because this function must be invoked at a precise location.
 */
void rcu_report_dead(unsigned int cpu)
{
	unsigned long flags;
	unsigned long mask;
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */

	/* QS for any half-done expedited grace period. */
	preempt_disable();
	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
	preempt_enable();
	rcu_preempt_deferred_qs(current);

	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
	mask = rdp->grpmask;
	raw_spin_lock(&rcu_state.ofl_lock);
	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
	rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
	if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
		/* Report quiescent state -- releases rnp->lock. */
		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
	}
	rnp->qsmaskinitnext &= ~mask;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	raw_spin_unlock(&rcu_state.ofl_lock);

	per_cpu(rcu_cpu_started, cpu) = 0;
}

/*
 * The outgoing CPU has just passed through the dying-idle state, and we
 * are being invoked from the CPU that was IPIed to continue the offline
 * operation.  Migrate the outgoing CPU's callbacks to the current CPU.
 */
void rcutree_migrate_callbacks(int cpu)
{
	unsigned long flags;
	struct rcu_data *my_rdp;
	struct rcu_node *my_rnp;
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	bool needwake;

	if (rcu_segcblist_is_offloaded(&rdp->cblist) ||
	    rcu_segcblist_empty(&rdp->cblist))
		return;  /* No callbacks to migrate. */

	local_irq_save(flags);
	my_rdp = this_cpu_ptr(&rcu_data);
	my_rnp = my_rdp->mynode;
	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
	WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
	raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
	/* Leverage recent GPs and set GP for new callbacks. */
	needwake = rcu_advance_cbs(my_rnp, rdp) ||
		   rcu_advance_cbs(my_rnp, my_rdp);
	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
	rcu_segcblist_disable(&rdp->cblist);
	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
	if (rcu_segcblist_is_offloaded(&my_rdp->cblist)) {
		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
		__call_rcu_nocb_wake(my_rdp, true, flags);
	} else {
		rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */
		raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);
	}
	if (needwake)
		rcu_gp_kthread_wake();
	lockdep_assert_irqs_enabled();
	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
		  !rcu_segcblist_empty(&rdp->cblist),
		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
		  rcu_segcblist_first_cb(&rdp->cblist));
}
#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Use expedited RCU grace periods while suspending or hibernating,
 * which makes those operations complete faster.
 */
static int rcu_pm_notify(struct notifier_block *self,
			 unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		rcu_expedite_gp();
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		rcu_unexpedite_gp();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

/*
 * Spawn the kthreads that handle RCU's grace periods.
 */
static int __init rcu_spawn_gp_kthread(void)
{
	unsigned long flags;
	int kthread_prio_in = kthread_prio;
	struct rcu_node *rnp;
	struct sched_param sp;
	struct task_struct *t;

	/* Force priority into range. */
	if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
	    && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
		kthread_prio = 2;
	else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
		kthread_prio = 1;
	else if (kthread_prio < 0)
		kthread_prio = 0;
	else if (kthread_prio > 99)
		kthread_prio = 99;

	if (kthread_prio != kthread_prio_in)
		pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
			 kthread_prio, kthread_prio_in);

	rcu_scheduler_fully_active = 1;
	t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
	if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
		return 0;
	if (kthread_prio) {
		sp.sched_priority = kthread_prio;
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	}
	rnp = rcu_get_root();
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rcu_state.gp_kthread = t;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	wake_up_process(t);
	rcu_spawn_nocb_kthreads();
	rcu_spawn_boost_kthreads();
	return 0;
}
early_initcall(rcu_spawn_gp_kthread);

/*
 * This function is invoked towards the end of the scheduler's
 * initialization process.  Before this is called, the idle task might
 * contain synchronous grace-period primitives (during which time, this
 * idle task is booting the system, and such primitives are no-ops).
 * After this function is called, any synchronous grace-period primitive
 * is run as expedited, with the requesting task driving the grace period
 * forward.  A later core_initcall() will switch to full runtime RCU
 * functionality.
 */
void rcu_scheduler_starting(void)
{
	WARN_ON(num_online_cpus() != 1);
	WARN_ON(nr_context_switches() > 0);
	rcu_test_sync_prims();
	rcu_scheduler_active = RCU_SCHEDULER_INIT;
	rcu_test_sync_prims();
}

/*
 * Helper function for rcu_init() that initializes the rcu_state structure.
 */
static void __init rcu_init_one(void)
{
	static const char * const buf[] = RCU_NODE_NAME_INIT;
	static const char * const fqs[] = RCU_FQS_NAME_INIT;
	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];

	int levelspread[RCU_NUM_LVLS];	/* kids/node in each level. */
	int cpustride = 1;
	int i;
	int j;
	struct rcu_node *rnp;

	BUILD_BUG_ON(RCU_NUM_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */

	/* Sanity-check the configured number of levels. */
	if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
		panic("rcu_init_one: rcu_num_lvls out of range");

	/* Initialize the level-tracking arrays. */

	for (i = 1; i < rcu_num_lvls; i++)
		rcu_state.level[i] =
			rcu_state.level[i - 1] + num_rcu_lvl[i - 1];
	rcu_init_levelspread(levelspread, num_rcu_lvl);

	/* Initialize the elements themselves, starting from the leaves. */

	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		cpustride *= levelspread[i];
		rnp = rcu_state.level[i];
		for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
			raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
			lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
						   &rcu_node_class[i], buf[i]);
			raw_spin_lock_init(&rnp->fqslock);
			lockdep_set_class_and_name(&rnp->fqslock,
						   &rcu_fqs_class[i], fqs[i]);
			rnp->gp_seq = rcu_state.gp_seq;
			rnp->gp_seq_needed = rcu_state.gp_seq;
			rnp->completedqs = rcu_state.gp_seq;
			rnp->qsmask = 0;
			rnp->qsmaskinit = 0;
			rnp->grplo = j * cpustride;
			rnp->grphi = (j + 1) * cpustride - 1;
			if (rnp->grphi >= nr_cpu_ids)
				rnp->grphi = nr_cpu_ids - 1;
			if (i == 0) {
				rnp->grpnum = 0;
				rnp->grpmask = 0;
				rnp->parent = NULL;
			} else {
				rnp->grpnum = j % levelspread[i - 1];
				rnp->grpmask = BIT(rnp->grpnum);
				rnp->parent = rcu_state.level[i - 1] +
					      j / levelspread[i - 1];
			}
			rnp->level = i;
			INIT_LIST_HEAD(&rnp->blkd_tasks);
			rcu_init_one_nocb(rnp);
			init_waitqueue_head(&rnp->exp_wq[0]);
			init_waitqueue_head(&rnp->exp_wq[1]);
			init_waitqueue_head(&rnp->exp_wq[2]);
			init_waitqueue_head(&rnp->exp_wq[3]);
			spin_lock_init(&rnp->exp_lock);
		}
	}

	init_swait_queue_head(&rcu_state.gp_wq);
	init_swait_queue_head(&rcu_state.expedited_wq);
	rnp = rcu_first_leaf_node();
	for_each_possible_cpu(i) {
		while (i > rnp->grphi)
			rnp++;
		per_cpu_ptr(&rcu_data, i)->mynode = rnp;
		rcu_boot_init_percpu_data(i);
	}
}

/*
 * Compute the rcu_node tree geometry from kernel parameters.  This cannot
 * replace the definitions in tree.h because those are needed to size the
 * ->node array in the rcu_state structure.
 */
static void __init rcu_init_geometry(void)
{
	ulong d;
	int i;
	int rcu_capacity[RCU_NUM_LVLS];

	/*
	 * Initialize any unspecified boot parameters.
	 * The defaults for jiffies_till_first_fqs and jiffies_till_next_fqs
	 * start from RCU_JIFFIES_TILL_FORCE_QS (a function of HZ), adding
	 * one jiffy for each RCU_JIFFIES_FQS_DIV CPUs that might be present.
	 */
	d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	if (jiffies_till_first_fqs == ULONG_MAX)
		jiffies_till_first_fqs = d;
	if (jiffies_till_next_fqs == ULONG_MAX)
		jiffies_till_next_fqs = d;
	adjust_jiffies_till_sched_qs();

	/* If the compile-time values are accurate, just leave. */
	if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
	    nr_cpu_ids == NR_CPUS)
		return;
	pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
		rcu_fanout_leaf, nr_cpu_ids);

	/*
	 * The boot-time rcu_fanout_leaf parameter must be at least two
	 * and cannot exceed the number of bits in the rcu_node masks.
	 * Complain and fall back to the compile-time values if this
	 * limit is exceeded.
	 */
	if (rcu_fanout_leaf < 2 ||
	    rcu_fanout_leaf > sizeof(unsigned long) * 8) {
		rcu_fanout_leaf = RCU_FANOUT_LEAF;
		WARN_ON(1);
		return;
	}

	/*
	 * Compute the number of CPUs that an rcu_node tree with the given
	 * number of levels can handle.
	 */
	rcu_capacity[0] = rcu_fanout_leaf;
	for (i = 1; i < RCU_NUM_LVLS; i++)
		rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;

	/*
	 * The tree must be able to accommodate the configured number of
	 * CPUs.  If this limit is exceeded, fall back to the compile-time
	 * values.
	 */
	if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) {
		rcu_fanout_leaf = RCU_FANOUT_LEAF;
		WARN_ON(1);
		return;
	}

	/* Calculate the number of levels in the tree. */
	for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
	}
	rcu_num_lvls = i + 1;

	/* Calculate the number of rcu_node structures at each level. */
	for (i = 0; i < rcu_num_lvls; i++) {
		int cap = rcu_capacity[(rcu_num_lvls - 1) - i];
		num_rcu_lvl[i] = DIV_ROUND_UP(nr_cpu_ids, cap);
	}

	/* Calculate the total number of rcu_node structures. */
	rcu_num_nodes = 0;
	for (i = 0; i < rcu_num_lvls; i++)
		rcu_num_nodes += num_rcu_lvl[i];
}
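
/*
 * Worked example (assuming the common defaults of RCU_FANOUT=64 and
 * rcu_fanout_leaf=16 on a 64-bit build) for nr_cpu_ids=6000:
 *
 *	rcu_capacity[] = { 16, 1024, 65536, ... }
 *	6000 > 16 and 6000 > 1024, but 6000 <= 65536, so rcu_num_lvls = 3
 *	num_rcu_lvl[] = { DIV_ROUND_UP(6000, 65536) = 1,
 *			  DIV_ROUND_UP(6000, 1024)  = 6,
 *			  DIV_ROUND_UP(6000, 16)    = 375 }
 *	rcu_num_nodes = 1 + 6 + 375 = 382
 */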

/*
 * Dump out the structure of the rcu_node combining tree associated
 * with the rcu_state structure.
 */
static void __init rcu_dump_rcu_node_tree(void)
{
	int level = 0;
	struct rcu_node *rnp;

	pr_info("rcu_node tree layout dump\n");
	pr_info(" ");
	rcu_for_each_node_breadth_first(rnp) {
		if (rnp->level != level) {
			pr_cont("\n");
			pr_info(" ");
			level = rnp->level;
		}
		pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum);
	}
	pr_cont("\n");
}

struct workqueue_struct *rcu_gp_wq;
struct workqueue_struct *rcu_par_gp_wq;

void __init rcu_init(void)
{
	int cpu;

	rcu_early_boot_tests();

	rcu_bootup_announce();
	rcu_init_geometry();
	rcu_init_one();
	if (dump_tree)
		rcu_dump_rcu_node_tree();
	if (use_softirq)
		open_softirq(RCU_SOFTIRQ, rcu_core_si);

	/*
	 * We don't need protection against CPU-hotplug here because
	 * this is called early in boot, before either interrupts
	 * or the scheduler are operational.
	 */
	pm_notifier(rcu_pm_notify, 0);
	for_each_online_cpu(cpu) {
		rcutree_prepare_cpu(cpu);
		rcu_cpu_starting(cpu);
		rcutree_online_cpu(cpu);
	}

	/* Create workqueues for expedited GPs and for Tree SRCU. */
	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
	WARN_ON(!rcu_gp_wq);
	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
	WARN_ON(!rcu_par_gp_wq);
	srcu_init();
}

#include "tree_stall.h"
#include "tree_exp.h"
#include "tree_plugin.h"