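/*
 * Tree (hierarchical) RCU implementation.  This file provides the core
 * grace-period machinery: per-CPU quiescent-state tracking via the
 * ->dynticks counters, propagation of quiescent states up the rcu_node
 * tree, the grace-period kthread, and invocation of RCU callbacks.
 */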
18#define pr_fmt(fmt) "rcu: " fmt
19
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/init.h>
23#include <linux/spinlock.h>
24#include <linux/smp.h>
25#include <linux/rcupdate_wait.h>
26#include <linux/interrupt.h>
27#include <linux/sched.h>
28#include <linux/sched/debug.h>
29#include <linux/nmi.h>
30#include <linux/atomic.h>
31#include <linux/bitops.h>
32#include <linux/export.h>
33#include <linux/completion.h>
34#include <linux/moduleparam.h>
35#include <linux/panic.h>
36#include <linux/panic_notifier.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/cpu.h>
40#include <linux/mutex.h>
41#include <linux/time.h>
42#include <linux/kernel_stat.h>
43#include <linux/wait.h>
44#include <linux/kthread.h>
45#include <uapi/linux/sched/types.h>
46#include <linux/prefetch.h>
47#include <linux/delay.h>
48#include <linux/random.h>
49#include <linux/trace_events.h>
50#include <linux/suspend.h>
51#include <linux/ftrace.h>
52#include <linux/tick.h>
53#include <linux/sysrq.h>
54#include <linux/kprobes.h>
55#include <linux/gfp.h>
56#include <linux/oom.h>
57#include <linux/smpboot.h>
58#include <linux/jiffies.h>
59#include <linux/slab.h>
60#include <linux/sched/isolation.h>
61#include <linux/sched/clock.h>
62#include <linux/vmalloc.h>
63#include <linux/mm.h>
64#include <linux/kasan.h>
65#include "../time/tick-internal.h"
66
67#include "tree.h"
68#include "rcu.h"
69
70#ifdef MODULE_PARAM_PREFIX
71#undef MODULE_PARAM_PREFIX
72#endif
73#define MODULE_PARAM_PREFIX "rcutree."
74
75
76
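/*
 * Per-CPU RCU bookkeeping.  ->dynticks_nesting and ->dynticks_nmi_nesting
 * start out as if the CPU were running in non-idle kernel context, and
 * ->dynticks starts with its bottom bit set, meaning that RCU is initially
 * watching this CPU.  The global rcu_state structure holds grace-period
 * state shared by all CPUs, with ->gp_seq biased so that sequence-counter
 * wrap is exercised early during boot.
 */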
77static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
78 .dynticks_nesting = 1,
79 .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
80 .dynticks = ATOMIC_INIT(1),
81#ifdef CONFIG_RCU_NOCB_CPU
82 .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
83#endif
84};
85static struct rcu_state rcu_state = {
86 .level = { &rcu_state.node[0] },
87 .gp_state = RCU_GP_IDLE,
88 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
89 .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
90 .name = RCU_NAME,
91 .abbr = RCU_ABBR,
92 .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
93 .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
94 .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
95};
96
97
98static bool dump_tree;
99module_param(dump_tree, bool, 0444);
100
101static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
102#ifndef CONFIG_PREEMPT_RT
103module_param(use_softirq, bool, 0444);
104#endif
105
106static bool rcu_fanout_exact;
107module_param(rcu_fanout_exact, bool, 0444);
108
109static int rcu_fanout_leaf = RCU_FANOUT_LEAF;
110module_param(rcu_fanout_leaf, int, 0444);
111int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
112
113int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
114int rcu_num_nodes __read_mostly = NUM_RCU_NODES;
115
116
117
118
119
120
121
122
123
124
125
126
127
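/*
 * rcu_scheduler_active transitions away from RCU_SCHEDULER_INACTIVE once
 * the scheduler is far enough along in boot to run multiple tasks.  Until
 * then, grace-period primitives may assume that only one task exists and
 * take corresponding shortcuts.
 */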
128int rcu_scheduler_active __read_mostly;
129EXPORT_SYMBOL_GPL(rcu_scheduler_active);
130
131
132
133
134
135
136
137
138
139
140
141
142
143static int rcu_scheduler_fully_active __read_mostly;
144
145static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
146 unsigned long gps, unsigned long flags);
147static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
148static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
149static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
150static void invoke_rcu_core(void);
151static void rcu_report_exp_rdp(struct rcu_data *rdp);
152static void sync_sched_exp_online_cleanup(int cpu);
153static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
154static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
155
156
157static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
158module_param(kthread_prio, int, 0444);
159
160
161
162static int gp_preinit_delay;
163module_param(gp_preinit_delay, int, 0444);
164static int gp_init_delay;
165module_param(gp_init_delay, int, 0444);
166static int gp_cleanup_delay;
167module_param(gp_cleanup_delay, int, 0444);
168
169
170static int rcu_unlock_delay;
171#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
172module_param(rcu_unlock_delay, int, 0444);
173#endif
174
175
176
177
178
179
180
181static int rcu_min_cached_objs = 5;
182module_param(rcu_min_cached_objs, int, 0444);
183
184
185
186
187
188
189
190
191
192static int rcu_delay_page_cache_fill_msec = 5000;
193module_param(rcu_delay_page_cache_fill_msec, int, 0444);
194
195
196int rcu_get_gp_kthreads_prio(void)
197{
198 return kthread_prio;
199}
200EXPORT_SYMBOL_GPL(rcu_get_gp_kthreads_prio);
201
202
203
204
205
206
207
208
209
210
211#define PER_RCU_NODE_PERIOD 3
212
213
214
215
216
217
218
219static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
220{
221 return READ_ONCE(rnp->qsmaskinitnext);
222}
223
224
225
226
227
228
229static int rcu_gp_in_progress(void)
230{
231 return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
232}
233
234
235
236
237
238static long rcu_get_n_cbs_cpu(int cpu)
239{
240 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
241
242 if (rcu_segcblist_is_enabled(&rdp->cblist))
243 return rcu_segcblist_n_cbs(&rdp->cblist);
244 return 0;
245}
246
247void rcu_softirq_qs(void)
248{
249 rcu_qs();
250 rcu_preempt_deferred_qs(current);
251 rcu_tasks_qs(current, false);
252}
253
254
255
256
257
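/*
 * The ->dynticks counter encodes the CPU's extended-quiescent-state (EQS)
 * status in its bottom bit: odd means RCU is watching this CPU, even means
 * the CPU is in an EQS (idle or nohz_full userspace).  EQS transitions flip
 * the bit by adding 1; snapshot-and-compare users rely on the full counter
 * value to detect that an EQS was passed through in the meantime.
 */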
258static noinline noinstr unsigned long rcu_dynticks_inc(int incby)
259{
260 return arch_atomic_add_return(incby, this_cpu_ptr(&rcu_data.dynticks));
261}
262
263
264
265
266
267
268
269static noinstr void rcu_dynticks_eqs_enter(void)
270{
271 int seq;
272
273
274
275
276
277
278 rcu_dynticks_task_trace_enter();
279 seq = rcu_dynticks_inc(1);
280
281 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
282}
283
284
285
286
287
288
289static noinstr void rcu_dynticks_eqs_exit(void)
290{
291 int seq;
292
293
294
295
296
297
298 seq = rcu_dynticks_inc(1);
299
300 rcu_dynticks_task_trace_exit();
301 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
302}
303
304
305
306
307
308
309
310
311
312
313
314static void rcu_dynticks_eqs_online(void)
315{
316 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
317
318 if (atomic_read(&rdp->dynticks) & 0x1)
319 return;
320 rcu_dynticks_inc(1);
321}
322
323
324
325
326
327
328static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
329{
330 return !(atomic_read(this_cpu_ptr(&rcu_data.dynticks)) & 0x1);
331}
332
333
334
335
336
337static int rcu_dynticks_snap(struct rcu_data *rdp)
338{
339 smp_mb();
340 return atomic_read_acquire(&rdp->dynticks);
341}
342
343
344
345
346
347static bool rcu_dynticks_in_eqs(int snap)
348{
349 return !(snap & 0x1);
350}
351
352
353bool rcu_is_idle_cpu(int cpu)
354{
355 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
356
357 return rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp));
358}
359
360
361
362
363
364
365static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
366{
367 return snap != rcu_dynticks_snap(rdp);
368}
369
370
371
372
373
374bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
375{
376 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
377 int snap;
378
379
380 snap = atomic_read(&rdp->dynticks) & ~0x1;
381
382 smp_rmb();
383 if (READ_ONCE(*vp))
384 return false;
385 smp_rmb();
386
387
388 return snap == atomic_read(&rdp->dynticks);
389}
390
391
392
393
394
395
396
397
398
399
400
401
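/*
 * Register a momentary quiescent state on the current CPU by adding 2 to
 * the ->dynticks counter.  This changes the value that the
 * force-quiescent-state scan snapshots without changing the bottom bit,
 * so RCU keeps watching the CPU but sees it as having passed through a
 * quiescent state since the last snapshot.
 */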
402notrace void rcu_momentary_dyntick_idle(void)
403{
404 int seq;
405
406 raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
407 seq = rcu_dynticks_inc(2);
408
409 WARN_ON_ONCE(!(seq & 0x1));
410 rcu_preempt_deferred_qs(current);
411}
412EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
413
414
415
416
417
418
419
420
421
422static int rcu_is_cpu_rrupt_from_idle(void)
423{
424 long nesting;
425
426
427
428
429
430
431 lockdep_assert_irqs_disabled();
432
433
434 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
435 "RCU dynticks_nesting counter underflow!");
436 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
437 "RCU dynticks_nmi_nesting counter underflow/zero!");
438
439
440 nesting = __this_cpu_read(rcu_data.dynticks_nmi_nesting);
441 if (nesting > 1)
442 return false;
443
444
445
446
447 WARN_ON_ONCE(!nesting && !is_idle_task(current));
448
449
450 return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
451}
452
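/*
 * Tuning knobs for callback processing: blimit bounds the number of
 * callbacks invoked per batch, qhimark marks a per-CPU callback list as
 * long enough to warrant more aggressive processing, qlowmark restores
 * the default batch limit once the list drains, and qovld marks a CPU as
 * overloaded with callbacks.
 */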
453#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
454
455#define DEFAULT_MAX_RCU_BLIMIT 10000
456static long blimit = DEFAULT_RCU_BLIMIT;
457#define DEFAULT_RCU_QHIMARK 10000
458static long qhimark = DEFAULT_RCU_QHIMARK;
459#define DEFAULT_RCU_QLOMARK 100
460static long qlowmark = DEFAULT_RCU_QLOMARK;
461#define DEFAULT_RCU_QOVLD_MULT 2
462#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
463static long qovld = DEFAULT_RCU_QOVLD;
464static long qovld_calc = -1;
465
466module_param(blimit, long, 0444);
467module_param(qhimark, long, 0444);
468module_param(qlowmark, long, 0444);
469module_param(qovld, long, 0444);
470
471static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
472static ulong jiffies_till_next_fqs = ULONG_MAX;
473static bool rcu_kick_kthreads;
474static int rcu_divisor = 7;
475module_param(rcu_divisor, int, 0644);
476
477
478static long rcu_resched_ns = 3 * NSEC_PER_MSEC;
479module_param(rcu_resched_ns, long, 0644);
480
481
482
483
484
485static ulong jiffies_till_sched_qs = ULONG_MAX;
486module_param(jiffies_till_sched_qs, ulong, 0444);
487static ulong jiffies_to_sched_qs;
488module_param(jiffies_to_sched_qs, ulong, 0444);
489
490
491
492
493
494
495
496static void adjust_jiffies_till_sched_qs(void)
497{
498 unsigned long j;
499
500
501 if (jiffies_till_sched_qs != ULONG_MAX) {
502 WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
503 return;
504 }
505
506 j = READ_ONCE(jiffies_till_first_fqs) +
507 2 * READ_ONCE(jiffies_till_next_fqs);
508 if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
509 j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
510 pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
511 WRITE_ONCE(jiffies_to_sched_qs, j);
512}
513
514static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
515{
516 ulong j;
517 int ret = kstrtoul(val, 0, &j);
518
519 if (!ret) {
520 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
521 adjust_jiffies_till_sched_qs();
522 }
523 return ret;
524}
525
526static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param *kp)
527{
528 ulong j;
529 int ret = kstrtoul(val, 0, &j);
530
531 if (!ret) {
532 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
533 adjust_jiffies_till_sched_qs();
534 }
535 return ret;
536}
537
538static const struct kernel_param_ops first_fqs_jiffies_ops = {
539 .set = param_set_first_fqs_jiffies,
540 .get = param_get_ulong,
541};
542
543static const struct kernel_param_ops next_fqs_jiffies_ops = {
544 .set = param_set_next_fqs_jiffies,
545 .get = param_get_ulong,
546};
547
548module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_first_fqs, 0644);
549module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
550module_param(rcu_kick_kthreads, bool, 0644);
551
552static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
553static int rcu_pending(int user);
554
555
556
557
558unsigned long rcu_get_gp_seq(void)
559{
560 return READ_ONCE(rcu_state.gp_seq);
561}
562EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
563
564
565
566
567
568
569
570unsigned long rcu_exp_batches_completed(void)
571{
572 return rcu_state.expedited_sequence;
573}
574EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
575
576
577
578
579static struct rcu_node *rcu_get_root(void)
580{
581 return &rcu_state.node[0];
582}
583
584
585
586
587void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
588 unsigned long *gp_seq)
589{
590 switch (test_type) {
591 case RCU_FLAVOR:
592 *flags = READ_ONCE(rcu_state.gp_flags);
593 *gp_seq = rcu_seq_current(&rcu_state.gp_seq);
594 break;
595 default:
596 break;
597 }
598}
599EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
600
601
602
603
604
605
606
607
608
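/*
 * Enter an extended quiescent state (idle or nohz_full userspace) on the
 * current CPU.  Nested calls simply decrement ->dynticks_nesting; only the
 * outermost call actually stops RCU from watching the CPU, after reporting
 * any deferred quiescent states and preparing the callback machinery for
 * idle.
 */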
609static noinstr void rcu_eqs_enter(bool user)
610{
611 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
612
613 WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE);
614 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
615 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
616 rdp->dynticks_nesting == 0);
617 if (rdp->dynticks_nesting != 1) {
618
619 rdp->dynticks_nesting--;
620 return;
621 }
622
623 lockdep_assert_irqs_disabled();
624 instrumentation_begin();
625 trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
626 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
627 rcu_prepare_for_idle();
628 rcu_preempt_deferred_qs(current);
629
630
631 instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
632
633 instrumentation_end();
634 WRITE_ONCE(rdp->dynticks_nesting, 0);
635
636 rcu_dynticks_eqs_enter();
637
638 rcu_dynticks_task_enter();
639}
640
641
642
643
644
645
646
647
648
649
650
651
652void rcu_idle_enter(void)
653{
654 lockdep_assert_irqs_disabled();
655 rcu_eqs_enter(false);
656}
657EXPORT_SYMBOL_GPL(rcu_idle_enter);
658
659#ifdef CONFIG_NO_HZ_FULL
660
661#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)
662
663
664
665
666static void late_wakeup_func(struct irq_work *work)
667{
668}
669
670static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) =
671 IRQ_WORK_INIT(late_wakeup_func);
672
673
674
675
676
677
678
679
680
681
682
683noinstr static void rcu_irq_work_resched(void)
684{
685 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
686
687 if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU))
688 return;
689
690 if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU))
691 return;
692
693 instrumentation_begin();
694 if (do_nocb_deferred_wakeup(rdp) && need_resched()) {
695 irq_work_queue(this_cpu_ptr(&late_wakeup_work));
696 }
697 instrumentation_end();
698}
699
700#else
701static inline void rcu_irq_work_resched(void) { }
702#endif
703
704
705
706
707
708
709
710
711
712
713
714
715noinstr void rcu_user_enter(void)
716{
717 lockdep_assert_irqs_disabled();
718
719
720
721
722
723
724 rcu_irq_work_resched();
725 rcu_eqs_enter(true);
726}
727
728#endif
729
730
731
732
733
734
735
736
737
738
739
740
741noinstr void rcu_nmi_exit(void)
742{
743 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
744
745 instrumentation_begin();
746
747
748
749
750
751 WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0);
752 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
753
754
755
756
757
758 if (rdp->dynticks_nmi_nesting != 1) {
759 trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
760 atomic_read(&rdp->dynticks));
761 WRITE_ONCE(rdp->dynticks_nmi_nesting,
762 rdp->dynticks_nmi_nesting - 2);
763 instrumentation_end();
764 return;
765 }
766
767
768 trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
769 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
770
771 if (!in_nmi())
772 rcu_prepare_for_idle();
773
774
775 instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
776 instrumentation_end();
777
778
779 rcu_dynticks_eqs_enter();
780
781
782 if (!in_nmi())
783 rcu_dynticks_task_enter();
784}
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805void noinstr rcu_irq_exit(void)
806{
807 lockdep_assert_irqs_disabled();
808 rcu_nmi_exit();
809}
810
811#ifdef CONFIG_PROVE_RCU
812
813
814
815void rcu_irq_exit_check_preempt(void)
816{
817 lockdep_assert_irqs_disabled();
818
819 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) <= 0,
820 "RCU dynticks_nesting counter underflow/zero!");
821 RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) !=
822 DYNTICK_IRQ_NONIDLE,
823 "Bad RCU dynticks_nmi_nesting counter\n");
824 RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
825 "RCU in extended quiescent state!");
826}
827#endif
828
829
830
831
832
833
834
835void rcu_irq_exit_irqson(void)
836{
837 unsigned long flags;
838
839 local_irq_save(flags);
840 rcu_irq_exit();
841 local_irq_restore(flags);
842}
843
844
845
846
847
848
849
850
851
852static void noinstr rcu_eqs_exit(bool user)
853{
854 struct rcu_data *rdp;
855 long oldval;
856
857 lockdep_assert_irqs_disabled();
858 rdp = this_cpu_ptr(&rcu_data);
859 oldval = rdp->dynticks_nesting;
860 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
861 if (oldval) {
862
863 rdp->dynticks_nesting++;
864 return;
865 }
866 rcu_dynticks_task_exit();
867
868 rcu_dynticks_eqs_exit();
869
870 instrumentation_begin();
871
872
873 instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
874
875 rcu_cleanup_after_idle();
876 trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
877 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
878 WRITE_ONCE(rdp->dynticks_nesting, 1);
879 WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
880 WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
881 instrumentation_end();
882}
883
884
885
886
887
888
889
890
891
892
893void rcu_idle_exit(void)
894{
895 unsigned long flags;
896
897 local_irq_save(flags);
898 rcu_eqs_exit(false);
899 local_irq_restore(flags);
900}
901EXPORT_SYMBOL_GPL(rcu_idle_exit);
902
903#ifdef CONFIG_NO_HZ_FULL
904
905
906
907
908
909
910
911
912
913void noinstr rcu_user_exit(void)
914{
915 rcu_eqs_exit(true);
916}
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
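/*
 * If RCU urgently needs a quiescent state from a nohz_full CPU that is
 * looping in the kernel, re-enable the scheduling-clock tick on that CPU
 * so that RCU's per-tick machinery gets a chance to run.  Called from the
 * interrupt-entry path, but never from NMI context.
 */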
944void __rcu_irq_enter_check_tick(void)
945{
946 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
947
948
949 if (in_nmi())
950 return;
951
952 RCU_LOCKDEP_WARN(rcu_dynticks_curr_cpu_in_eqs(),
953 "Illegal rcu_irq_enter_check_tick() from extended quiescent state");
954
955 if (!tick_nohz_full_cpu(rdp->cpu) ||
956 !READ_ONCE(rdp->rcu_urgent_qs) ||
957 READ_ONCE(rdp->rcu_forced_tick)) {
958
959
960 return;
961 }
962
963
964
965
966
967
968
969 raw_spin_lock_rcu_node(rdp->mynode);
970 if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
971
972
973 WRITE_ONCE(rdp->rcu_forced_tick, true);
974 tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
975 }
976 raw_spin_unlock_rcu_node(rdp->mynode);
977}
978#endif
979
980
981
982
983
984
985
986
987
988
989
990
991
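/*
 * Entry into NMI or interrupt context.  If the CPU was in an extended
 * quiescent state, exit that state and bump ->dynticks_nmi_nesting by
 * only 1, so that the matching exit can recognize itself as outermost;
 * otherwise bump the nesting count by 2.
 */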
992noinstr void rcu_nmi_enter(void)
993{
994 long incby = 2;
995 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
996
997
998 WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0);
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 if (rcu_dynticks_curr_cpu_in_eqs()) {
1009
1010 if (!in_nmi())
1011 rcu_dynticks_task_exit();
1012
1013
1014 rcu_dynticks_eqs_exit();
1015
1016
1017 if (!in_nmi()) {
1018 instrumentation_begin();
1019 rcu_cleanup_after_idle();
1020 instrumentation_end();
1021 }
1022
1023 instrumentation_begin();
1024
1025 instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
1026
1027 instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
1028
1029 incby = 1;
1030 } else if (!in_nmi()) {
1031 instrumentation_begin();
1032 rcu_irq_enter_check_tick();
1033 } else {
1034 instrumentation_begin();
1035 }
1036
1037 trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
1038 rdp->dynticks_nmi_nesting,
1039 rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
1040 instrumentation_end();
1041 WRITE_ONCE(rdp->dynticks_nmi_nesting,
1042 rdp->dynticks_nmi_nesting + incby);
1043 barrier();
1044}
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068noinstr void rcu_irq_enter(void)
1069{
1070 lockdep_assert_irqs_disabled();
1071 rcu_nmi_enter();
1072}
1073
1074
1075
1076
1077
1078
1079
1080void rcu_irq_enter_irqson(void)
1081{
1082 unsigned long flags;
1083
1084 local_irq_save(flags);
1085 rcu_irq_enter();
1086 local_irq_restore(flags);
1087}
1088
1089
1090
1091
1092
1093
1094static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
1095{
1096 raw_lockdep_assert_held_rcu_node(rdp->mynode);
1097 WRITE_ONCE(rdp->rcu_urgent_qs, false);
1098 WRITE_ONCE(rdp->rcu_need_heavy_qs, false);
1099 if (tick_nohz_full_cpu(rdp->cpu) && rdp->rcu_forced_tick) {
1100 tick_dep_clear_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
1101 WRITE_ONCE(rdp->rcu_forced_tick, false);
1102 }
1103}
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
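/*
 * rcu_is_watching() reports whether RCU is paying attention to the current
 * CPU, that is, whether the CPU is outside of any extended quiescent state.
 * Code that can run from the idle loop or from entry paths (tracing and
 * debug hooks, for example) typically checks it before entering an RCU
 * read-side critical section, roughly:
 *
 *	if (!rcu_is_watching())
 *		return;		// RCU would not protect readers here.
 *	rcu_read_lock();
 *	// ...access RCU-protected data...
 *	rcu_read_unlock();
 */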
1116notrace bool rcu_is_watching(void)
1117{
1118 bool ret;
1119
1120 preempt_disable_notrace();
1121 ret = !rcu_dynticks_curr_cpu_in_eqs();
1122 preempt_enable_notrace();
1123 return ret;
1124}
1125EXPORT_SYMBOL_GPL(rcu_is_watching);
1126
1127
1128
1129
1130
1131
1132
1133
1134void rcu_request_urgent_qs_task(struct task_struct *t)
1135{
1136 int cpu;
1137
1138 barrier();
1139 cpu = task_cpu(t);
1140 if (!task_curr(t))
1141 return;
1142 smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
1143}
1144
1145#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160bool rcu_lockdep_current_cpu_online(void)
1161{
1162 struct rcu_data *rdp;
1163 struct rcu_node *rnp;
1164 bool ret = false;
1165
1166 if (in_nmi() || !rcu_scheduler_fully_active)
1167 return true;
1168 preempt_disable_notrace();
1169 rdp = this_cpu_ptr(&rcu_data);
1170 rnp = rdp->mynode;
1171 if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
1172 ret = true;
1173 preempt_enable_notrace();
1174 return ret;
1175}
1176EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
1177
1178#endif
1179
1180
1181
1182
1183
1184
1185
1186
1187static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
1188{
1189 raw_lockdep_assert_held_rcu_node(rnp);
1190 if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
1191 rnp->gp_seq))
1192 WRITE_ONCE(rdp->gpwrap, true);
1193 if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
1194 rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
1195}
1196
1197
1198
1199
1200
1201
1202static int dyntick_save_progress_counter(struct rcu_data *rdp)
1203{
1204 rdp->dynticks_snap = rcu_dynticks_snap(rdp);
1205 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
1206 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1207 rcu_gpnum_ovf(rdp->mynode, rdp);
1208 return 1;
1209 }
1210 return 0;
1211}
1212
1213
1214
1215
1216
1217
1218
1219static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1220{
1221 unsigned long jtsq;
1222 bool *rnhqp;
1223 bool *ruqp;
1224 struct rcu_node *rnp = rdp->mynode;
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234 if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) {
1235 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1236 rcu_gpnum_ovf(rnp, rdp);
1237 return 1;
1238 }
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258 if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) {
1259 bool onl;
1260 struct rcu_node *rnp1;
1261
1262 pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n",
1263 __func__, rnp->grplo, rnp->grphi, rnp->level,
1264 (long)rnp->gp_seq, (long)rnp->completedqs);
1265 for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent)
1266 pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n",
1267 __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask);
1268 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
1269 pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n",
1270 __func__, rdp->cpu, ".o"[onl],
1271 (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags,
1272 (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags);
1273 return 1;
1274 }
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287 jtsq = READ_ONCE(jiffies_to_sched_qs);
1288 ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);
1289 rnhqp = per_cpu_ptr(&rcu_data.rcu_need_heavy_qs, rdp->cpu);
1290 if (!READ_ONCE(*rnhqp) &&
1291 (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
1292 time_after(jiffies, rcu_state.jiffies_resched) ||
1293 rcu_state.cbovld)) {
1294 WRITE_ONCE(*rnhqp, true);
1295
1296 smp_store_release(ruqp, true);
1297 } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
1298 WRITE_ONCE(*ruqp, true);
1299 }
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309 if (tick_nohz_full_cpu(rdp->cpu) &&
1310 (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
1311 rcu_state.cbovld)) {
1312 WRITE_ONCE(*ruqp, true);
1313 resched_cpu(rdp->cpu);
1314 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1315 }
1316
1317
1318
1319
1320
1321
1322
1323
1324 if (time_after(jiffies, rcu_state.jiffies_resched)) {
1325 if (time_after(jiffies,
1326 READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
1327 resched_cpu(rdp->cpu);
1328 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1329 }
1330 if (IS_ENABLED(CONFIG_IRQ_WORK) &&
1331 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
1332 (rnp->ffmask & rdp->grpmask)) {
1333 rdp->rcu_iw_pending = true;
1334 rdp->rcu_iw_gp_seq = rnp->gp_seq;
1335 irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
1336 }
1337 }
1338
1339 return 0;
1340}
1341
1342
1343static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1344 unsigned long gp_seq_req, const char *s)
1345{
1346 trace_rcu_future_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
1347 gp_seq_req, rnp->level,
1348 rnp->grplo, rnp->grphi, s);
1349}
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
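/*
 * Record a request for the grace period identified by gp_seq_req, walking
 * up from the caller's leaf rcu_node toward the root and stopping as soon
 * as some node already covers the request.  Returns true if the caller
 * needs to wake the grace-period kthread.  The caller must hold the leaf
 * node's lock with interrupts disabled.
 */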
1367static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
1368 unsigned long gp_seq_req)
1369{
1370 bool ret = false;
1371 struct rcu_node *rnp;
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382 raw_lockdep_assert_held_rcu_node(rnp_start);
1383 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req, TPS("Startleaf"));
1384 for (rnp = rnp_start; 1; rnp = rnp->parent) {
1385 if (rnp != rnp_start)
1386 raw_spin_lock_rcu_node(rnp);
1387 if (ULONG_CMP_GE(rnp->gp_seq_needed, gp_seq_req) ||
1388 rcu_seq_started(&rnp->gp_seq, gp_seq_req) ||
1389 (rnp != rnp_start &&
1390 rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))) {
1391 trace_rcu_this_gp(rnp, rdp, gp_seq_req,
1392 TPS("Prestarted"));
1393 goto unlock_out;
1394 }
1395 WRITE_ONCE(rnp->gp_seq_needed, gp_seq_req);
1396 if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
1397
1398
1399
1400
1401
1402
1403 trace_rcu_this_gp(rnp_start, rdp, gp_seq_req,
1404 TPS("Startedleaf"));
1405 goto unlock_out;
1406 }
1407 if (rnp != rnp_start && rnp->parent != NULL)
1408 raw_spin_unlock_rcu_node(rnp);
1409 if (!rnp->parent)
1410 break;
1411 }
1412
1413
1414 if (rcu_gp_in_progress()) {
1415 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot"));
1416 goto unlock_out;
1417 }
1418 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot"));
1419 WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT);
1420 WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
1421 if (!READ_ONCE(rcu_state.gp_kthread)) {
1422 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
1423 goto unlock_out;
1424 }
1425 trace_rcu_grace_period(rcu_state.name, data_race(rcu_state.gp_seq), TPS("newreq"));
1426 ret = true;
1427unlock_out:
1428
1429 if (ULONG_CMP_LT(gp_seq_req, rnp->gp_seq_needed)) {
1430 WRITE_ONCE(rnp_start->gp_seq_needed, rnp->gp_seq_needed);
1431 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
1432 }
1433 if (rnp != rnp_start)
1434 raw_spin_unlock_rcu_node(rnp);
1435 return ret;
1436}
1437
1438
1439
1440
1441
1442static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
1443{
1444 bool needmore;
1445 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1446
1447 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
1448 if (!needmore)
1449 rnp->gp_seq_needed = rnp->gp_seq;
1450 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq,
1451 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1452 return needmore;
1453}
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470static void rcu_gp_kthread_wake(void)
1471{
1472 struct task_struct *t = READ_ONCE(rcu_state.gp_kthread);
1473
1474 if ((current == t && !in_irq() && !in_serving_softirq()) ||
1475 !READ_ONCE(rcu_state.gp_flags) || !t)
1476 return;
1477 WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
1478 WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
1479 swake_up_one(&rcu_state.gp_wq);
1480}
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
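/*
 * Assign a grace-period number to any callbacks on this CPU that do not
 * yet have one, so that they are invoked as soon as that grace period
 * completes, and request that grace period if it has not already been
 * requested.  Returns true if the grace-period kthread needs to be
 * awakened.  The caller must hold the rcu_node lock and must have the
 * callback list protected.
 */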
1494static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1495{
1496 unsigned long gp_seq_req;
1497 bool ret = false;
1498
1499 rcu_lockdep_assert_cblist_protected(rdp);
1500 raw_lockdep_assert_held_rcu_node(rnp);
1501
1502
1503 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1504 return false;
1505
1506 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518 gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);
1519 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
1520 ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
1521
1522
1523 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1524 trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB"));
1525 else
1526 trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
1527
1528 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
1529
1530 return ret;
1531}
1532
1533
1534
1535
1536
1537
1538
1539
1540static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
1541 struct rcu_data *rdp)
1542{
1543 unsigned long c;
1544 bool needwake;
1545
1546 rcu_lockdep_assert_cblist_protected(rdp);
1547 c = rcu_seq_snap(&rcu_state.gp_seq);
1548 if (!READ_ONCE(rdp->gpwrap) && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
1549
1550 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1551 return;
1552 }
1553 raw_spin_lock_rcu_node(rnp);
1554 needwake = rcu_accelerate_cbs(rnp, rdp);
1555 raw_spin_unlock_rcu_node(rnp);
1556 if (needwake)
1557 rcu_gp_kthread_wake();
1558}
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1571{
1572 rcu_lockdep_assert_cblist_protected(rdp);
1573 raw_lockdep_assert_held_rcu_node(rnp);
1574
1575
1576 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1577 return false;
1578
1579
1580
1581
1582
1583 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
1584
1585
1586 return rcu_accelerate_cbs(rnp, rdp);
1587}
1588
1589
1590
1591
1592
1593static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
1594 struct rcu_data *rdp)
1595{
1596 rcu_lockdep_assert_cblist_protected(rdp);
1597 if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
1598 !raw_spin_trylock_rcu_node(rnp))
1599 return;
1600 WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
1601 raw_spin_unlock_rcu_node(rnp);
1602}
1603
1604
1605
1606
1607
1608
1609static void rcu_strict_gp_check_qs(void)
1610{
1611 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
1612 rcu_read_lock();
1613 rcu_read_unlock();
1614 }
1615}
1616
1617
1618
1619
1620
1621
1622
1623static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
1624{
1625 bool ret = false;
1626 bool need_qs;
1627 const bool offloaded = rcu_rdp_is_offloaded(rdp);
1628
1629 raw_lockdep_assert_held_rcu_node(rnp);
1630
1631 if (rdp->gp_seq == rnp->gp_seq)
1632 return false;
1633
1634
1635 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
1636 unlikely(READ_ONCE(rdp->gpwrap))) {
1637 if (!offloaded)
1638 ret = rcu_advance_cbs(rnp, rdp);
1639 rdp->core_needs_qs = false;
1640 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
1641 } else {
1642 if (!offloaded)
1643 ret = rcu_accelerate_cbs(rnp, rdp);
1644 if (rdp->core_needs_qs)
1645 rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
1646 }
1647
1648
1649 if (rcu_seq_new_gp(rdp->gp_seq, rnp->gp_seq) ||
1650 unlikely(READ_ONCE(rdp->gpwrap))) {
1651
1652
1653
1654
1655
1656 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
1657 need_qs = !!(rnp->qsmask & rdp->grpmask);
1658 rdp->cpu_no_qs.b.norm = need_qs;
1659 rdp->core_needs_qs = need_qs;
1660 zero_cpu_stall_ticks(rdp);
1661 }
1662 rdp->gp_seq = rnp->gp_seq;
1663 if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
1664 WRITE_ONCE(rdp->gp_seq_needed, rnp->gp_seq_needed);
1665 WRITE_ONCE(rdp->gpwrap, false);
1666 rcu_gpnum_ovf(rnp, rdp);
1667 return ret;
1668}
1669
1670static void note_gp_changes(struct rcu_data *rdp)
1671{
1672 unsigned long flags;
1673 bool needwake;
1674 struct rcu_node *rnp;
1675
1676 local_irq_save(flags);
1677 rnp = rdp->mynode;
1678 if ((rdp->gp_seq == rcu_seq_current(&rnp->gp_seq) &&
1679 !unlikely(READ_ONCE(rdp->gpwrap))) ||
1680 !raw_spin_trylock_rcu_node(rnp)) {
1681 local_irq_restore(flags);
1682 return;
1683 }
1684 needwake = __note_gp_changes(rnp, rdp);
1685 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1686 rcu_strict_gp_check_qs();
1687 if (needwake)
1688 rcu_gp_kthread_wake();
1689}
1690
1691static void rcu_gp_slow(int delay)
1692{
1693 if (delay > 0 &&
1694 !(rcu_seq_ctr(rcu_state.gp_seq) %
1695 (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
1696 schedule_timeout_idle(delay);
1697}
1698
1699static unsigned long sleep_duration;
1700
1701
1702void rcu_gp_set_torture_wait(int duration)
1703{
1704 if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST) && duration > 0)
1705 WRITE_ONCE(sleep_duration, duration);
1706}
1707EXPORT_SYMBOL_GPL(rcu_gp_set_torture_wait);
1708
1709
1710static void rcu_gp_torture_wait(void)
1711{
1712 unsigned long duration;
1713
1714 if (!IS_ENABLED(CONFIG_RCU_TORTURE_TEST))
1715 return;
1716 duration = xchg(&sleep_duration, 0UL);
1717 if (duration > 0) {
1718 pr_alert("%s: Waiting %lu jiffies\n", __func__, duration);
1719 schedule_timeout_idle(duration);
1720 pr_alert("%s: Wait complete\n", __func__);
1721 }
1722}
1723
1724
1725
1726
1727
1728static void rcu_strict_gp_boundary(void *unused)
1729{
1730 invoke_rcu_core();
1731}
1732
1733
1734
1735
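/*
 * Initialize a new grace period: advance rcu_state.gp_seq, apply any
 * deferred online/offline transitions to the ->qsmaskinit masks, and then
 * set each rcu_node's ->qsmask breadth-first so that all nodes see the new
 * grace period before quiescent states are reported against it.  Returns
 * false if no grace period was actually needed.
 */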
1736static noinline_for_stack bool rcu_gp_init(void)
1737{
1738 unsigned long firstseq;
1739 unsigned long flags;
1740 unsigned long oldmask;
1741 unsigned long mask;
1742 struct rcu_data *rdp;
1743 struct rcu_node *rnp = rcu_get_root();
1744
1745 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1746 raw_spin_lock_irq_rcu_node(rnp);
1747 if (!READ_ONCE(rcu_state.gp_flags)) {
1748
1749 raw_spin_unlock_irq_rcu_node(rnp);
1750 return false;
1751 }
1752 WRITE_ONCE(rcu_state.gp_flags, 0);
1753
1754 if (WARN_ON_ONCE(rcu_gp_in_progress())) {
1755
1756
1757
1758
1759 raw_spin_unlock_irq_rcu_node(rnp);
1760 return false;
1761 }
1762
1763
1764 record_gp_stall_check_time();
1765
1766 rcu_seq_start(&rcu_state.gp_seq);
1767 ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
1768 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
1769 raw_spin_unlock_irq_rcu_node(rnp);
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780 WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
1781 rcu_for_each_leaf_node(rnp) {
1782 smp_mb();
1783 firstseq = READ_ONCE(rnp->ofl_seq);
1784 if (firstseq & 0x1)
1785 while (firstseq == READ_ONCE(rnp->ofl_seq))
1786 schedule_timeout_idle(1);
1787 smp_mb();
1788 raw_spin_lock(&rcu_state.ofl_lock);
1789 raw_spin_lock_irq_rcu_node(rnp);
1790 if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
1791 !rnp->wait_blkd_tasks) {
1792
1793 raw_spin_unlock_irq_rcu_node(rnp);
1794 raw_spin_unlock(&rcu_state.ofl_lock);
1795 continue;
1796 }
1797
1798
1799 oldmask = rnp->qsmaskinit;
1800 rnp->qsmaskinit = rnp->qsmaskinitnext;
1801
1802
1803 if (!oldmask != !rnp->qsmaskinit) {
1804 if (!oldmask) {
1805 if (!rnp->wait_blkd_tasks)
1806 rcu_init_new_rnp(rnp);
1807 } else if (rcu_preempt_has_tasks(rnp)) {
1808 rnp->wait_blkd_tasks = true;
1809 } else {
1810 rcu_cleanup_dead_rnp(rnp);
1811 }
1812 }
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822 if (rnp->wait_blkd_tasks &&
1823 (!rcu_preempt_has_tasks(rnp) || rnp->qsmaskinit)) {
1824 rnp->wait_blkd_tasks = false;
1825 if (!rnp->qsmaskinit)
1826 rcu_cleanup_dead_rnp(rnp);
1827 }
1828
1829 raw_spin_unlock_irq_rcu_node(rnp);
1830 raw_spin_unlock(&rcu_state.ofl_lock);
1831 }
1832 rcu_gp_slow(gp_preinit_delay);
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846 WRITE_ONCE(rcu_state.gp_state, RCU_GP_INIT);
1847 rcu_for_each_node_breadth_first(rnp) {
1848 rcu_gp_slow(gp_init_delay);
1849 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1850 rdp = this_cpu_ptr(&rcu_data);
1851 rcu_preempt_check_blocked_tasks(rnp);
1852 rnp->qsmask = rnp->qsmaskinit;
1853 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq);
1854 if (rnp == rdp->mynode)
1855 (void)__note_gp_changes(rnp, rdp);
1856 rcu_preempt_boost_start_gp(rnp);
1857 trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq,
1858 rnp->level, rnp->grplo,
1859 rnp->grphi, rnp->qsmask);
1860
1861 mask = rnp->qsmask & ~rnp->qsmaskinitnext;
1862 rnp->rcu_gp_init_mask = mask;
1863 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
1864 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
1865 else
1866 raw_spin_unlock_irq_rcu_node(rnp);
1867 cond_resched_tasks_rcu_qs();
1868 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1869 }
1870
1871
1872 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
1873 on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
1874
1875 return true;
1876}
1877
1878
1879
1880
1881
1882static bool rcu_gp_fqs_check_wake(int *gfp)
1883{
1884 struct rcu_node *rnp = rcu_get_root();
1885
1886
1887 if (*gfp & RCU_GP_FLAG_OVLD)
1888 return true;
1889
1890
1891 *gfp = READ_ONCE(rcu_state.gp_flags);
1892 if (*gfp & RCU_GP_FLAG_FQS)
1893 return true;
1894
1895
1896 if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
1897 return true;
1898
1899 return false;
1900}
1901
1902
1903
1904
1905static void rcu_gp_fqs(bool first_time)
1906{
1907 struct rcu_node *rnp = rcu_get_root();
1908
1909 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1910 rcu_state.n_force_qs++;
1911 if (first_time) {
1912
1913 force_qs_rnp(dyntick_save_progress_counter);
1914 } else {
1915
1916 force_qs_rnp(rcu_implicit_dynticks_qs);
1917 }
1918
1919 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
1920 raw_spin_lock_irq_rcu_node(rnp);
1921 WRITE_ONCE(rcu_state.gp_flags,
1922 READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS);
1923 raw_spin_unlock_irq_rcu_node(rnp);
1924 }
1925}
1926
1927
1928
1929
1930static noinline_for_stack void rcu_gp_fqs_loop(void)
1931{
1932 bool first_gp_fqs;
1933 int gf = 0;
1934 unsigned long j;
1935 int ret;
1936 struct rcu_node *rnp = rcu_get_root();
1937
1938 first_gp_fqs = true;
1939 j = READ_ONCE(jiffies_till_first_fqs);
1940 if (rcu_state.cbovld)
1941 gf = RCU_GP_FLAG_OVLD;
1942 ret = 0;
1943 for (;;) {
1944 if (!ret) {
1945 WRITE_ONCE(rcu_state.jiffies_force_qs, jiffies + j);
1946
1947
1948
1949
1950 smp_wmb();
1951 WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
1952 jiffies + (j ? 3 * j : 2));
1953 }
1954 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1955 TPS("fqswait"));
1956 WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_FQS);
1957 (void)swait_event_idle_timeout_exclusive(rcu_state.gp_wq,
1958 rcu_gp_fqs_check_wake(&gf), j);
1959 rcu_gp_torture_wait();
1960 WRITE_ONCE(rcu_state.gp_state, RCU_GP_DOING_FQS);
1961
1962
1963 if (!READ_ONCE(rnp->qsmask) &&
1964 !rcu_preempt_blocked_readers_cgp(rnp))
1965 break;
1966
1967 if (!time_after(rcu_state.jiffies_force_qs, jiffies) ||
1968 (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) {
1969 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1970 TPS("fqsstart"));
1971 rcu_gp_fqs(first_gp_fqs);
1972 gf = 0;
1973 if (first_gp_fqs) {
1974 first_gp_fqs = false;
1975 gf = rcu_state.cbovld ? RCU_GP_FLAG_OVLD : 0;
1976 }
1977 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1978 TPS("fqsend"));
1979 cond_resched_tasks_rcu_qs();
1980 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1981 ret = 0;
1982 j = READ_ONCE(jiffies_till_next_fqs);
1983 } else {
1984
1985 cond_resched_tasks_rcu_qs();
1986 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1987 WARN_ON(signal_pending(current));
1988 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
1989 TPS("fqswaitsig"));
1990 ret = 1;
1991 j = jiffies;
1992 if (time_after(jiffies, rcu_state.jiffies_force_qs))
1993 j = 1;
1994 else
1995 j = rcu_state.jiffies_force_qs - j;
1996 gf = 0;
1997 }
1998 }
1999}
2000
2001
2002
2003
2004static noinline void rcu_gp_cleanup(void)
2005{
2006 int cpu;
2007 bool needgp = false;
2008 unsigned long gp_duration;
2009 unsigned long new_gp_seq;
2010 bool offloaded;
2011 struct rcu_data *rdp;
2012 struct rcu_node *rnp = rcu_get_root();
2013 struct swait_queue_head *sq;
2014
2015 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2016 raw_spin_lock_irq_rcu_node(rnp);
2017 rcu_state.gp_end = jiffies;
2018 gp_duration = rcu_state.gp_end - rcu_state.gp_start;
2019 if (gp_duration > rcu_state.gp_max)
2020 rcu_state.gp_max = gp_duration;
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030 raw_spin_unlock_irq_rcu_node(rnp);
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041 new_gp_seq = rcu_state.gp_seq;
2042 rcu_seq_end(&new_gp_seq);
2043 rcu_for_each_node_breadth_first(rnp) {
2044 raw_spin_lock_irq_rcu_node(rnp);
2045 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
2046 dump_blkd_tasks(rnp, 10);
2047 WARN_ON_ONCE(rnp->qsmask);
2048 WRITE_ONCE(rnp->gp_seq, new_gp_seq);
2049 rdp = this_cpu_ptr(&rcu_data);
2050 if (rnp == rdp->mynode)
2051 needgp = __note_gp_changes(rnp, rdp) || needgp;
2052
2053 needgp = rcu_future_gp_cleanup(rnp) || needgp;
2054
2055 if (rcu_is_leaf_node(rnp))
2056 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->cbovldmask) {
2057 rdp = per_cpu_ptr(&rcu_data, cpu);
2058 check_cb_ovld_locked(rdp, rnp);
2059 }
2060 sq = rcu_nocb_gp_get(rnp);
2061 raw_spin_unlock_irq_rcu_node(rnp);
2062 rcu_nocb_gp_cleanup(sq);
2063 cond_resched_tasks_rcu_qs();
2064 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2065 rcu_gp_slow(gp_cleanup_delay);
2066 }
2067 rnp = rcu_get_root();
2068 raw_spin_lock_irq_rcu_node(rnp);
2069
2070
2071 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
2072 rcu_seq_end(&rcu_state.gp_seq);
2073 ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
2074 WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
2075
2076 rdp = this_cpu_ptr(&rcu_data);
2077 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
2078 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
2079 TPS("CleanupMore"));
2080 needgp = true;
2081 }
2082
2083 offloaded = rcu_rdp_is_offloaded(rdp);
2084 if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
2085 WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
2086 WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
2087 trace_rcu_grace_period(rcu_state.name,
2088 rcu_state.gp_seq,
2089 TPS("newreq"));
2090 } else {
2091 WRITE_ONCE(rcu_state.gp_flags,
2092 rcu_state.gp_flags & RCU_GP_FLAG_INIT);
2093 }
2094 raw_spin_unlock_irq_rcu_node(rnp);
2095
2096
2097 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
2098 on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
2099}
2100
2101
2102
2103
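/*
 * Body of the grace-period kthread: wait for a grace-period request,
 * initialize the grace period, run the force-quiescent-state loop until
 * all CPUs have reported, then clean up, forever.
 */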
2104static int __noreturn rcu_gp_kthread(void *unused)
2105{
2106 rcu_bind_gp_kthread();
2107 for (;;) {
2108
2109
2110 for (;;) {
2111 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
2112 TPS("reqwait"));
2113 WRITE_ONCE(rcu_state.gp_state, RCU_GP_WAIT_GPS);
2114 swait_event_idle_exclusive(rcu_state.gp_wq,
2115 READ_ONCE(rcu_state.gp_flags) &
2116 RCU_GP_FLAG_INIT);
2117 rcu_gp_torture_wait();
2118 WRITE_ONCE(rcu_state.gp_state, RCU_GP_DONE_GPS);
2119
2120 if (rcu_gp_init())
2121 break;
2122 cond_resched_tasks_rcu_qs();
2123 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2124 WARN_ON(signal_pending(current));
2125 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq,
2126 TPS("reqwaitsig"));
2127 }
2128
2129
2130 rcu_gp_fqs_loop();
2131
2132
2133 WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANUP);
2134 rcu_gp_cleanup();
2135 WRITE_ONCE(rcu_state.gp_state, RCU_GP_CLEANED);
2136 }
2137}
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148static void rcu_report_qs_rsp(unsigned long flags)
2149 __releases(rcu_get_root()->lock)
2150{
2151 raw_lockdep_assert_held_rcu_node(rcu_get_root());
2152 WARN_ON_ONCE(!rcu_gp_in_progress());
2153 WRITE_ONCE(rcu_state.gp_flags,
2154 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
2155 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags);
2156 rcu_gp_kthread_wake();
2157}
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
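/*
 * Report a quiescent state for the CPUs in @mask against the rcu_node
 * hierarchy, clearing bits in ->qsmask and propagating upward whenever a
 * node's mask empties.  If the root's mask empties, the grace period is
 * complete and the grace-period kthread is awakened.  Releases the
 * caller's rnp->lock.
 */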
2173static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
2174 unsigned long gps, unsigned long flags)
2175 __releases(rnp->lock)
2176{
2177 unsigned long oldmask = 0;
2178 struct rcu_node *rnp_c;
2179
2180 raw_lockdep_assert_held_rcu_node(rnp);
2181
2182
2183 for (;;) {
2184 if ((!(rnp->qsmask & mask) && mask) || rnp->gp_seq != gps) {
2185
2186
2187
2188
2189
2190 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2191 return;
2192 }
2193 WARN_ON_ONCE(oldmask);
2194 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
2195 rcu_preempt_blocked_readers_cgp(rnp));
2196 WRITE_ONCE(rnp->qsmask, rnp->qsmask & ~mask);
2197 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq,
2198 mask, rnp->qsmask, rnp->level,
2199 rnp->grplo, rnp->grphi,
2200 !!rnp->gp_tasks);
2201 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
2202
2203
2204 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2205 return;
2206 }
2207 rnp->completedqs = rnp->gp_seq;
2208 mask = rnp->grpmask;
2209 if (rnp->parent == NULL) {
2210
2211
2212
2213 break;
2214 }
2215 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2216 rnp_c = rnp;
2217 rnp = rnp->parent;
2218 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2219 oldmask = READ_ONCE(rnp_c->qsmask);
2220 }
2221
2222
2223
2224
2225
2226
2227 rcu_report_qs_rsp(flags);
2228}
2229
2230
2231
2232
2233
2234
2235
2236
2237static void __maybe_unused
2238rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
2239 __releases(rnp->lock)
2240{
2241 unsigned long gps;
2242 unsigned long mask;
2243 struct rcu_node *rnp_p;
2244
2245 raw_lockdep_assert_held_rcu_node(rnp);
2246 if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RCU)) ||
2247 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
2248 rnp->qsmask != 0) {
2249 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2250 return;
2251 }
2252
2253 rnp->completedqs = rnp->gp_seq;
2254 rnp_p = rnp->parent;
2255 if (rnp_p == NULL) {
2256
2257
2258
2259
2260 rcu_report_qs_rsp(flags);
2261 return;
2262 }
2263
2264
2265 gps = rnp->gp_seq;
2266 mask = rnp->grpmask;
2267 raw_spin_unlock_rcu_node(rnp);
2268 raw_spin_lock_rcu_node(rnp_p);
2269 rcu_report_qs_rnp(mask, rnp_p, gps, flags);
2270}
2271
2272
2273
2274
2275
2276static void
2277rcu_report_qs_rdp(struct rcu_data *rdp)
2278{
2279 unsigned long flags;
2280 unsigned long mask;
2281 bool needwake = false;
2282 const bool offloaded = rcu_rdp_is_offloaded(rdp);
2283 struct rcu_node *rnp;
2284
2285 WARN_ON_ONCE(rdp->cpu != smp_processor_id());
2286 rnp = rdp->mynode;
2287 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2288 if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
2289 rdp->gpwrap) {
2290
2291
2292
2293
2294
2295
2296
2297 rdp->cpu_no_qs.b.norm = true;
2298 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2299 return;
2300 }
2301 mask = rdp->grpmask;
2302 rdp->core_needs_qs = false;
2303 if ((rnp->qsmask & mask) == 0) {
2304 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2305 } else {
2306
2307
2308
2309
2310 if (!offloaded)
2311 needwake = rcu_accelerate_cbs(rnp, rdp);
2312
2313 rcu_disable_urgency_upon_qs(rdp);
2314 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2315
2316 if (needwake)
2317 rcu_gp_kthread_wake();
2318 }
2319}
2320
2321
2322
2323
2324
2325
2326
2327static void
2328rcu_check_quiescent_state(struct rcu_data *rdp)
2329{
2330
2331 note_gp_changes(rdp);
2332
2333
2334
2335
2336
2337 if (!rdp->core_needs_qs)
2338 return;
2339
2340
2341
2342
2343
2344 if (rdp->cpu_no_qs.b.norm)
2345 return;
2346
2347
2348
2349
2350
2351 rcu_report_qs_rdp(rdp);
2352}
2353
2354
2355
2356
2357
2358int rcutree_dying_cpu(unsigned int cpu)
2359{
2360 bool blkd;
2361 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
2362 struct rcu_node *rnp = rdp->mynode;
2363
2364 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2365 return 0;
2366
2367 blkd = !!(rnp->qsmask & rdp->grpmask);
2368 trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
2369 blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
2370 return 0;
2371}
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2391{
2392 long mask;
2393 struct rcu_node *rnp = rnp_leaf;
2394
2395 raw_lockdep_assert_held_rcu_node(rnp_leaf);
2396 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2397 WARN_ON_ONCE(rnp_leaf->qsmaskinit) ||
2398 WARN_ON_ONCE(rcu_preempt_has_tasks(rnp_leaf)))
2399 return;
2400 for (;;) {
2401 mask = rnp->grpmask;
2402 rnp = rnp->parent;
2403 if (!rnp)
2404 break;
2405 raw_spin_lock_rcu_node(rnp);
2406 rnp->qsmaskinit &= ~mask;
2407
2408 WARN_ON_ONCE(rnp->qsmask);
2409 if (rnp->qsmaskinit) {
2410 raw_spin_unlock_rcu_node(rnp);
2411
2412 return;
2413 }
2414 raw_spin_unlock_rcu_node(rnp);
2415 }
2416}
2417
2418
2419
2420
2421
2422
2423
2424int rcutree_dead_cpu(unsigned int cpu)
2425{
2426 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2427 struct rcu_node *rnp = rdp->mynode;
2428
2429 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2430 return 0;
2431
2432 WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
2433
2434 rcu_boost_kthread_setaffinity(rnp, -1);
2435
2436 tick_dep_clear(TICK_DEP_BIT_RCU);
2437 return 0;
2438}
2439
2440
2441
2442
2443
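/*
 * Invoke the ready-to-run RCU callbacks on this CPU.  The batch size is
 * normally bounded by ->blimit (scaled by the pending count and
 * rcu_divisor), and long batches additionally yield on a time limit so
 * that callback storms do not hold off the rest of the system.
 */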
2444static void rcu_do_batch(struct rcu_data *rdp)
2445{
2446 int div;
2447 bool __maybe_unused empty;
2448 unsigned long flags;
2449 const bool offloaded = rcu_rdp_is_offloaded(rdp);
2450 struct rcu_head *rhp;
2451 struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
2452 long bl, count = 0;
2453 long pending, tlimit = 0;
2454
2455
2456 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
2457 trace_rcu_batch_start(rcu_state.name,
2458 rcu_segcblist_n_cbs(&rdp->cblist), 0);
2459 trace_rcu_batch_end(rcu_state.name, 0,
2460 !rcu_segcblist_empty(&rdp->cblist),
2461 need_resched(), is_idle_task(current),
2462 rcu_is_callbacks_kthread());
2463 return;
2464 }
2465
2466
2467
2468
2469
2470
2471 local_irq_save(flags);
2472 rcu_nocb_lock(rdp);
2473 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
2474 pending = rcu_segcblist_n_cbs(&rdp->cblist);
2475 div = READ_ONCE(rcu_divisor);
2476 div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
2477 bl = max(rdp->blimit, pending >> div);
2478 if (unlikely(bl > 100)) {
2479 long rrn = READ_ONCE(rcu_resched_ns);
2480
2481 rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn;
2482 tlimit = local_clock() + rrn;
2483 }
2484 trace_rcu_batch_start(rcu_state.name,
2485 rcu_segcblist_n_cbs(&rdp->cblist), bl);
2486 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
2487 if (offloaded)
2488 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2489
2490 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
2491 rcu_nocb_unlock_irqrestore(rdp, flags);
2492
2493
2494 tick_dep_set_task(current, TICK_DEP_BIT_RCU);
2495 rhp = rcu_cblist_dequeue(&rcl);
2496
2497 for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
2498 rcu_callback_t f;
2499
2500 count++;
2501 debug_rcu_head_unqueue(rhp);
2502
2503 rcu_lock_acquire(&rcu_callback_map);
2504 trace_rcu_invoke_callback(rcu_state.name, rhp);
2505
2506 f = rhp->func;
2507 WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
2508 f(rhp);
2509
2510 rcu_lock_release(&rcu_callback_map);
2511
2512
2513
2514
2515 if (count >= bl && !offloaded &&
2516 (need_resched() ||
2517 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
2518 break;
2519 if (unlikely(tlimit)) {
2520
2521 if (likely((count & 31) || local_clock() < tlimit))
2522 continue;
2523
2524 break;
2525 }
2526 if (!in_serving_softirq()) {
2527 local_bh_enable();
2528 lockdep_assert_irqs_enabled();
2529 cond_resched_tasks_rcu_qs();
2530 lockdep_assert_irqs_enabled();
2531 local_bh_disable();
2532 }
2533 }
2534
2535 local_irq_save(flags);
2536 rcu_nocb_lock(rdp);
2537 rdp->n_cbs_invoked += count;
2538 trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
2539 is_idle_task(current), rcu_is_callbacks_kthread());
2540
2541
2542 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
2543 rcu_segcblist_add_len(&rdp->cblist, -count);
2544
2545
2546 count = rcu_segcblist_n_cbs(&rdp->cblist);
2547 if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
2548 rdp->blimit = blimit;
2549
2550
2551 if (count == 0 && rdp->qlen_last_fqs_check != 0) {
2552 rdp->qlen_last_fqs_check = 0;
2553 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2554 } else if (count < rdp->qlen_last_fqs_check - qhimark)
2555 rdp->qlen_last_fqs_check = count;
2556
2557
2558
2559
2560
2561 empty = rcu_segcblist_empty(&rdp->cblist);
2562 WARN_ON_ONCE(count == 0 && !empty);
2563 WARN_ON_ONCE(!IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
2564 count != 0 && empty);
2565 WARN_ON_ONCE(count == 0 && rcu_segcblist_n_segment_cbs(&rdp->cblist) != 0);
2566 WARN_ON_ONCE(!empty && rcu_segcblist_n_segment_cbs(&rdp->cblist) == 0);
2567
2568 rcu_nocb_unlock_irqrestore(rdp, flags);
2569
2570
2571 if (!offloaded && rcu_segcblist_ready_cbs(&rdp->cblist))
2572 invoke_rcu_core();
2573 tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
2574}
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584void rcu_sched_clock_irq(int user)
2585{
2586 trace_rcu_utilization(TPS("Start scheduler-tick"));
2587 lockdep_assert_irqs_disabled();
2588 raw_cpu_inc(rcu_data.ticks_this_gp);
2589
2590 if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
2591
2592 if (!rcu_is_cpu_rrupt_from_idle() && !user) {
2593 set_tsk_need_resched(current);
2594 set_preempt_need_resched();
2595 }
2596 __this_cpu_write(rcu_data.rcu_urgent_qs, false);
2597 }
2598 rcu_flavor_sched_clock_irq(user);
2599 if (rcu_pending(user))
2600 invoke_rcu_core();
2601 lockdep_assert_irqs_disabled();
2602
2603 trace_rcu_utilization(TPS("End scheduler-tick"));
2604}
2605
2606
2607
2608
2609
2610
2611
2612
2613static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
2614{
2615 int cpu;
2616 unsigned long flags;
2617 unsigned long mask;
2618 struct rcu_data *rdp;
2619 struct rcu_node *rnp;
2620
2621 rcu_state.cbovld = rcu_state.cbovldnext;
2622 rcu_state.cbovldnext = false;
2623 rcu_for_each_leaf_node(rnp) {
2624 cond_resched_tasks_rcu_qs();
2625 mask = 0;
2626 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2627 rcu_state.cbovldnext |= !!rnp->cbovldmask;
2628 if (rnp->qsmask == 0) {
2629 if (rcu_preempt_blocked_readers_cgp(rnp)) {
2630
2631
2632
2633
2634
2635 rcu_initiate_boost(rnp, flags);
2636
2637 continue;
2638 }
2639 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2640 continue;
2641 }
2642 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) {
2643 rdp = per_cpu_ptr(&rcu_data, cpu);
2644 if (f(rdp)) {
2645 mask |= rdp->grpmask;
2646 rcu_disable_urgency_upon_qs(rdp);
2647 }
2648 }
2649 if (mask != 0) {
2650
2651 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2652 } else {
2653
2654 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2655 }
2656 }
2657}
2658
2659
2660
2661
2662
2663void rcu_force_quiescent_state(void)
2664{
2665 unsigned long flags;
2666 bool ret;
2667 struct rcu_node *rnp;
2668 struct rcu_node *rnp_old = NULL;
2669
2670
2671 rnp = __this_cpu_read(rcu_data.mynode);
2672 for (; rnp != NULL; rnp = rnp->parent) {
2673 ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
2674 !raw_spin_trylock(&rnp->fqslock);
2675 if (rnp_old != NULL)
2676 raw_spin_unlock(&rnp_old->fqslock);
2677 if (ret)
2678 return;
2679 rnp_old = rnp;
2680 }
2681
2682
2683
2684 raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
2685 raw_spin_unlock(&rnp_old->fqslock);
2686 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
2687 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2688 return;
2689 }
2690 WRITE_ONCE(rcu_state.gp_flags,
2691 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
2692 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2693 rcu_gp_kthread_wake();
2694}
2695EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
2696
2697
2698
2699static void strict_work_handler(struct work_struct *work)
2700{
2701 rcu_read_lock();
2702 rcu_read_unlock();
2703}
2704
2705
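/*
 * Per-CPU RCU core processing, run from softirq or from the rcuc kthread:
 * report deferred quiescent states, notice grace-period changes, request a
 * new grace period if callbacks are waiting for one, and invoke any
 * callbacks whose grace period has completed.
 */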
2706static __latent_entropy void rcu_core(void)
2707{
2708 unsigned long flags;
2709 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
2710 struct rcu_node *rnp = rdp->mynode;
2711 const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
2712
2713 if (cpu_is_offline(smp_processor_id()))
2714 return;
2715 trace_rcu_utilization(TPS("Start RCU core"));
2716 WARN_ON_ONCE(!rdp->beenonline);
2717
2718
2719 if (!(preempt_count() & PREEMPT_MASK)) {
2720 rcu_preempt_deferred_qs(current);
2721 } else if (rcu_preempt_need_deferred_qs(current)) {
2722 set_tsk_need_resched(current);
2723 set_preempt_need_resched();
2724 }
2725
2726
2727 rcu_check_quiescent_state(rdp);
2728
2729
2730 if (!rcu_gp_in_progress() &&
2731 rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) {
2732 rcu_nocb_lock_irqsave(rdp, flags);
2733 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
2734 rcu_accelerate_cbs_unlocked(rnp, rdp);
2735 rcu_nocb_unlock_irqrestore(rdp, flags);
2736 }
2737
2738 rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
2739
2740
2741 if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
2742 likely(READ_ONCE(rcu_scheduler_fully_active)))
2743 rcu_do_batch(rdp);
2744
2745
2746 do_nocb_deferred_wakeup(rdp);
2747 trace_rcu_utilization(TPS("End RCU core"));
2748
2749
2750 if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
2751 queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
2752}
2753
2754static void rcu_core_si(struct softirq_action *h)
2755{
2756 rcu_core();
2757}
2758
2759static void rcu_wake_cond(struct task_struct *t, int status)
2760{
2761
2762
2763
2764
2765 if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
2766 wake_up_process(t);
2767}
2768
2769static void invoke_rcu_core_kthread(void)
2770{
2771 struct task_struct *t;
2772 unsigned long flags;
2773
2774 local_irq_save(flags);
2775 __this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
2776 t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
2777 if (t != NULL && t != current)
2778 rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
2779 local_irq_restore(flags);
2780}
2781
2782
2783
2784
2785static void invoke_rcu_core(void)
2786{
2787 if (!cpu_online(smp_processor_id()))
2788 return;
2789 if (use_softirq)
2790 raise_softirq(RCU_SOFTIRQ);
2791 else
2792 invoke_rcu_core_kthread();
2793}
2794
2795static void rcu_cpu_kthread_park(unsigned int cpu)
2796{
2797 per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
2798}
2799
2800static int rcu_cpu_kthread_should_run(unsigned int cpu)
2801{
2802 return __this_cpu_read(rcu_data.rcu_cpu_has_work);
2803}
2804
/*
 * Per-CPU kernel thread that invokes RCU core processing if there is
 * RCU work to do.  Used by kernels that run RCU core processing in
 * rcuc kthreads instead of in RCU_SOFTIRQ context.
 */
2810static void rcu_cpu_kthread(unsigned int cpu)
2811{
2812 unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
2813 char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
2814 int spincnt;
2815
2816 trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
2817 for (spincnt = 0; spincnt < 10; spincnt++) {
2818 local_bh_disable();
2819 *statusp = RCU_KTHREAD_RUNNING;
2820 local_irq_disable();
2821 work = *workp;
2822 *workp = 0;
2823 local_irq_enable();
2824 if (work)
2825 rcu_core();
2826 local_bh_enable();
2827 if (*workp == 0) {
2828 trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
2829 *statusp = RCU_KTHREAD_WAITING;
2830 return;
2831 }
2832 }
2833 *statusp = RCU_KTHREAD_YIELDING;
2834 trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
2835 schedule_timeout_idle(2);
2836 trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
2837 *statusp = RCU_KTHREAD_WAITING;
2838}
2839
2840static struct smp_hotplug_thread rcu_cpu_thread_spec = {
2841 .store = &rcu_data.rcu_cpu_kthread_task,
2842 .thread_should_run = rcu_cpu_kthread_should_run,
2843 .thread_fn = rcu_cpu_kthread,
2844 .thread_comm = "rcuc/%u",
2845 .setup = rcu_cpu_kthread_setup,
2846 .park = rcu_cpu_kthread_park,
2847};
2848
/*
 * Spawn per-CPU RCU core processing kthreads.
 */
2852static int __init rcu_spawn_core_kthreads(void)
2853{
2854 int cpu;
2855
2856 for_each_possible_cpu(cpu)
2857 per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
2858 if (!IS_ENABLED(CONFIG_RCU_BOOST) && use_softirq)
2859 return 0;
2860 WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
2861 "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
2862 return 0;
2863}
2864
/*
 * Handle any core-RCU processing required by a call_rcu() invocation.
 */
2868static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
2869 unsigned long flags)
2870{
2871
2872
2873
2874
2875 if (!rcu_is_watching())
2876 invoke_rcu_core();
2877
2878
2879 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2880 return;
2881
2882
2883
2884
2885
2886
2887
2888
2889 if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
2890 rdp->qlen_last_fqs_check + qhimark)) {
2891
2892
2893 note_gp_changes(rdp);
2894
2895
2896 if (!rcu_gp_in_progress()) {
2897 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
2898 } else {
2899
2900 rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
2901 if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
2902 rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
2903 rcu_force_quiescent_state();
2904 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2905 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2906 }
2907 }
2908}
2909
/*
 * RCU callback function to leak a callback.  Substituted for callbacks
 * that have been double-queued; see __call_rcu().
 */
2913static void rcu_leak_callback(struct rcu_head *rhp)
2914{
2915}
2916
/*
 * Set or clear this CPU's bit in its leaf rcu_node structure's
 * ->cbovldmask according to whether this CPU's callback list has
 * reached the overload threshold (qovld_calc).  The caller must hold
 * the leaf rcu_node structure's ->lock.
 */
2923static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp)
2924{
2925 raw_lockdep_assert_held_rcu_node(rnp);
2926 if (qovld_calc <= 0)
2927 return;
2928 if (rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc)
2929 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask | rdp->grpmask);
2930 else
2931 WRITE_ONCE(rnp->cbovldmask, rnp->cbovldmask & ~rdp->grpmask);
2932}
2933
/*
 * Update this CPU's ->cbovldmask bit in its leaf rcu_node structure,
 * but only if that bit does not already match the callback-overload
 * state of this CPU's callback list.  This avoids acquiring the leaf
 * rcu_node structure's ->lock in the common case where nothing has
 * changed.  A non-positive qovld_calc value disables callback-overload
 * checking entirely.
 *
 * The bit might be momentarily stale with respect to concurrent
 * enqueues and invocations, which is harmless because the overload
 * indication is only a heuristic used to adjust grace-period
 * processing.
 */
2946static void check_cb_ovld(struct rcu_data *rdp)
2947{
2948 struct rcu_node *const rnp = rdp->mynode;
2949
2950 if (qovld_calc <= 0 ||
2951 ((rcu_segcblist_n_cbs(&rdp->cblist) >= qovld_calc) ==
2952 !!(READ_ONCE(rnp->cbovldmask) & rdp->grpmask)))
2953 return;
2954 raw_spin_lock_rcu_node(rnp);
2955 check_cb_ovld_locked(rdp, rnp);
2956 raw_spin_unlock_rcu_node(rnp);
2957}
2958
/* Helper function for call_rcu() and friends.  */
2960static void
2961__call_rcu(struct rcu_head *head, rcu_callback_t func)
2962{
2963 static atomic_t doublefrees;
2964 unsigned long flags;
2965 struct rcu_data *rdp;
2966 bool was_alldone;
2967
2968
2969 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
2970
2971 if (debug_rcu_head_queue(head)) {
2972
2973
2974
2975
2976
2977 if (atomic_inc_return(&doublefrees) < 4) {
2978 pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
2979 mem_dump_obj(head);
2980 }
2981 WRITE_ONCE(head->func, rcu_leak_callback);
2982 return;
2983 }
2984 head->func = func;
2985 head->next = NULL;
2986 local_irq_save(flags);
2987 kasan_record_aux_stack(head);
2988 rdp = this_cpu_ptr(&rcu_data);
2989
2990
2991 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
2992
2993 WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE);
2994 WARN_ON_ONCE(!rcu_is_watching());
2995
2996
2997 if (rcu_segcblist_empty(&rdp->cblist))
2998 rcu_segcblist_init(&rdp->cblist);
2999 }
3000
3001 check_cb_ovld(rdp);
3002 if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
3003 return;
3004
3005 rcu_segcblist_enqueue(&rdp->cblist, head);
3006 if (__is_kvfree_rcu_offset((unsigned long)func))
3007 trace_rcu_kvfree_callback(rcu_state.name, head,
3008 (unsigned long)func,
3009 rcu_segcblist_n_cbs(&rdp->cblist));
3010 else
3011 trace_rcu_callback(rcu_state.name, head,
3012 rcu_segcblist_n_cbs(&rdp->cblist));
3013
3014 trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
3015
3016
3017 if (unlikely(rcu_rdp_is_offloaded(rdp))) {
3018 __call_rcu_nocb_wake(rdp, was_alldone, flags);
3019 } else {
3020 __call_rcu_core(rdp, head, flags);
3021 local_irq_restore(flags);
3022 }
3023}
3024
/**
 * call_rcu() - Queue an RCU callback for invocation after a grace period.
 * @head: structure to be used for queueing the RCU updates.
 * @func: actual callback function to be invoked after the grace period
 *
 * The callback function will be invoked some time after a full grace
 * period elapses, in other words after all pre-existing RCU read-side
 * critical sections have completed.  However, the callback function
 * might well execute concurrently with RCU read-side critical sections
 * that started after call_rcu() was invoked.
 *
 * RCU read-side critical sections are delimited by rcu_read_lock()
 * and rcu_read_unlock(), and may be nested.  In addition, regions of
 * code across which interrupts, preemption, or softirqs have been
 * disabled also serve as RCU read-side critical sections, which
 * includes hardware interrupt handlers, softirq handlers, and NMI
 * handlers.
 *
 * Note that all CPUs must agree that the grace period extended beyond
 * all pre-existing RCU read-side critical sections.  This means that
 * when func() is invoked, each CPU is guaranteed to have executed a
 * full memory barrier since the end of its last RCU read-side critical
 * section whose beginning preceded the call_rcu().
 */
3065void call_rcu(struct rcu_head *head, rcu_callback_t func)
3066{
3067 __call_rcu(head, func);
3068}
3069EXPORT_SYMBOL_GPL(call_rcu);
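/*
 * Illustrative usage sketch for call_rcu().  The "struct foo" type and
 * foo_reclaim() callback below are hypothetical and not defined in this
 * file; they only show the usual pattern of embedding an rcu_head in
 * the protected object and freeing it from the callback:
 *
 *	struct foo {
 *		struct rcu_head rcu;
 *		int data;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rhp)
 *	{
 *		struct foo *fp = container_of(rhp, struct foo, rcu);
 *
 *		kfree(fp);
 *	}
 *
 *	// After making fp unreachable to new readers:
 *	call_rcu(&fp->rcu, foo_reclaim);
 */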
3070
3071
3072
3073#define KFREE_DRAIN_JIFFIES (HZ / 50)
3074#define KFREE_N_BATCHES 2
3075#define FREE_N_CHANNELS 2
3076
/**
 * struct kvfree_rcu_bulk_data - single block to store kvfree_rcu() pointers
 * @nr_records: Number of active pointers in the array
 * @next: Next bulk object in the block chain
 * @records: Array of the kvfree_rcu() pointers
 */
3083struct kvfree_rcu_bulk_data {
3084 unsigned long nr_records;
3085 struct kvfree_rcu_bulk_data *next;
3086 void *records[];
3087};
3088
/*
 * This macro defines how many entries the "records" array
 * will contain.  It is based on the fact that the size of the
 * kvfree_rcu_bulk_data structure becomes exactly one page.
 */
3094#define KVFREE_BULK_MAX_ENTR \
3095 ((PAGE_SIZE - sizeof(struct kvfree_rcu_bulk_data)) / sizeof(void *))
3096
/**
 * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
 * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
 * @head_free: List of kfree_rcu() objects waiting for a grace period
 * @bkvhead_free: Bulk-List of kvfree_rcu() objects waiting for a grace period
 * @krcp: Pointer to @kfree_rcu_cpu structure
 */
3105struct kfree_rcu_cpu_work {
3106 struct rcu_work rcu_work;
3107 struct rcu_head *head_free;
3108 struct kvfree_rcu_bulk_data *bkvhead_free[FREE_N_CHANNELS];
3109 struct kfree_rcu_cpu *krcp;
3110};
3111
/**
 * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
 * @head: List of kfree_rcu() objects not yet waiting for a grace period
 * @bkvhead: Bulk-List of kvfree_rcu() objects not yet waiting for a grace period
 * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
 * @lock: Synchronize access to this structure
 * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
 * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
 * @initialized: The @rcu_work fields have been initialized
 * @count: Number of objects for which the grace period has not yet started
 * @page_cache_work: A work to refill the page cache when it is empty
 * @backoff_page_cache_fill: Delay cache refills under memory pressure
 * @work_in_progress: Indicates that @page_cache_work is running
 * @hrtimer: An hrtimer used to schedule @page_cache_work
 * @bkvcache: A simple cache list of pages for reuse, protected by @lock
 * @nr_bkv_objs: Number of allocated objects at @bkvcache
 *
 * This is a per-CPU structure.  It is kept separate from rcu_data so
 * that this batching code can be maintained and optimized independently
 * of the core-RCU data structures.
 */
3138struct kfree_rcu_cpu {
3139 struct rcu_head *head;
3140 struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS];
3141 struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
3142 raw_spinlock_t lock;
3143 struct delayed_work monitor_work;
3144 bool monitor_todo;
3145 bool initialized;
3146 int count;
3147
3148 struct delayed_work page_cache_work;
3149 atomic_t backoff_page_cache_fill;
3150 atomic_t work_in_progress;
3151 struct hrtimer hrtimer;
3152
3153 struct llist_head bkvcache;
3154 int nr_bkv_objs;
3155};
3156
3157static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc) = {
3158 .lock = __RAW_SPIN_LOCK_UNLOCKED(krc.lock),
3159};
3160
3161static __always_inline void
3162debug_rcu_bhead_unqueue(struct kvfree_rcu_bulk_data *bhead)
3163{
3164#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
3165 int i;
3166
3167 for (i = 0; i < bhead->nr_records; i++)
3168 debug_rcu_head_unqueue((struct rcu_head *)(bhead->records[i]));
3169#endif
3170}
3171
3172static inline struct kfree_rcu_cpu *
3173krc_this_cpu_lock(unsigned long *flags)
3174{
3175 struct kfree_rcu_cpu *krcp;
3176
3177 local_irq_save(*flags);
3178 krcp = this_cpu_ptr(&krc);
3179 raw_spin_lock(&krcp->lock);
3180
3181 return krcp;
3182}
3183
3184static inline void
3185krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
3186{
3187 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3188}
3189
3190static inline struct kvfree_rcu_bulk_data *
3191get_cached_bnode(struct kfree_rcu_cpu *krcp)
3192{
3193 if (!krcp->nr_bkv_objs)
3194 return NULL;
3195
3196 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs - 1);
3197 return (struct kvfree_rcu_bulk_data *)
3198 llist_del_first(&krcp->bkvcache);
3199}
3200
3201static inline bool
3202put_cached_bnode(struct kfree_rcu_cpu *krcp,
3203 struct kvfree_rcu_bulk_data *bnode)
3204{
3205
3206 if (krcp->nr_bkv_objs >= rcu_min_cached_objs)
3207 return false;
3208
3209 llist_add((struct llist_node *) bnode, &krcp->bkvcache);
3210 WRITE_ONCE(krcp->nr_bkv_objs, krcp->nr_bkv_objs + 1);
3211 return true;
3212}
3213
3214static int
3215drain_page_cache(struct kfree_rcu_cpu *krcp)
3216{
3217 unsigned long flags;
3218 struct llist_node *page_list, *pos, *n;
3219 int freed = 0;
3220
3221 raw_spin_lock_irqsave(&krcp->lock, flags);
3222 page_list = llist_del_all(&krcp->bkvcache);
3223 WRITE_ONCE(krcp->nr_bkv_objs, 0);
3224 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3225
3226 llist_for_each_safe(pos, n, page_list) {
3227 free_page((unsigned long)pos);
3228 freed++;
3229 }
3230
3231 return freed;
3232}
3233
/*
 * This function is invoked in workqueue context after a grace period.
 * It frees all the objects queued on ->bkvhead_free or ->head_free.
 */
3238static void kfree_rcu_work(struct work_struct *work)
3239{
3240 unsigned long flags;
3241 struct kvfree_rcu_bulk_data *bkvhead[FREE_N_CHANNELS], *bnext;
3242 struct rcu_head *head, *next;
3243 struct kfree_rcu_cpu *krcp;
3244 struct kfree_rcu_cpu_work *krwp;
3245 int i, j;
3246
3247 krwp = container_of(to_rcu_work(work),
3248 struct kfree_rcu_cpu_work, rcu_work);
3249 krcp = krwp->krcp;
3250
3251 raw_spin_lock_irqsave(&krcp->lock, flags);
3252
3253 for (i = 0; i < FREE_N_CHANNELS; i++) {
3254 bkvhead[i] = krwp->bkvhead_free[i];
3255 krwp->bkvhead_free[i] = NULL;
3256 }
3257
3258
3259 head = krwp->head_free;
3260 krwp->head_free = NULL;
3261 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3262
3263
3264 for (i = 0; i < FREE_N_CHANNELS; i++) {
3265 for (; bkvhead[i]; bkvhead[i] = bnext) {
3266 bnext = bkvhead[i]->next;
3267 debug_rcu_bhead_unqueue(bkvhead[i]);
3268
3269 rcu_lock_acquire(&rcu_callback_map);
3270 if (i == 0) {
3271 trace_rcu_invoke_kfree_bulk_callback(
3272 rcu_state.name, bkvhead[i]->nr_records,
3273 bkvhead[i]->records);
3274
3275 kfree_bulk(bkvhead[i]->nr_records,
3276 bkvhead[i]->records);
3277 } else {
3278 for (j = 0; j < bkvhead[i]->nr_records; j++) {
3279 trace_rcu_invoke_kvfree_callback(
3280 rcu_state.name,
3281 bkvhead[i]->records[j], 0);
3282
3283 vfree(bkvhead[i]->records[j]);
3284 }
3285 }
3286 rcu_lock_release(&rcu_callback_map);
3287
3288 raw_spin_lock_irqsave(&krcp->lock, flags);
3289 if (put_cached_bnode(krcp, bkvhead[i]))
3290 bkvhead[i] = NULL;
3291 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3292
3293 if (bkvhead[i])
3294 free_page((unsigned long) bkvhead[i]);
3295
3296 cond_resched_tasks_rcu_qs();
3297 }
3298 }
3299
3300
3301
3302
3303
3304
3305
3306
3307 for (; head; head = next) {
3308 unsigned long offset = (unsigned long)head->func;
3309 void *ptr = (void *)head - offset;
3310
3311 next = head->next;
3312 debug_rcu_head_unqueue((struct rcu_head *)ptr);
3313 rcu_lock_acquire(&rcu_callback_map);
3314 trace_rcu_invoke_kvfree_callback(rcu_state.name, head, offset);
3315
3316 if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
3317 kvfree(ptr);
3318
3319 rcu_lock_release(&rcu_callback_map);
3320 cond_resched_tasks_rcu_qs();
3321 }
3322}
3323
/*
 * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
 */
3327static void kfree_rcu_monitor(struct work_struct *work)
3328{
3329 struct kfree_rcu_cpu *krcp = container_of(work,
3330 struct kfree_rcu_cpu, monitor_work.work);
3331 unsigned long flags;
3332 int i, j;
3333
3334 raw_spin_lock_irqsave(&krcp->lock, flags);
3335
3336
3337 for (i = 0; i < KFREE_N_BATCHES; i++) {
3338 struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]);
3339
3340
3341
3342
3343
3344
3345 if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) ||
3346 (krcp->bkvhead[1] && !krwp->bkvhead_free[1]) ||
3347 (krcp->head && !krwp->head_free)) {
3348
3349
3350 for (j = 0; j < FREE_N_CHANNELS; j++) {
3351 if (!krwp->bkvhead_free[j]) {
3352 krwp->bkvhead_free[j] = krcp->bkvhead[j];
3353 krcp->bkvhead[j] = NULL;
3354 }
3355 }
3356
3357
3358
3359 if (!krwp->head_free) {
3360 krwp->head_free = krcp->head;
3361 krcp->head = NULL;
3362 }
3363
3364 WRITE_ONCE(krcp->count, 0);
3365
3366
3367
3368
3369
3370
3371 queue_rcu_work(system_wq, &krwp->rcu_work);
3372 }
3373 }
3374
3375
3376
3377
3378
3379
3380 if (!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head)
3381 krcp->monitor_todo = false;
3382 else
3383 schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
3384
3385 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3386}
3387
3388static enum hrtimer_restart
3389schedule_page_work_fn(struct hrtimer *t)
3390{
3391 struct kfree_rcu_cpu *krcp =
3392 container_of(t, struct kfree_rcu_cpu, hrtimer);
3393
3394 queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
3395 return HRTIMER_NORESTART;
3396}
3397
3398static void fill_page_cache_func(struct work_struct *work)
3399{
3400 struct kvfree_rcu_bulk_data *bnode;
3401 struct kfree_rcu_cpu *krcp =
3402 container_of(work, struct kfree_rcu_cpu,
3403 page_cache_work.work);
3404 unsigned long flags;
3405 int nr_pages;
3406 bool pushed;
3407 int i;
3408
3409 nr_pages = atomic_read(&krcp->backoff_page_cache_fill) ?
3410 1 : rcu_min_cached_objs;
3411
3412 for (i = 0; i < nr_pages; i++) {
3413 bnode = (struct kvfree_rcu_bulk_data *)
3414 __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
3415
3416 if (bnode) {
3417 raw_spin_lock_irqsave(&krcp->lock, flags);
3418 pushed = put_cached_bnode(krcp, bnode);
3419 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3420
3421 if (!pushed) {
3422 free_page((unsigned long) bnode);
3423 break;
3424 }
3425 }
3426 }
3427
3428 atomic_set(&krcp->work_in_progress, 0);
3429 atomic_set(&krcp->backoff_page_cache_fill, 0);
3430}
3431
3432static void
3433run_page_cache_worker(struct kfree_rcu_cpu *krcp)
3434{
3435 if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
3436 !atomic_xchg(&krcp->work_in_progress, 1)) {
3437 if (atomic_read(&krcp->backoff_page_cache_fill)) {
3438 queue_delayed_work(system_wq,
3439 &krcp->page_cache_work,
3440 msecs_to_jiffies(rcu_delay_page_cache_fill_msec));
3441 } else {
3442 hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3443 krcp->hrtimer.function = schedule_page_work_fn;
3444 hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
3445 }
3446 }
3447}
3448
/*
 * Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
 * state specified by flags.  If can_alloc is true, the caller must
 * be schedulable and not be holding any locks or mutexes that might be
 * acquired by the memory allocator or anything that it invokes.
 */
3455static inline bool
3456add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
3457 unsigned long *flags, void *ptr, bool can_alloc)
3458{
3459 struct kvfree_rcu_bulk_data *bnode;
3460 int idx;
3461
3462 *krcp = krc_this_cpu_lock(flags);
3463 if (unlikely(!(*krcp)->initialized))
3464 return false;
3465
3466 idx = !!is_vmalloc_addr(ptr);
3467
3468
3469 if (!(*krcp)->bkvhead[idx] ||
3470 (*krcp)->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
3471 bnode = get_cached_bnode(*krcp);
3472 if (!bnode && can_alloc) {
3473 krc_this_cpu_unlock(*krcp, *flags);
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486 bnode = (struct kvfree_rcu_bulk_data *)
3487 __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
3488 *krcp = krc_this_cpu_lock(flags);
3489 }
3490
3491 if (!bnode)
3492 return false;
3493
3494
3495 bnode->nr_records = 0;
3496 bnode->next = (*krcp)->bkvhead[idx];
3497
3498
3499 (*krcp)->bkvhead[idx] = bnode;
3500 }
3501
3502
3503 (*krcp)->bkvhead[idx]->records
3504 [(*krcp)->bkvhead[idx]->nr_records++] = ptr;
3505
3506 return true;
3507}
3508
/*
 * Queue a request for lazy invocation of the appropriate free routine
 * after a grace period.  Please note that three paths are maintained:
 * two main ones that use the array-of-pointers interface and a third
 * emergency one used only when the main paths cannot be maintained
 * temporarily, due to memory pressure.
 *
 * Each kvfree_call_rcu() request is added to a batch.  The batch will
 * be drained every KFREE_DRAIN_JIFFIES number of jiffies, and all the
 * objects in the batch will be freed in workqueue context.  Batching
 * requests together reduces the number of grace periods needed during
 * heavy kfree_rcu()/kvfree_rcu() load.
 */
3521void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
3522{
3523 unsigned long flags;
3524 struct kfree_rcu_cpu *krcp;
3525 bool success;
3526 void *ptr;
3527
3528 if (head) {
3529 ptr = (void *) head - (unsigned long) func;
3530 } else {
		/*
		 * Please note there is a limitation for the head-less
		 * variant, which is why there is a clear rule for such
		 * objects: it can be used from might_sleep() context
		 * only.  For other places please embed an rcu_head to
		 * your data.
		 */
3538 might_sleep();
3539 ptr = (unsigned long *) func;
3540 }
3541
3542
3543 if (debug_rcu_head_queue(ptr)) {
3544
3545 WARN_ONCE(1, "%s(): Double-freed call. rcu_head %p\n",
3546 __func__, head);
3547
3548
3549 return;
3550 }
3551
3552 kasan_record_aux_stack(ptr);
3553 success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
3554 if (!success) {
3555 run_page_cache_worker(krcp);
3556
3557 if (head == NULL)
3558
3559 goto unlock_return;
3560
3561 head->func = func;
3562 head->next = krcp->head;
3563 krcp->head = head;
3564 success = true;
3565 }
3566
3567 WRITE_ONCE(krcp->count, krcp->count + 1);
3568
3569
3570 if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
3571 !krcp->monitor_todo) {
3572 krcp->monitor_todo = true;
3573 schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
3574 }
3575
3576unlock_return:
3577 krc_this_cpu_unlock(krcp, flags);
3578
3579
3580
3581
3582
3583
3584 if (!success) {
3585 debug_rcu_head_unqueue((struct rcu_head *) ptr);
3586 synchronize_rcu();
3587 kvfree(ptr);
3588 }
3589}
3590EXPORT_SYMBOL_GPL(kvfree_call_rcu);
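/*
 * Illustrative sketch only: kvfree_call_rcu() is normally reached
 * through the kfree_rcu()/kvfree_rcu() macros rather than called
 * directly, and the "struct foo" type below is hypothetical:
 *
 *	struct foo {
 *		int data;
 *		struct rcu_head rcu;
 *	};
 *
 *	// Double-argument form, usable from atomic context:
 *	kfree_rcu(fp, rcu);
 *
 *	// Single-argument ("head-less") form, may sleep:
 *	kvfree_rcu(ptr);
 */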
3591
3592static unsigned long
3593kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
3594{
3595 int cpu;
3596 unsigned long count = 0;
3597
3598
3599 for_each_possible_cpu(cpu) {
3600 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3601
3602 count += READ_ONCE(krcp->count);
3603 count += READ_ONCE(krcp->nr_bkv_objs);
3604 atomic_set(&krcp->backoff_page_cache_fill, 1);
3605 }
3606
3607 return count;
3608}
3609
3610static unsigned long
3611kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
3612{
3613 int cpu, freed = 0;
3614
3615 for_each_possible_cpu(cpu) {
3616 int count;
3617 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3618
3619 count = krcp->count;
3620 count += drain_page_cache(krcp);
3621 kfree_rcu_monitor(&krcp->monitor_work.work);
3622
3623 sc->nr_to_scan -= count;
3624 freed += count;
3625
3626 if (sc->nr_to_scan <= 0)
3627 break;
3628 }
3629
3630 return freed == 0 ? SHRINK_STOP : freed;
3631}
3632
3633static struct shrinker kfree_rcu_shrinker = {
3634 .count_objects = kfree_rcu_shrink_count,
3635 .scan_objects = kfree_rcu_shrink_scan,
3636 .batch = 0,
3637 .seeks = DEFAULT_SEEKS,
3638};
3639
3640void __init kfree_rcu_scheduler_running(void)
3641{
3642 int cpu;
3643 unsigned long flags;
3644
3645 for_each_possible_cpu(cpu) {
3646 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
3647
3648 raw_spin_lock_irqsave(&krcp->lock, flags);
3649 if ((!krcp->bkvhead[0] && !krcp->bkvhead[1] && !krcp->head) ||
3650 krcp->monitor_todo) {
3651 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3652 continue;
3653 }
3654 krcp->monitor_todo = true;
3655 schedule_delayed_work_on(cpu, &krcp->monitor_work,
3656 KFREE_DRAIN_JIFFIES);
3657 raw_spin_unlock_irqrestore(&krcp->lock, flags);
3658 }
3659}
3660
/*
 * During early boot, any blocking grace-period wait automatically
 * implies a grace period.  Later on, this is never the case for PREEMPTION.
 *
 * However, because a context switch is a grace period for !PREEMPTION, any
 * blocking grace-period wait automatically implies a grace period if
 * there is only one CPU online at any point during execution of either
 * synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
 * occasionally incorrectly indicate that there are multiple CPUs online
 * when there was in fact only one the whole time, as this just adds some
 * overhead: RCU still operates correctly.
 */
3673static int rcu_blocking_is_gp(void)
3674{
3675 int ret;
3676
3677 if (IS_ENABLED(CONFIG_PREEMPTION))
3678 return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
3679 might_sleep();
3680 preempt_disable();
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694 ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
3695 preempt_enable();
3696 return ret;
3697}
3698
/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  Note, however, that
 * upon return from synchronize_rcu(), the caller might well be executing
 * concurrently with new RCU read-side critical sections that began while
 * synchronize_rcu() was waiting.
 *
 * RCU read-side critical sections are delimited by rcu_read_lock()
 * and rcu_read_unlock(), and may be nested.  In addition, regions of
 * code across which interrupts, preemption, or softirqs have been
 * disabled also serve as RCU read-side critical sections.  This
 * includes hardware interrupt handlers, softirq handlers, and NMI
 * handlers.
 *
 * Note that this guarantee implies further memory-ordering guarantees.
 * On systems with more than one CPU, when synchronize_rcu() returns,
 * each CPU is guaranteed to have executed a full memory barrier since
 * the end of its last RCU read-side critical section whose beginning
 * preceded the call to synchronize_rcu().
 *
 * This primitive must not be called from within an RCU read-side
 * critical section, from interrupt context, or with interrupts disabled.
 */
3737void synchronize_rcu(void)
3738{
3739 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
3740 lock_is_held(&rcu_lock_map) ||
3741 lock_is_held(&rcu_sched_lock_map),
3742 "Illegal synchronize_rcu() in RCU read-side critical section");
3743 if (rcu_blocking_is_gp())
3744 return;
3745 if (rcu_gp_is_expedited())
3746 synchronize_rcu_expedited();
3747 else
3748 wait_rcu_gp(call_rcu);
3749}
3750EXPORT_SYMBOL_GPL(synchronize_rcu);
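/*
 * Illustrative updater-side sketch.  The "gp" pointer, gp_lock, and the
 * freed object are hypothetical, not defined in this file:
 *
 *	p = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
 *	rcu_assign_pointer(gp, NULL);
 *	synchronize_rcu();	// Wait for pre-existing readers to finish.
 *	kfree(p);
 */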
3751
/**
 * get_state_synchronize_rcu - Snapshot current RCU state
 *
 * Returns a cookie that is used by a later call to cond_synchronize_rcu()
 * or poll_state_synchronize_rcu() to determine whether or not a full
 * grace period has elapsed in the meantime.
 */
3759unsigned long get_state_synchronize_rcu(void)
3760{
3761
3762
3763
3764
3765 smp_mb();
3766 return rcu_seq_snap(&rcu_state.gp_seq);
3767}
3768EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
3769
/**
 * start_poll_synchronize_rcu - Snapshot and start RCU grace period
 *
 * Returns a cookie that is used by a later call to cond_synchronize_rcu()
 * or poll_state_synchronize_rcu() to determine whether or not a full
 * grace period has elapsed in the meantime.  If the needed grace period
 * is not already slated to start, notifies RCU core of the need for that
 * grace period.
 *
 * Interrupts must be enabled for the case where it is necessary to awaken
 * the grace-period kthread.
 */
3782unsigned long start_poll_synchronize_rcu(void)
3783{
3784 unsigned long flags;
3785 unsigned long gp_seq = get_state_synchronize_rcu();
3786 bool needwake;
3787 struct rcu_data *rdp;
3788 struct rcu_node *rnp;
3789
3790 lockdep_assert_irqs_enabled();
3791 local_irq_save(flags);
3792 rdp = this_cpu_ptr(&rcu_data);
3793 rnp = rdp->mynode;
3794 raw_spin_lock_rcu_node(rnp);
3795 needwake = rcu_start_this_gp(rnp, rdp, gp_seq);
3796 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3797 if (needwake)
3798 rcu_gp_kthread_wake();
3799 return gp_seq;
3800}
3801EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
3802
/**
 * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
 *
 * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
 *
 * If a full RCU grace period has elapsed since the earlier call from
 * which @oldstate was obtained, return @true, otherwise return @false.
 * If @false is returned, it is the caller's responsibility to invoke this
 * function later on until it does return @true.  Alternatively, the caller
 * can explicitly wait for a grace period, for example, by passing @oldstate
 * to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
 *
 * Counter wrap is not taken into account, but is harmless: wrapping
 * would require waiting through more than two billion grace periods.
 *
 * This function provides the same memory-ordering guarantees that would
 * be provided by a synchronize_rcu() that was invoked at the call to the
 * function that provided @oldstate and that returned at the end of this
 * function.
 */
3828bool poll_state_synchronize_rcu(unsigned long oldstate)
3829{
3830 if (rcu_seq_done(&rcu_state.gp_seq, oldstate)) {
3831 smp_mb();
3832 return true;
3833 }
3834 return false;
3835}
3836EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
3837
/**
 * cond_synchronize_rcu - Conditionally wait for an RCU grace period
 *
 * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
 *
 * If a full RCU grace period has elapsed since the earlier call to
 * get_state_synchronize_rcu() or start_poll_synchronize_rcu(), just
 * return.  Otherwise, invoke synchronize_rcu() to wait for a full
 * grace period.
 *
 * Counter wrap is not taken into account, but is harmless: wrapping
 * would require waiting through more than two billion grace periods.
 *
 * This function provides the same memory-ordering guarantees that would
 * be provided by a synchronize_rcu() that was invoked at the call to the
 * function that provided @oldstate and that returned at the end of this
 * function.
 */
3857void cond_synchronize_rcu(unsigned long oldstate)
3858{
3859 if (!poll_state_synchronize_rcu(oldstate))
3860 synchronize_rcu();
3861}
3862EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
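/*
 * Illustrative sketch of the polling grace-period API (the "cookie"
 * variable name is arbitrary):
 *
 *	unsigned long cookie;
 *
 *	cookie = get_state_synchronize_rcu();
 *	// ... do other work while the grace period (maybe) elapses ...
 *	cond_synchronize_rcu(cookie);	// No-op if a full GP already elapsed.
 */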
3863
/*
 * Check to see if there is any immediate RCU-related work to be done by
 * the current CPU, returning 1 if so and zero otherwise.  The checks are
 * in order of increasing expense: checks that can be carried out against
 * CPU-local state are performed first.  However, we must check for CPU
 * stalls first, else we might not get a chance due to checks not returning.
 */
3871static int rcu_pending(int user)
3872{
3873 bool gp_in_progress;
3874 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
3875 struct rcu_node *rnp = rdp->mynode;
3876
3877 lockdep_assert_irqs_disabled();
3878
3879
3880 check_cpu_stall(rdp);
3881
3882
3883 if (rcu_nocb_need_deferred_wakeup(rdp, RCU_NOCB_WAKE))
3884 return 1;
3885
3886
3887 if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())
3888 return 0;
3889
3890
3891 gp_in_progress = rcu_gp_in_progress();
3892 if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)
3893 return 1;
3894
3895
3896 if (!rcu_rdp_is_offloaded(rdp) &&
3897 rcu_segcblist_ready_cbs(&rdp->cblist))
3898 return 1;
3899
3900
3901 if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
3902 !rcu_rdp_is_offloaded(rdp) &&
3903 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3904 return 1;
3905
3906
3907 if (rcu_seq_current(&rnp->gp_seq) != rdp->gp_seq ||
3908 unlikely(READ_ONCE(rdp->gpwrap)))
3909 return 1;
3910
3911
3912 return 0;
3913}
3914
3915
3916
3917
3918
3919static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)
3920{
3921 trace_rcu_barrier(rcu_state.name, s, cpu,
3922 atomic_read(&rcu_state.barrier_cpu_count), done);
3923}
3924
/*
 * RCU callback function for rcu_barrier().  If we are last, wake
 * up the task executing rcu_barrier().
 *
 * Note that the value of rcu_state.barrier_sequence must be captured
 * before the atomic_dec_and_test().  Otherwise, if this CPU is not last,
 * a concurrent rcu_barrier() might already have started a new barrier
 * sequence by the time the tracing statements execute, charging this
 * callback to the wrong sequence number.
 */
3935static void rcu_barrier_callback(struct rcu_head *rhp)
3936{
3937 unsigned long __maybe_unused s = rcu_state.barrier_sequence;
3938
3939 if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
3940 rcu_barrier_trace(TPS("LastCB"), -1, s);
3941 complete(&rcu_state.barrier_completion);
3942 } else {
3943 rcu_barrier_trace(TPS("CB"), -1, s);
3944 }
3945}
3946
/*
 * If needed, entrain an rcu_barrier() callback on rdp->cblist.  Invoked
 * via smp_call_function_single() (or directly for offline CPUs) with
 * interrupts disabled.
 */
3950static void rcu_barrier_func(void *cpu_in)
3951{
3952 uintptr_t cpu = (uintptr_t)cpu_in;
3953 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3954
3955 rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
3956 rdp->barrier_head.func = rcu_barrier_callback;
3957 debug_rcu_head_queue(&rdp->barrier_head);
3958 rcu_nocb_lock(rdp);
3959 WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
3960 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
3961 atomic_inc(&rcu_state.barrier_cpu_count);
3962 } else {
3963 debug_rcu_head_unqueue(&rdp->barrier_head);
3964 rcu_barrier_trace(TPS("IRQNQ"), -1,
3965 rcu_state.barrier_sequence);
3966 }
3967 rcu_nocb_unlock(rdp);
3968}
3969
/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 *
 * Note that this primitive does not necessarily wait for an RCU grace period
 * to complete.  For example, if there are no RCU callbacks queued anywhere
 * in the system, then rcu_barrier() is within its rights to return
 * immediately, without waiting for anything, let alone an RCU grace period.
 */
3978void rcu_barrier(void)
3979{
3980 uintptr_t cpu;
3981 struct rcu_data *rdp;
3982 unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
3983
3984 rcu_barrier_trace(TPS("Begin"), -1, s);
3985
3986
3987 mutex_lock(&rcu_state.barrier_mutex);
3988
3989
3990 if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
3991 rcu_barrier_trace(TPS("EarlyExit"), -1,
3992 rcu_state.barrier_sequence);
3993 smp_mb();
3994 mutex_unlock(&rcu_state.barrier_mutex);
3995 return;
3996 }
3997
3998
3999 rcu_seq_start(&rcu_state.barrier_sequence);
4000 rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
4001
4002
4003
4004
4005
4006
4007
4008
4009 init_completion(&rcu_state.barrier_completion);
4010 atomic_set(&rcu_state.barrier_cpu_count, 2);
4011 cpus_read_lock();
4012
4013
4014
4015
4016
4017
4018 for_each_possible_cpu(cpu) {
4019 rdp = per_cpu_ptr(&rcu_data, cpu);
4020 if (cpu_is_offline(cpu) &&
4021 !rcu_rdp_is_offloaded(rdp))
4022 continue;
4023 if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {
4024 rcu_barrier_trace(TPS("OnlineQ"), cpu,
4025 rcu_state.barrier_sequence);
4026 smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
4027 } else if (rcu_segcblist_n_cbs(&rdp->cblist) &&
4028 cpu_is_offline(cpu)) {
4029 rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu,
4030 rcu_state.barrier_sequence);
4031 local_irq_disable();
4032 rcu_barrier_func((void *)cpu);
4033 local_irq_enable();
4034 } else if (cpu_is_offline(cpu)) {
4035 rcu_barrier_trace(TPS("OfflineNoCBNoQ"), cpu,
4036 rcu_state.barrier_sequence);
4037 } else {
4038 rcu_barrier_trace(TPS("OnlineNQ"), cpu,
4039 rcu_state.barrier_sequence);
4040 }
4041 }
4042 cpus_read_unlock();
4043
4044
4045
4046
4047
4048 if (atomic_sub_and_test(2, &rcu_state.barrier_cpu_count))
4049 complete(&rcu_state.barrier_completion);
4050
4051
4052 wait_for_completion(&rcu_state.barrier_completion);
4053
4054
4055 rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
4056 rcu_seq_end(&rcu_state.barrier_sequence);
4057
4058
4059 mutex_unlock(&rcu_state.barrier_mutex);
4060}
4061EXPORT_SYMBOL_GPL(rcu_barrier);
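/*
 * Illustrative module-exit sketch; my_module_exit() and the callbacks it
 * previously posted are hypothetical:
 *
 *	static void __exit my_module_exit(void)
 *	{
 *		// Stop posting new call_rcu() callbacks first, then ...
 *		rcu_barrier();	// ... wait for already-posted callbacks to run,
 *				// so their functions do not outlive the module.
 *	}
 */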
4062
/*
 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the
 * first CPU in a given leaf rcu_node structure coming online.  The caller
 * must hold the corresponding leaf rcu_node ->lock with interrupts
 * disabled.
 */
4069static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
4070{
4071 long mask;
4072 long oldmask;
4073 struct rcu_node *rnp = rnp_leaf;
4074
4075 raw_lockdep_assert_held_rcu_node(rnp_leaf);
4076 WARN_ON_ONCE(rnp->wait_blkd_tasks);
4077 for (;;) {
4078 mask = rnp->grpmask;
4079 rnp = rnp->parent;
4080 if (rnp == NULL)
4081 return;
4082 raw_spin_lock_rcu_node(rnp);
4083 oldmask = rnp->qsmaskinit;
4084 rnp->qsmaskinit |= mask;
4085 raw_spin_unlock_rcu_node(rnp);
4086 if (oldmask)
4087 return;
4088 }
4089}
4090
/*
 * Do boot-time initialization of a CPU's per-CPU rcu_data structure.
 */
4094static void __init
4095rcu_boot_init_percpu_data(int cpu)
4096{
4097 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4098
4099
4100 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
4101 INIT_WORK(&rdp->strict_work, strict_work_handler);
4102 WARN_ON_ONCE(rdp->dynticks_nesting != 1);
4103 WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
4104 rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
4105 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
4106 rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
4107 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
4108 rdp->cpu = cpu;
4109 rcu_boot_init_nocb_percpu_data(rdp);
4110}
4111
/*
 * Invoked early in the CPU-online process, when pretty much all services
 * are available.  The incoming CPU is not present.
 *
 * Initializes a CPU's per-CPU RCU data.  Note that only one online or
 * offline event can be happening at a given time.  Note also that we can
 * accept some slop in the rcu_state.gp_seq access due to the fact that
 * this CPU cannot possibly have any non-offloaded RCU callbacks in flight
 * yet, and any offloaded callbacks are being numbered elsewhere.
 */
4122int rcutree_prepare_cpu(unsigned int cpu)
4123{
4124 unsigned long flags;
4125 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4126 struct rcu_node *rnp = rcu_get_root();
4127
4128
4129 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4130 rdp->qlen_last_fqs_check = 0;
4131 rdp->n_force_qs_snap = rcu_state.n_force_qs;
4132 rdp->blimit = blimit;
4133 rdp->dynticks_nesting = 1;
4134 rcu_dynticks_eqs_online();
4135 raw_spin_unlock_rcu_node(rnp);
4136
4137
4138
4139
4140
4141 if (!rcu_segcblist_is_enabled(&rdp->cblist))
4142 rcu_segcblist_init(&rdp->cblist);
4143
4144
4145
4146
4147
4148
4149 rnp = rdp->mynode;
4150 raw_spin_lock_rcu_node(rnp);
4151 rdp->beenonline = true;
4152 rdp->gp_seq = READ_ONCE(rnp->gp_seq);
4153 rdp->gp_seq_needed = rdp->gp_seq;
4154 rdp->cpu_no_qs.b.norm = true;
4155 rdp->core_needs_qs = false;
4156 rdp->rcu_iw_pending = false;
4157 rdp->rcu_iw = IRQ_WORK_INIT_HARD(rcu_iw_handler);
4158 rdp->rcu_iw_gp_seq = rdp->gp_seq - 1;
4159 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
4160 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4161 rcu_spawn_one_boost_kthread(rnp);
4162 rcu_spawn_cpu_nocb_kthread(cpu);
4163 WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1);
4164
4165 return 0;
4166}
4167
4168
4169
4170
4171static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
4172{
4173 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4174
4175 rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
4176}
4177
4178
4179
4180
4181
4182int rcutree_online_cpu(unsigned int cpu)
4183{
4184 unsigned long flags;
4185 struct rcu_data *rdp;
4186 struct rcu_node *rnp;
4187
4188 rdp = per_cpu_ptr(&rcu_data, cpu);
4189 rnp = rdp->mynode;
4190 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4191 rnp->ffmask |= rdp->grpmask;
4192 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4193 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
4194 return 0;
4195 sync_sched_exp_online_cleanup(cpu);
4196 rcutree_affinity_setting(cpu, -1);
4197
4198
4199 tick_dep_clear(TICK_DEP_BIT_RCU);
4200 return 0;
4201}
4202
4203
4204
4205
4206
4207int rcutree_offline_cpu(unsigned int cpu)
4208{
4209 unsigned long flags;
4210 struct rcu_data *rdp;
4211 struct rcu_node *rnp;
4212
4213 rdp = per_cpu_ptr(&rcu_data, cpu);
4214 rnp = rdp->mynode;
4215 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4216 rnp->ffmask &= ~rdp->grpmask;
4217 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4218
4219 rcutree_affinity_setting(cpu, cpu);
4220
4221
4222 tick_dep_set(TICK_DEP_BIT_RCU);
4223 return 0;
4224}
4225
/*
 * Mark the specified CPU as being online so that subsequent grace periods
 * (both expedited and normal) will wait on it.  Note that this means that
 * incoming CPUs are not allowed to use RCU read-side critical sections
 * until this function is called.  Failing to observe this restriction
 * will result in lockdep splats.
 *
 * Note that this function is special in that it is invoked directly
 * from the incoming CPU rather than from the cpuhp_step mechanism.
 * This is because this function must be invoked at a precise location.
 */
4237void rcu_cpu_starting(unsigned int cpu)
4238{
4239 unsigned long flags;
4240 unsigned long mask;
4241 struct rcu_data *rdp;
4242 struct rcu_node *rnp;
4243 bool newcpu;
4244
4245 rdp = per_cpu_ptr(&rcu_data, cpu);
4246 if (rdp->cpu_started)
4247 return;
4248 rdp->cpu_started = true;
4249
4250 rnp = rdp->mynode;
4251 mask = rdp->grpmask;
4252 WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4253 WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
4254 smp_mb();
4255 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4256 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
4257 newcpu = !(rnp->expmaskinitnext & mask);
4258 rnp->expmaskinitnext |= mask;
4259
4260 smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + newcpu);
4261 ASSERT_EXCLUSIVE_WRITER(rcu_state.ncpus);
4262 rcu_gpnum_ovf(rnp, rdp);
4263 rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
4264 rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
4265
4266
4267 if (WARN_ON_ONCE(rnp->qsmask & mask)) {
4268 rcu_disable_urgency_upon_qs(rdp);
4269
4270 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
4271 } else {
4272 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4273 }
4274 smp_mb();
4275 WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4276 WARN_ON_ONCE(rnp->ofl_seq & 0x1);
4277 smp_mb();
4278}
4279
/*
 * The outgoing CPU has no further need of RCU, so remove it from
 * the rcu_node tree's ->qsmaskinitnext bit masks.
 *
 * Note that this function is special in that it is invoked directly
 * from the outgoing CPU rather than from the cpuhp_step mechanism.
 * This is because this function must be invoked at a precise location.
 */
4288void rcu_report_dead(unsigned int cpu)
4289{
4290 unsigned long flags;
4291 unsigned long mask;
4292 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4293 struct rcu_node *rnp = rdp->mynode;
4294
4295
4296 do_nocb_deferred_wakeup(rdp);
4297
4298
4299 preempt_disable();
4300 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
4301 preempt_enable();
4302 rcu_preempt_deferred_qs(current);
4303
4304
4305 mask = rdp->grpmask;
4306 WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4307 WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
4308 smp_mb();
4309 raw_spin_lock(&rcu_state.ofl_lock);
4310 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4311 rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
4312 rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
4313 if (rnp->qsmask & mask) {
4314
4315 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
4316 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4317 }
4318 WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
4319 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4320 raw_spin_unlock(&rcu_state.ofl_lock);
4321 smp_mb();
4322 WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4323 WARN_ON_ONCE(rnp->ofl_seq & 0x1);
4324
4325 rdp->cpu_started = false;
4326}
4327
4328#ifdef CONFIG_HOTPLUG_CPU
/*
 * The outgoing CPU has just passed through the dying-idle state, and we
 * are being invoked from the CPU that was IPIed to continue the offline
 * operation.  Migrate the outgoing CPU's callbacks to the current CPU.
 */
4334void rcutree_migrate_callbacks(int cpu)
4335{
4336 unsigned long flags;
4337 struct rcu_data *my_rdp;
4338 struct rcu_node *my_rnp;
4339 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
4340 bool needwake;
4341
4342 if (rcu_rdp_is_offloaded(rdp) ||
4343 rcu_segcblist_empty(&rdp->cblist))
4344 return;
4345
4346 local_irq_save(flags);
4347 my_rdp = this_cpu_ptr(&rcu_data);
4348 my_rnp = my_rdp->mynode;
4349 rcu_nocb_lock(my_rdp);
4350 WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
4351 raw_spin_lock_rcu_node(my_rnp);
4352
4353 needwake = rcu_advance_cbs(my_rnp, rdp) ||
4354 rcu_advance_cbs(my_rnp, my_rdp);
4355 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
4356 needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
4357 rcu_segcblist_disable(&rdp->cblist);
4358 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
4359 !rcu_segcblist_n_cbs(&my_rdp->cblist));
4360 if (rcu_rdp_is_offloaded(my_rdp)) {
4361 raw_spin_unlock_rcu_node(my_rnp);
4362 __call_rcu_nocb_wake(my_rdp, true, flags);
4363 } else {
4364 rcu_nocb_unlock(my_rdp);
4365 raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);
4366 }
4367 if (needwake)
4368 rcu_gp_kthread_wake();
4369 lockdep_assert_irqs_enabled();
4370 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
4371 !rcu_segcblist_empty(&rdp->cblist),
4372 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
4373 cpu, rcu_segcblist_n_cbs(&rdp->cblist),
4374 rcu_segcblist_first_cb(&rdp->cblist));
4375}
4376#endif
4377
4378
4379
4380
4381
4382static int rcu_pm_notify(struct notifier_block *self,
4383 unsigned long action, void *hcpu)
4384{
4385 switch (action) {
4386 case PM_HIBERNATION_PREPARE:
4387 case PM_SUSPEND_PREPARE:
4388 rcu_expedite_gp();
4389 break;
4390 case PM_POST_HIBERNATION:
4391 case PM_POST_SUSPEND:
4392 rcu_unexpedite_gp();
4393 break;
4394 default:
4395 break;
4396 }
4397 return NOTIFY_OK;
4398}
4399
/*
 * Spawn the kthreads that handle RCU's grace periods.
 */
4403static int __init rcu_spawn_gp_kthread(void)
4404{
4405 unsigned long flags;
4406 int kthread_prio_in = kthread_prio;
4407 struct rcu_node *rnp;
4408 struct sched_param sp;
4409 struct task_struct *t;
4410
4411
4412 if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 2
4413 && IS_BUILTIN(CONFIG_RCU_TORTURE_TEST))
4414 kthread_prio = 2;
4415 else if (IS_ENABLED(CONFIG_RCU_BOOST) && kthread_prio < 1)
4416 kthread_prio = 1;
4417 else if (kthread_prio < 0)
4418 kthread_prio = 0;
4419 else if (kthread_prio > 99)
4420 kthread_prio = 99;
4421
4422 if (kthread_prio != kthread_prio_in)
4423 pr_alert("rcu_spawn_gp_kthread(): Limited prio to %d from %d\n",
4424 kthread_prio, kthread_prio_in);
4425
4426 rcu_scheduler_fully_active = 1;
4427 t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
4428 if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__))
4429 return 0;
4430 if (kthread_prio) {
4431 sp.sched_priority = kthread_prio;
4432 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
4433 }
4434 rnp = rcu_get_root();
4435 raw_spin_lock_irqsave_rcu_node(rnp, flags);
4436 WRITE_ONCE(rcu_state.gp_activity, jiffies);
4437 WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
4438
4439 smp_store_release(&rcu_state.gp_kthread, t);
4440 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4441 wake_up_process(t);
4442 rcu_spawn_nocb_kthreads();
4443 rcu_spawn_boost_kthreads();
4444 rcu_spawn_core_kthreads();
4445 return 0;
4446}
4447early_initcall(rcu_spawn_gp_kthread);
4448
/*
 * This function is invoked towards the end of the scheduler's
 * initialization process.  Before this is called, the idle task might
 * contain synchronous grace-period primitives (during which time, this
 * idle task is booting the system, and such primitives are no-ops).
 * After this function is called, any synchronous grace-period primitives
 * are run as expedited, with the requesting task driving the grace period
 * forward.  A later core_initcall() will switch to full runtime RCU
 * functionality.
 */
4459void rcu_scheduler_starting(void)
4460{
4461 WARN_ON(num_online_cpus() != 1);
4462 WARN_ON(nr_context_switches() > 0);
4463 rcu_test_sync_prims();
4464 rcu_scheduler_active = RCU_SCHEDULER_INIT;
4465 rcu_test_sync_prims();
4466}
4467
/*
 * Helper function for rcu_init() that initializes the rcu_state structure.
 */
4471static void __init rcu_init_one(void)
4472{
4473 static const char * const buf[] = RCU_NODE_NAME_INIT;
4474 static const char * const fqs[] = RCU_FQS_NAME_INIT;
4475 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
4476 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
4477
4478 int levelspread[RCU_NUM_LVLS];
4479 int cpustride = 1;
4480 int i;
4481 int j;
4482 struct rcu_node *rnp;
4483
4484 BUILD_BUG_ON(RCU_NUM_LVLS > ARRAY_SIZE(buf));
4485
4486
4487 if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS)
4488 panic("rcu_init_one: rcu_num_lvls out of range");
4489
4490
4491
4492 for (i = 1; i < rcu_num_lvls; i++)
4493 rcu_state.level[i] =
4494 rcu_state.level[i - 1] + num_rcu_lvl[i - 1];
4495 rcu_init_levelspread(levelspread, num_rcu_lvl);
4496
4497
4498
4499 for (i = rcu_num_lvls - 1; i >= 0; i--) {
4500 cpustride *= levelspread[i];
4501 rnp = rcu_state.level[i];
4502 for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
4503 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
4504 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
4505 &rcu_node_class[i], buf[i]);
4506 raw_spin_lock_init(&rnp->fqslock);
4507 lockdep_set_class_and_name(&rnp->fqslock,
4508 &rcu_fqs_class[i], fqs[i]);
4509 rnp->gp_seq = rcu_state.gp_seq;
4510 rnp->gp_seq_needed = rcu_state.gp_seq;
4511 rnp->completedqs = rcu_state.gp_seq;
4512 rnp->qsmask = 0;
4513 rnp->qsmaskinit = 0;
4514 rnp->grplo = j * cpustride;
4515 rnp->grphi = (j + 1) * cpustride - 1;
4516 if (rnp->grphi >= nr_cpu_ids)
4517 rnp->grphi = nr_cpu_ids - 1;
4518 if (i == 0) {
4519 rnp->grpnum = 0;
4520 rnp->grpmask = 0;
4521 rnp->parent = NULL;
4522 } else {
4523 rnp->grpnum = j % levelspread[i - 1];
4524 rnp->grpmask = BIT(rnp->grpnum);
4525 rnp->parent = rcu_state.level[i - 1] +
4526 j / levelspread[i - 1];
4527 }
4528 rnp->level = i;
4529 INIT_LIST_HEAD(&rnp->blkd_tasks);
4530 rcu_init_one_nocb(rnp);
4531 init_waitqueue_head(&rnp->exp_wq[0]);
4532 init_waitqueue_head(&rnp->exp_wq[1]);
4533 init_waitqueue_head(&rnp->exp_wq[2]);
4534 init_waitqueue_head(&rnp->exp_wq[3]);
4535 spin_lock_init(&rnp->exp_lock);
4536 }
4537 }
4538
4539 init_swait_queue_head(&rcu_state.gp_wq);
4540 init_swait_queue_head(&rcu_state.expedited_wq);
4541 rnp = rcu_first_leaf_node();
4542 for_each_possible_cpu(i) {
4543 while (i > rnp->grphi)
4544 rnp++;
4545 per_cpu_ptr(&rcu_data, i)->mynode = rnp;
4546 rcu_boot_init_percpu_data(i);
4547 }
4548}
4549
/*
 * Compute the rcu_node tree geometry from kernel parameters.  This cannot
 * replace the definitions in tree.h because those are needed to size the
 * ->node array in the rcu_state structure.
 */
4555void rcu_init_geometry(void)
4556{
4557 ulong d;
4558 int i;
4559 static unsigned long old_nr_cpu_ids;
4560 int rcu_capacity[RCU_NUM_LVLS];
4561 static bool initialized;
4562
4563 if (initialized) {
4564
4565
4566
4567
4568 WARN_ON_ONCE(old_nr_cpu_ids != nr_cpu_ids);
4569 return;
4570 }
4571
4572 old_nr_cpu_ids = nr_cpu_ids;
4573 initialized = true;
4574
4575
4576
4577
4578
4579
4580
4581
4582 d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
4583 if (jiffies_till_first_fqs == ULONG_MAX)
4584 jiffies_till_first_fqs = d;
4585 if (jiffies_till_next_fqs == ULONG_MAX)
4586 jiffies_till_next_fqs = d;
4587 adjust_jiffies_till_sched_qs();
4588
4589
4590 if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
4591 nr_cpu_ids == NR_CPUS)
4592 return;
4593 pr_info("Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
4594 rcu_fanout_leaf, nr_cpu_ids);
4595
4596
4597
4598
4599
4600
4601
4602 if (rcu_fanout_leaf < 2 ||
4603 rcu_fanout_leaf > sizeof(unsigned long) * 8) {
4604 rcu_fanout_leaf = RCU_FANOUT_LEAF;
4605 WARN_ON(1);
4606 return;
4607 }
4608
4609
4610
4611
4612
4613 rcu_capacity[0] = rcu_fanout_leaf;
4614 for (i = 1; i < RCU_NUM_LVLS; i++)
4615 rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT;
4616
4617
4618
4619
4620
4621 if (nr_cpu_ids > rcu_capacity[RCU_NUM_LVLS - 1]) {
4622 rcu_fanout_leaf = RCU_FANOUT_LEAF;
4623 WARN_ON(1);
4624 return;
4625 }
4626
4627
4628 for (i = 0; nr_cpu_ids > rcu_capacity[i]; i++) {
4629 }
4630 rcu_num_lvls = i + 1;
4631
4632
4633 for (i = 0; i < rcu_num_lvls; i++) {
4634 int cap = rcu_capacity[(rcu_num_lvls - 1) - i];
4635 num_rcu_lvl[i] = DIV_ROUND_UP(nr_cpu_ids, cap);
4636 }
4637
4638
4639 rcu_num_nodes = 0;
4640 for (i = 0; i < rcu_num_lvls; i++)
4641 rcu_num_nodes += num_rcu_lvl[i];
4642}
4643
/*
 * Dump out the structure of the rcu_node combining tree associated
 * with the rcu_state structure.
 */
4648static void __init rcu_dump_rcu_node_tree(void)
4649{
4650 int level = 0;
4651 struct rcu_node *rnp;
4652
4653 pr_info("rcu_node tree layout dump\n");
4654 pr_info(" ");
4655 rcu_for_each_node_breadth_first(rnp) {
4656 if (rnp->level != level) {
4657 pr_cont("\n");
4658 pr_info(" ");
4659 level = rnp->level;
4660 }
4661 pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum);
4662 }
4663 pr_cont("\n");
4664}
4665
4666struct workqueue_struct *rcu_gp_wq;
4667struct workqueue_struct *rcu_par_gp_wq;
4668
4669static void __init kfree_rcu_batch_init(void)
4670{
4671 int cpu;
4672 int i;
4673
4674
4675 if (rcu_delay_page_cache_fill_msec < 0 ||
4676 rcu_delay_page_cache_fill_msec > 100 * MSEC_PER_SEC) {
4677
4678 rcu_delay_page_cache_fill_msec =
4679 clamp(rcu_delay_page_cache_fill_msec, 0,
4680 (int) (100 * MSEC_PER_SEC));
4681
4682 pr_info("Adjusting rcutree.rcu_delay_page_cache_fill_msec to %d ms.\n",
4683 rcu_delay_page_cache_fill_msec);
4684 }
4685
4686 for_each_possible_cpu(cpu) {
4687 struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
4688
4689 for (i = 0; i < KFREE_N_BATCHES; i++) {
4690 INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
4691 krcp->krw_arr[i].krcp = krcp;
4692 }
4693
4694 INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
4695 INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
4696 krcp->initialized = true;
4697 }
4698 if (register_shrinker(&kfree_rcu_shrinker))
4699 pr_err("Failed to register kfree_rcu() shrinker!\n");
4700}
4701
4702void __init rcu_init(void)
4703{
4704 int cpu;
4705
4706 rcu_early_boot_tests();
4707
4708 kfree_rcu_batch_init();
4709 rcu_bootup_announce();
4710 rcu_init_geometry();
4711 rcu_init_one();
4712 if (dump_tree)
4713 rcu_dump_rcu_node_tree();
4714 if (use_softirq)
4715 open_softirq(RCU_SOFTIRQ, rcu_core_si);
4716
4717
4718
4719
4720
4721
4722 pm_notifier(rcu_pm_notify, 0);
4723 for_each_online_cpu(cpu) {
4724 rcutree_prepare_cpu(cpu);
4725 rcu_cpu_starting(cpu);
4726 rcutree_online_cpu(cpu);
4727 }
4728
4729
4730 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
4731 WARN_ON(!rcu_gp_wq);
4732 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
4733 WARN_ON(!rcu_par_gp_wq);
4734
4735
4736
4737 if (qovld < 0)
4738 qovld_calc = DEFAULT_RCU_QOVLD_MULT * qhimark;
4739 else
4740 qovld_calc = qovld;
4741}
4742
4743#include "tree_stall.h"
4744#include "tree_exp.h"
4745#include "tree_nocb.h"
4746#include "tree_plugin.h"
4747