/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptible semantics.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */
#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/sched/debug.h>
#include <linux/smpboot.h>
#include <uapi/linux/sched/types.h>
#include "../time/tick-internal.h"

#ifdef CONFIG_RCU_BOOST

#include "../locking/rtmutex_common.h"

/*
 * Control variables for per-CPU and per-rcu_node kthreads.  These
 * handle all flavors of RCU.
 */
static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
DEFINE_PER_CPU(char, rcu_cpu_has_work);

#else /* #ifdef CONFIG_RCU_BOOST */

/*
 * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
 * all uses are in dead code.  Provide a definition to keep the compiler
 * happy, but add WARN_ON_ONCE() to complain if used in the live code.
 */
#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })

#endif /* #else #ifdef CONFIG_RCU_BOOST */

#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool have_rcu_nocb_mask;	    /* Was rcu_nocb_mask allocated? */
static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */

/*
 * Check the RCU kernel configuration parameters and print informative
 * messages about anything out of the ordinary.  If you like #ifdef, you
 * will love this function.
 */
static void __init rcu_bootup_announce_oddness(void)
{
	if (IS_ENABLED(CONFIG_RCU_TRACE))
		pr_info("\tRCU debugfs-based tracing is enabled.\n");
	if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
	    (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
			RCU_FANOUT);
	if (rcu_fanout_exact)
		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		pr_info("\tRCU lockdep checking is enabled.\n");
	if (RCU_NUM_LVLS >= 4)
		pr_info("\tFour(or more)-level hierarchy is enabled.\n");
	if (RCU_FANOUT_LEAF != 16)
		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
			RCU_FANOUT_LEAF);
	if (rcu_fanout_leaf != RCU_FANOUT_LEAF)
		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
	if (nr_cpu_ids != NR_CPUS)
		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
	if (IS_ENABLED(CONFIG_RCU_BOOST))
		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
}

#ifdef CONFIG_PREEMPT_RCU

RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;

static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
			       bool wake);

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	pr_info("Preemptible hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/* Flags for rcu_preempt_ctxt_queue() decision table. */
#define RCU_GP_TASKS	0x8
#define RCU_EXP_TASKS	0x4
#define RCU_GP_BLKD	0x2
#define RCU_EXP_BLKD	0x1
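
/*
 * Illustrative example (not from the original source): a task preempted
 * while its CPU blocks both the normal and the expedited grace period,
 * with the normal grace period already waiting on some earlier task,
 * yields
 *
 *	blkd_state = RCU_GP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD = 0xb,
 *
 * which rcu_preempt_ctxt_queue() below maps to a list_add_tail().
 */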

/*
 * Queues the current task, which is blocking within an RCU-preempt
 * read-side critical section, into the appropriate location within the
 * ->blkd_tasks list, depending on the states of any ongoing normal and
 * expedited grace periods.  The ->gp_tasks pointer indicates which element
 * the normal grace period is waiting on (NULL if none), and the ->exp_tasks
 * pointer similarly indicates which element the expedited grace period is
 * waiting on (again, NULL if none).  If a grace period is waiting on a
 * given element in the ->blkd_tasks list, it also waits on all subsequent
 * elements.  Thus, a single grace period can wait on blocks of tasks
 * within the list.
 *
 * The task therefore gets queued at the head of the list, at the tail,
 * or just after the ->gp_tasks or ->exp_tasks pointer, whichever choice
 * avoids needlessly blocking a grace period that was not already waiting
 * on this task, while still blocking every grace period that must wait.
 *
 * Caller must disable interrupts and must hold the rcu_node's ->lock,
 * which this function releases.
 */
static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
	__releases(rnp->lock)
{
	int blkd_state = (rnp->gp_tasks ? RCU_GP_TASKS : 0) +
			 (rnp->exp_tasks ? RCU_EXP_TASKS : 0) +
			 (rnp->qsmask & rdp->grpmask ? RCU_GP_BLKD : 0) +
			 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
	struct task_struct *t = current;

	/*
	 * Decide where to queue the newly blocked task.  In theory,
	 * this could be an if-statement.  In practice, when I tried
	 * that, it was quite messy, so here is an enormous switch
	 * statement instead.
	 */
	switch (blkd_state) {
	case 0:
	case                RCU_EXP_TASKS:
	case                RCU_EXP_TASKS + RCU_GP_BLKD:
	case RCU_GP_TASKS:
	case RCU_GP_TASKS + RCU_EXP_TASKS:

		/*
		 * Blocking neither GP, or first task blocking the normal
		 * GP but not blocking the already-waiting expedited GP.
		 * Queue at the head of the list to avoid unnecessarily
		 * blocking the already-waiting GPs.
		 */
		list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case                                              RCU_EXP_BLKD:
	case                                RCU_GP_BLKD:
	case                                RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS +                               RCU_EXP_BLKD:
	case RCU_GP_TASKS +                 RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:

		/*
		 * First task arriving that blocks either GP, or first task
		 * arriving that blocks the expedited GP (with the normal
		 * GP already waiting), or a task arriving that blocks
		 * both GPs with both GPs already waiting.  Queue at the
		 * tail of the list to avoid any GP waiting on any of the
		 * already queued tasks that are not blocking it.
		 */
		list_add_tail(&t->rcu_node_entry, &rnp->blkd_tasks);
		break;

	case                RCU_EXP_TASKS +               RCU_EXP_BLKD:
	case                RCU_EXP_TASKS + RCU_GP_BLKD + RCU_EXP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS +               RCU_EXP_BLKD:

		/*
		 * Second or subsequent task blocking the expedited GP.
		 * The task either does not block the normal GP, or is the
		 * first task blocking the normal GP.  Queue just after
		 * the first task blocking the expedited GP.
		 */
		list_add(&t->rcu_node_entry, rnp->exp_tasks);
		break;

	case RCU_GP_TASKS +                 RCU_GP_BLKD:
	case RCU_GP_TASKS + RCU_EXP_TASKS + RCU_GP_BLKD:

		/*
		 * Second or subsequent task blocking the normal GP.
		 * The task does not block the expedited GP.  Queue just
		 * after the first task blocking the normal GP.
		 */
		list_add(&t->rcu_node_entry, rnp->gp_tasks);
		break;

	default:

		/* Yet another exercise in excessive paranoia. */
		WARN_ON_ONCE(1);
		break;
	}

	/*
	 * We have now queued the task.  If it was the first one to
	 * block either grace period, update the ->gp_tasks and/or
	 * ->exp_tasks pointers, respectively, to reference the newly
	 * blocked tasks.
	 */
	if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD))
		rnp->gp_tasks = &t->rcu_node_entry;
	if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
		rnp->exp_tasks = &t->rcu_node_entry;
	raw_spin_unlock_rcu_node(rnp);

	/*
	 * Report the quiescent state for the expedited GP.  This expedited
	 * GP should not be able to end until we report, so there should be
	 * no need to check for a subsequent expedited GP.  (Though we are
	 * still in a quiescent state in any case.)
	 */
	if (blkd_state & RCU_EXP_BLKD &&
	    t->rcu_read_unlock_special.b.exp_need_qs) {
		t->rcu_read_unlock_special.b.exp_need_qs = false;
		rcu_report_exp_rdp(rdp->rsp, rdp, true);
	} else {
		WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
	}
}

/*
 * Record a preemptible-RCU quiescent state for the specified CPU.  Note
 * that this just means that the task currently running on the CPU is
 * not in a quiescent state.  There might be any number of tasks blocked
 * while in an RCU read-side critical section.
 *
 * As with the other rcu_*_qs() functions, callers to this function
 * must disable preemption.
 */
static void rcu_preempt_qs(void)
{
	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
		trace_rcu_grace_period(TPS("rcu_preempt"),
				       __this_cpu_read(rcu_data_p->gpnum),
				       TPS("cpuqs"));
		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
		current->rcu_read_unlock_special.b.need_qs = false;
	}
}

/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from.  If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the blkd_tasks list.
 * The task will dequeue itself when it exits the outermost enclosing
 * RCU read-side critical section.  Therefore, the current grace period
 * cannot be permitted to complete until the blkd_tasks list entries
 * predating the current grace period drain, in other words, until
 * rnp->gp_tasks becomes NULL.
 *
 * Caller must disable interrupts.
 */
static void rcu_preempt_note_context_switch(void)
{
	struct task_struct *t = current;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	if (t->rcu_read_lock_nesting > 0 &&
	    !t->rcu_read_unlock_special.b.blocked) {

		/* Possibly blocking in an RCU read-side critical section. */
		rdp = this_cpu_ptr(rcu_state_p->rda);
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp);
		t->rcu_read_unlock_special.b.blocked = true;
		t->rcu_blocked_node = rnp;

		/*
		 * Verify the CPU's sanity, trace the preemption, and
		 * then queue the task as required based on the states
		 * of any ongoing normal and expedited grace periods.
		 */
		WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		trace_rcu_preempt_task(rdp->rsp->name,
				       t->pid,
				       (rnp->qsmask & rdp->grpmask)
				       ? rnp->gpnum
				       : rnp->gpnum + 1);
		rcu_preempt_ctxt_queue(rnp, rdp);
	} else if (t->rcu_read_lock_nesting < 0 &&
		   t->rcu_read_unlock_special.s) {

		/*
		 * Complete exit from RCU read-side critical section on
		 * behalf of preempted instance of __rcu_read_unlock().
		 */
		rcu_read_unlock_special(t);
	}

	/*
	 * Either we were not in an RCU read-side critical section to
	 * begin with, or we have now recorded that critical section
	 * globally.  Either way, we can now note a quiescent state
	 * for this CPU.  Again, if we were in an RCU read-side critical
	 * section, and if that critical section was blocking the current
	 * grace period, then the fact that the task has been enqueued
	 * means that we continue to block the current grace period.
	 */
	rcu_preempt_qs();
}

/*
 * Check for preempted RCU readers blocking the current grace period
 * for the specified rcu_node structure.  If the caller needs a reliable
 * answer, it must hold the rcu_node's ->lock.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return rnp->gp_tasks != NULL;
}

/*
 * Advance a ->blkd_tasks-list pointer to the next entry, instead
 * returning NULL if this is the last entry.
 */
static struct list_head *rcu_next_node_entry(struct task_struct *t,
					     struct rcu_node *rnp)
{
	struct list_head *np;

	np = t->rcu_node_entry.next;
	if (np == &rnp->blkd_tasks)
		np = NULL;
	return np;
}

/*
 * Return true if the specified rcu_node structure has tasks that were
 * preempted within an RCU read-side critical section.
 */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}

/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or task having blocked during the RCU
 * read-side critical section.
 */
void rcu_read_unlock_special(struct task_struct *t)
{
	bool empty_exp;
	bool empty_norm;
	bool empty_exp_now;
	unsigned long flags;
	struct list_head *np;
	bool drop_boost_mutex = false;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	union rcu_special special;

	/* NMI handlers cannot block and cannot safely manipulate state. */
	if (in_nmi())
		return;

	local_irq_save(flags);

	/*
	 * If RCU core is waiting for this CPU to exit its critical section,
	 * report the fact that it has exited.  Because irqs are disabled,
	 * t->rcu_read_unlock_special cannot change.
	 */
	special = t->rcu_read_unlock_special;
	if (special.b.need_qs) {
		rcu_preempt_qs();
		t->rcu_read_unlock_special.b.need_qs = false;
		if (!t->rcu_read_unlock_special.s) {
			local_irq_restore(flags);
			return;
		}
	}

	/*
	 * Respond to a request for an expedited grace period, but only if
	 * we were not preempted, meaning that we were running on the same
	 * CPU throughout.  If we were preempted, the exp_need_qs flag
	 * would have been cleared at the time of the first preemption,
	 * and the quiescent state would be reported when we were dequeued.
	 */
	if (special.b.exp_need_qs) {
		WARN_ON_ONCE(special.b.blocked);
		t->rcu_read_unlock_special.b.exp_need_qs = false;
		rdp = this_cpu_ptr(rcu_state_p->rda);
		rcu_report_exp_rdp(rcu_state_p, rdp, true);
		if (!t->rcu_read_unlock_special.s) {
			local_irq_restore(flags);
			return;
		}
	}

	/* Hardware IRQ handlers cannot block, complain if they get here. */
	if (in_irq() || in_serving_softirq()) {
		lockdep_rcu_suspicious(__FILE__, __LINE__,
				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
		pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
			 t->rcu_read_unlock_special.s,
			 t->rcu_read_unlock_special.b.blocked,
			 t->rcu_read_unlock_special.b.exp_need_qs,
			 t->rcu_read_unlock_special.b.need_qs);
		local_irq_restore(flags);
		return;
	}

	/* Clean up if blocked during RCU read-side critical section. */
	if (special.b.blocked) {
		t->rcu_read_unlock_special.b.blocked = false;

		/*
		 * Remove this task from the list it blocked on.  The task
		 * now remains queued on the rcu_node corresponding to the
		 * CPU it first blocked on, so there is no longer any need
		 * to loop.  Retain a WARN_ON_ONCE() out of sheer paranoia.
		 */
		rnp = t->rcu_blocked_node;
		raw_spin_lock_rcu_node(rnp);
		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
		empty_exp = sync_rcu_preempt_exp_done(rnp);
		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
		np = rcu_next_node_entry(t, rnp);
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;
		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
						rnp->gpnum, t->pid);
		if (&t->rcu_node_entry == rnp->gp_tasks)
			rnp->gp_tasks = np;
		if (&t->rcu_node_entry == rnp->exp_tasks)
			rnp->exp_tasks = np;
		if (IS_ENABLED(CONFIG_RCU_BOOST)) {
			if (&t->rcu_node_entry == rnp->boost_tasks)
				rnp->boost_tasks = np;
			/* Snapshot ->boost_mtx ownership w/rnp->lock held. */
			drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
		}

		/*
		 * If this was the last task on the current list, and if
		 * we aren't waiting on any CPUs, report the quiescent state.
		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
		 * so we must take a snapshot of the expedited state before
		 * dropping rnp->lock.
		 */
		empty_exp_now = sync_rcu_preempt_exp_done(rnp);
		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
							 rnp->gpnum,
							 0, rnp->qsmask,
							 rnp->level,
							 rnp->grplo,
							 rnp->grphi,
							 !!rnp->gp_tasks);
			rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
		} else {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}

		/* Unboost if we were boosted. */
		if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
			rt_mutex_unlock(&rnp->boost_mtx);

		/*
		 * If this was the last task on the expedited lists,
		 * then we need to report up the rcu_node hierarchy.
		 */
		if (!empty_exp && empty_exp_now)
			rcu_report_exp_rnp(rcu_state_p, rnp, true);
	} else {
		local_irq_restore(flags);
	}
}

/*
 * Dump detailed information for all tasks blocking the current RCU
 * grace period on the specified rcu_node structure.
 */
static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	t = list_entry(rnp->gp_tasks->prev,
		       struct task_struct, rcu_node_entry);
	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
		sched_show_task(t);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

/*
 * Dump detailed information for all tasks blocking the current RCU
 * grace period.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
	struct rcu_node *rnp = rcu_get_root(rsp);

	rcu_print_detail_task_stall_rnp(rnp);
	rcu_for_each_leaf_node(rsp, rnp)
		rcu_print_detail_task_stall_rnp(rnp);
}

static void rcu_print_task_stall_begin(struct rcu_node *rnp)
{
	pr_err("\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
	       rnp->level, rnp->grplo, rnp->grphi);
}

static void rcu_print_task_stall_end(void)
{
	pr_cont("\n");
}

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each.
 */
static int rcu_print_task_stall(struct rcu_node *rnp)
{
	struct task_struct *t;
	int ndetected = 0;

	if (!rcu_preempt_blocked_readers_cgp(rnp))
		return 0;
	rcu_print_task_stall_begin(rnp);
	t = list_entry(rnp->gp_tasks->prev,
		       struct task_struct, rcu_node_entry);
	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
		pr_cont(" P%d", t->pid);
		ndetected++;
	}
	rcu_print_task_stall_end();
	return ndetected;
}

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each that is blocking the current
 * expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	struct task_struct *t;
	int ndetected = 0;

	if (!rnp->exp_tasks)
		return 0;
	t = list_entry(rnp->exp_tasks->prev,
		       struct task_struct, rcu_node_entry);
	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
		pr_cont(" P%d", t->pid);
		ndetected++;
	}
	return ndetected;
}

/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty.  It is a serious bug to complete a grace
 * period that still has RCU readers blocked!  This function must be
 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 * must be held by the caller.
 *
 * Also, if there are blocked tasks on the list, they automatically
 * block the newly created grace period, so set up ->gp_tasks accordingly.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
	if (rcu_preempt_has_tasks(rnp))
		rnp->gp_tasks = rnp->blkd_tasks.next;
	WARN_ON_ONCE(rnp->qsmask);
}

/*
 * Check for a quiescent state from the current CPU.  When a task blocks,
 * the task is recorded in the corresponding CPU's rcu_node structure,
 * which is checked elsewhere.
 *
 * Caller must disable hard irqs.
 */
static void rcu_preempt_check_callbacks(void)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0) {
		rcu_preempt_qs();
		return;
	}
	if (t->rcu_read_lock_nesting > 0 &&
	    __this_cpu_read(rcu_data_p->core_needs_qs) &&
	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm))
		t->rcu_read_unlock_special.b.need_qs = true;
}

#ifdef CONFIG_RCU_BOOST

static void rcu_preempt_do_callbacks(void)
{
	rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
}

#endif /* #ifdef CONFIG_RCU_BOOST */

/*
 * Queue a preemptible-RCU callback for invocation after a grace period.
 */
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	__call_rcu(head, func, rcu_state_p, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
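
/*
 * Illustrative call_rcu() usage (not part of the original source; the
 * "foo" struct and callback names are hypothetical):
 *
 *	struct foo {
 *		struct list_head list;
 *		struct rcu_head rh;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rh));
 *	}
 *
 *	// Updater: unlink the element, then defer its freeing until
 *	// all pre-existing readers are guaranteed to have finished.
 *	list_del_rcu(&p->list);
 *	call_rcu(&p->rh, foo_reclaim);
 */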

/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  Note, however, that
 * upon return from synchronize_rcu(), the caller might well be executing
 * concurrently with new RCU read-side critical sections that began while
 * synchronize_rcu() was waiting.  RCU read-side critical sections are
 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
 *
 * See the description of synchronize_sched() for more detailed
 * information on memory-ordering guarantees.
 */
void synchronize_rcu(void)
{
	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
			 lock_is_held(&rcu_lock_map) ||
			 lock_is_held(&rcu_sched_lock_map),
			 "Illegal synchronize_rcu() in RCU read-side critical section");
	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
		return;
	if (rcu_gp_is_expedited())
		synchronize_rcu_expedited();
	else
		wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
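
/*
 * Illustrative synchronous update pattern (not part of the original
 * source; "gp", "gp_lock", and the types are hypothetical):
 *
 *	struct foo *old = rcu_dereference_protected(gp,
 *				lockdep_is_held(&gp_lock));
 *
 *	rcu_assign_pointer(gp, new);	// Publish the replacement.
 *	synchronize_rcu();		// Wait out pre-existing readers.
 *	kfree(old);			// No reader can now hold "old".
 */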

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 *
 * Note that this primitive does not necessarily wait for an RCU grace period
 * to complete.  For example, if there are no RCU callbacks queued anywhere
 * in the system, then rcu_barrier() is within its rights to return
 * immediately, without waiting for anything, much less an RCU grace period.
 */
void rcu_barrier(void)
{
	_rcu_barrier(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
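
/*
 * Illustrative module-unload usage (not part of the original source; the
 * function name is hypothetical): a module that passes its own functions
 * to call_rcu() must flush those callbacks before its text goes away.
 *
 *	static void __exit foo_exit(void)
 *	{
 *		// First stop posting new callbacks, then:
 *		rcu_barrier();	// Wait for the in-flight ones.
 *	}
 */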

/*
 * Initialize preemptible RCU's state structures.
 */
static void __init __rcu_init_preempt(void)
{
	rcu_init_one(rcu_state_p);
}

/*
 * Check for a task exiting while in a preemptible-RCU read-side
 * critical section, clean up if so.  No need to issue warnings,
 * as debug_check_no_locks_held() already does this if lockdep
 * is enabled.
 */
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (likely(list_empty(&current->rcu_node_entry)))
		return;
	t->rcu_read_lock_nesting = 1;
	barrier();
	t->rcu_read_unlock_special.b.blocked = true;
	__rcu_read_unlock();
}

#else /* #ifdef CONFIG_PREEMPT_RCU */

static struct rcu_state *const rcu_state_p = &rcu_sched_state;

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	pr_info("Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * CPUs being in quiescent states.
 */
static void rcu_preempt_note_context_switch(void)
{
}

/*
 * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers.
 */
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because there is no preemptible RCU, there can be no readers blocked.
 */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return false;
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
static int rcu_print_task_stall(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections that are
 * blocking the current expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because there is no preemptible RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks.  So check instead for
 * bugs telling us that there are.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}

/*
 * Because preemptible RCU does not exist, it never has any callbacks
 * to check.
 */
static void rcu_preempt_check_callbacks(void)
{
}

/*
 * Because preemptible RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched().
 */
void rcu_barrier(void)
{
	rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);

/*
 * Because preemptible RCU does not exist, it need not be initialized.
 */
static void __init __rcu_init_preempt(void)
{
}

/*
 * Because there is no preemptible RCU, there is no cleanup to do.
 */
void exit_rcu(void)
{
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_RCU_BOOST

#include "../locking/rtmutex_common.h"

#ifdef CONFIG_RCU_TRACE

static void rcu_initiate_boost_trace(struct rcu_node *rnp)
{
	if (!rcu_preempt_has_tasks(rnp))
		rnp->n_balk_blkd_tasks++;
	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
		rnp->n_balk_exp_gp_tasks++;
	else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
		rnp->n_balk_boost_tasks++;
	else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
		rnp->n_balk_notblocked++;
	else if (rnp->gp_tasks != NULL &&
		 ULONG_CMP_LT(jiffies, rnp->boost_time))
		rnp->n_balk_notyet++;
	else
		rnp->n_balk_nos++;
}

#else /* #ifdef CONFIG_RCU_TRACE */

static void rcu_initiate_boost_trace(struct rcu_node *rnp)
{
}

#endif /* #else #ifdef CONFIG_RCU_TRACE */

static void rcu_wake_cond(struct task_struct *t, int status)
{
	/*
	 * If the thread is yielding, only wake it when this
	 * is invoked from idle.
	 */
	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
		wake_up_process(t);
}

/*
 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
 * or ->boost_tasks, advancing the pointer to the next task in the
 * ->blkd_tasks list.
 *
 * Note that irqs must be enabled: boosting the task can block.
 * Returns 1 if there are more tasks needing to be boosted.
 */
static int rcu_boost(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;
	struct list_head *tb;

	if (READ_ONCE(rnp->exp_tasks) == NULL &&
	    READ_ONCE(rnp->boost_tasks) == NULL)
		return 0;  /* Nothing left to boost. */

	raw_spin_lock_irqsave_rcu_node(rnp, flags);

	/*
	 * Recheck under the lock: all tasks in need of boosting
	 * might exit their RCU read-side critical sections on their own.
	 */
	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
	}

	/*
	 * Preferentially boost tasks blocking expedited grace periods.
	 * This cannot starve the normal grace periods because a second
	 * expedited grace period must boost all blocked tasks, including
	 * those blocking the pre-existing normal grace period.
	 */
	if (rnp->exp_tasks != NULL) {
		tb = rnp->exp_tasks;
		rnp->n_exp_boosts++;
	} else {
		tb = rnp->boost_tasks;
		rnp->n_normal_boosts++;
	}
	rnp->n_tasks_boosted++;

	/*
	 * We boost task t by manufacturing an rt_mutex that appears to
	 * be held by task t.  We leave a pointer to that rt_mutex where
	 * task t can find it, and task t will release the mutex when it
	 * exits its outermost RCU read-side critical section.  Then
	 * simply acquiring this artificial rt_mutex will boost task
	 * t's priority.  (Thanks to tglx for suggesting this approach!)
	 *
	 * Note that task t must acquire rnp->lock to remove itself from
	 * the ->blkd_tasks list, which it will do from exit() if from
	 * nowhere else.  We therefore are guaranteed that task t will
	 * stay around at least until we drop rnp->lock.  Note that
	 * rnp->lock also resolves races between our priority boosting
	 * and task t's exiting its outermost RCU read-side critical
	 * section.
	 */
	t = container_of(tb, struct task_struct, rcu_node_entry);
	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	/* Lock only for side effect: boosts task t's priority. */
	rt_mutex_lock(&rnp->boost_mtx);
	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */

	return READ_ONCE(rnp->exp_tasks) != NULL ||
	       READ_ONCE(rnp->boost_tasks) != NULL;
}

/*
 * Priority-boosting kthread, one per leaf rcu_node.
 */
static int rcu_boost_kthread(void *arg)
{
	struct rcu_node *rnp = (struct rcu_node *)arg;
	int spincnt = 0;
	int more2boost;

	trace_rcu_utilization(TPS("Start boost kthread@init"));
	for (;;) {
		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
		more2boost = rcu_boost(rnp);
		if (more2boost)
			spincnt++;
		else
			spincnt = 0;
		if (spincnt > 10) {
			rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
			trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
			schedule_timeout_interruptible(2);
			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
			spincnt = 0;
		}
	}
	/* NOTREACHED */
	trace_rcu_utilization(TPS("End boost kthread@notreached"));
	return 0;
}

/*
 * Check to see if it is time to start boosting RCU readers that are
 * blocking the current grace period, and, if so, tell the per-rcu_node
 * kthread to start boosting them.  If there is an expedited grace
 * period in progress, it is always time to boost.
 *
 * The caller must hold rnp->lock, which this function releases.
 */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	struct task_struct *t;

	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
		rnp->n_balk_exp_gp_tasks++;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	if (rnp->exp_tasks != NULL ||
	    (rnp->gp_tasks != NULL &&
	     rnp->boost_tasks == NULL &&
	     rnp->qsmask == 0 &&
	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
		if (rnp->exp_tasks == NULL)
			rnp->boost_tasks = rnp->gp_tasks;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		t = rnp->boost_kthread_task;
		if (t)
			rcu_wake_cond(t, rnp->boost_kthread_status);
	} else {
		rcu_initiate_boost_trace(rnp);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}

/*
 * Wake up the per-CPU kthread to invoke RCU callbacks.
 */
static void invoke_rcu_callbacks_kthread(void)
{
	unsigned long flags;

	local_irq_save(flags);
	__this_cpu_write(rcu_cpu_has_work, 1);
	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
			      __this_cpu_read(rcu_cpu_kthread_status));
	}
	local_irq_restore(flags);
}

/*
 * Is the current CPU running the RCU-callbacks kthread?
 * Caller must have preemption disabled.
 */
static bool rcu_is_callbacks_kthread(void)
{
	return __this_cpu_read(rcu_cpu_kthread_task) == current;
}

#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

/*
 * Do priority-boost accounting for the start of a new grace period.
 */
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
}
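
/*
 * Worked example for the conversion above (illustrative numbers only):
 * CONFIG_RCU_BOOST_DELAY is expressed in milliseconds, so with a 500ms
 * delay and HZ=250, RCU_BOOST_DELAY_JIFFIES = DIV_ROUND_UP(500 * 250,
 * 1000) = 125 jiffies; boosting therefore starts only once the grace
 * period is at least that old.
 */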

/*
 * Create an RCU-boost kthread for the specified node if one does not
 * already exist.  We only create this kthread for preemptible RCU.
 * Returns zero if all is well, a negated errno otherwise.
 */
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
				       struct rcu_node *rnp)
{
	int rnp_index = rnp - &rsp->node[0];
	unsigned long flags;
	struct sched_param sp;
	struct task_struct *t;

	if (rcu_state_p != rsp)
		return 0;

	if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
		return 0;

	rsp->boost = 1;
	if (rnp->boost_kthread_task != NULL)
		return 0;
	t = kthread_create(rcu_boost_kthread, (void *)rnp,
			   "rcub/%d", rnp_index);
	if (IS_ERR(t))
		return PTR_ERR(t);
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	rnp->boost_kthread_task = t;
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
	return 0;
}

static void rcu_kthread_do_work(void)
{
	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
	rcu_preempt_do_callbacks();
}

static void rcu_cpu_kthread_setup(unsigned int cpu)
{
	struct sched_param sp;

	sp.sched_priority = kthread_prio;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
}

static void rcu_cpu_kthread_park(unsigned int cpu)
{
	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}

static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
	return __this_cpu_read(rcu_cpu_has_work);
}

/*
 * Per-CPU kernel thread that invokes RCU callbacks.  This replaces the
 * RCU softirq used in configurations of RCU that do not support RCU
 * priority boosting.
 */
static void rcu_cpu_kthread(unsigned int cpu)
{
	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
	int spincnt;

	for (spincnt = 0; spincnt < 10; spincnt++) {
		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
		local_bh_disable();
		*statusp = RCU_KTHREAD_RUNNING;
		this_cpu_inc(rcu_cpu_kthread_loops);
		local_irq_disable();
		work = *workp;
		*workp = 0;
		local_irq_enable();
		if (work)
			rcu_kthread_do_work();
		local_bh_enable();
		if (*workp == 0) {
			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
			*statusp = RCU_KTHREAD_WAITING;
			return;
		}
	}
	*statusp = RCU_KTHREAD_YIELDING;
	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
	schedule_timeout_interruptible(2);
	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
	*statusp = RCU_KTHREAD_WAITING;
}

/*
 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
 * served by the rcu_node in question.  The CPU hotplug lock is still
 * held, so the set of online CPUs will be stable.
 *
 * We don't include outgoingcpu in the affinity set, use -1 if there is
 * no outgoing CPU.  If there are no CPUs left in the affinity set,
 * this function allows the kthread to execute on any CPU.
 */
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
	struct task_struct *t = rnp->boost_kthread_task;
	unsigned long mask = rcu_rnp_online_cpus(rnp);
	cpumask_var_t cm;
	int cpu;

	if (!t)
		return;
	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
		return;
	for_each_leaf_node_possible_cpu(rnp, cpu)
		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
		    cpu != outgoingcpu)
			cpumask_set_cpu(cpu, cm);
	if (cpumask_weight(cm) == 0)
		cpumask_setall(cm);
	set_cpus_allowed_ptr(t, cm);
	free_cpumask_var(cm);
}

static struct smp_hotplug_thread rcu_cpu_thread_spec = {
	.store			= &rcu_cpu_kthread_task,
	.thread_should_run	= rcu_cpu_kthread_should_run,
	.thread_fn		= rcu_cpu_kthread,
	.thread_comm		= "rcuc/%u",
	.setup			= rcu_cpu_kthread_setup,
	.park			= rcu_cpu_kthread_park,
};

/*
 * Spawn boost kthreads -- called as soon as the scheduler is running.
 */
static void __init rcu_spawn_boost_kthreads(void)
{
	struct rcu_node *rnp;
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(rcu_cpu_has_work, cpu) = 0;
	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
	rcu_for_each_leaf_node(rcu_state_p, rnp)
		(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
}

static void rcu_prepare_kthreads(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;

	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
	if (rcu_scheduler_fully_active)
		(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
}

#else /* #ifdef CONFIG_RCU_BOOST */

static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

static void invoke_rcu_callbacks_kthread(void)
{
	WARN_ON_ONCE(1);
}

static bool rcu_is_callbacks_kthread(void)
{
	return false;
}

static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
{
}

static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
{
}

static void __init rcu_spawn_boost_kthreads(void)
{
}

static void rcu_prepare_kthreads(int cpu)
{
}

#endif /* #else #ifdef CONFIG_RCU_BOOST */

#if !defined(CONFIG_RCU_FAST_NO_HZ)

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs
 * any flavor of RCU.
 */
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	*nextevt = KTIME_MAX;
	return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)
	       ? 0 : rcu_cpu_has_callbacks(NULL);
}

/*
 * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up
 * after it.
 */
static void rcu_cleanup_after_idle(void)
{
}

/*
 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
 * is nothing.
 */
static void rcu_prepare_for_idle(void)
{
}

/*
 * Don't bother keeping a running count of the number of RCU callbacks
 * posted because CONFIG_RCU_FAST_NO_HZ=n.
 */
static void rcu_idle_count_callbacks_posted(void)
{
}

#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */

/*
 * This code is invoked when a CPU goes idle, at which point we want
 * to have the CPU do everything required for RCU so that it can enter
 * the energy-efficient dyntick-idle mode.  This is handled by a
 * state machine implemented by rcu_prepare_for_idle() below.
 *
 * The following three properties are required:
 *
 * 1.	Maximize time asleep.  CPUs with only lazy (kfree-like)
 *	callbacks may sleep for a long time (RCU_IDLE_LAZY_GP_DELAY),
 *	while CPUs with at least one non-lazy callback are awakened
 *	more often (RCU_IDLE_GP_DELAY) so that their callbacks are
 *	invoked reasonably promptly.
 * 2.	Minimize the number of wakeups, which the round_up() and
 *	round_jiffies() batching of timeouts in rcu_needs_cpu() helps
 *	with by aligning wakeups across CPUs.
 * 3.	If a non-lazy callback arrives at a CPU that previously had
 *	only lazy callbacks, re-evaluate the sleep interval rather
 *	than letting that callback languish.
 *
 * The values below work well in practice.  If future workloads require
 * adjustment, they can be converted into kernel config parameters, though
 * making the state machine smarter might be a better option.
 */
#define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */

static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
module_param(rcu_idle_lazy_gp_delay, int, 0644);

/*
 * Try to advance callbacks for all flavors of RCU on the current CPU, but
 * only if it has been awhile since the last time we did so.  Afterwards,
 * if there are any callbacks ready for immediate invocation, return true.
 */
static bool __maybe_unused rcu_try_advance_all_cbs(void)
{
	bool cbs_ready = false;
	struct rcu_data *rdp;
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	struct rcu_node *rnp;
	struct rcu_state *rsp;

	/* Exit early if we advanced recently. */
	if (jiffies == rdtp->last_advance_all)
		return false;
	rdtp->last_advance_all = jiffies;

	for_each_rcu_flavor(rsp) {
		rdp = this_cpu_ptr(rsp->rda);
		rnp = rdp->mynode;

		/*
		 * Don't bother checking unless a grace period has
		 * completed since we last checked and there are
		 * callbacks not yet ready to invoke.
		 */
		if ((rdp->completed != rnp->completed ||
		     unlikely(READ_ONCE(rdp->gpwrap))) &&
		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
			note_gp_changes(rsp, rdp);

		if (cpu_has_callbacks_ready_to_invoke(rdp))
			cbs_ready = true;
	}
	return cbs_ready;
}

/*
 * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
 * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
 * caller what timer deadline to use based on whether or not all remaining
 * callbacks are lazy.
 *
 * The caller must have disabled interrupts.
 */
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
{
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	unsigned long dj;

	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) {
		*nextevt = KTIME_MAX;
		return 0;
	}

	/* Snapshot to detect later posting of non-lazy callback. */
	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;

	/* If no callbacks, RCU doesn't need the CPU. */
	if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) {
		*nextevt = KTIME_MAX;
		return 0;
	}

	/* Attempt to advance callbacks. */
	if (rcu_try_advance_all_cbs()) {
		/* Some ready to invoke, so initiate later invocation. */
		invoke_rcu_core();
		return 1;
	}
	rdtp->last_accelerate = jiffies;

	/* Request timer delay depending on laziness, and round. */
	if (!rdtp->all_lazy) {
		dj = round_up(rcu_idle_gp_delay + jiffies,
			      rcu_idle_gp_delay) - jiffies;
	} else {
		dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
	}
	*nextevt = basemono + dj * TICK_NSEC;
	return 0;
}
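
/*
 * Worked example for the rounding above (illustrative numbers only):
 * with rcu_idle_gp_delay = 4 and jiffies = 1002, round_up(1006, 4) is
 * 1008, so dj = 6 jiffies.  Every CPU computes its deadline on the same
 * 4-jiffy grid, so idle CPUs holding non-lazy callbacks tend to wake at
 * the same time and can share a single grace period.
 */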

/*
 * Prepare a CPU for idle from an RCU perspective.  The first major task
 * is to sense whether nohz mode has been enabled or disabled via sysfs.
 * The second major task is to check to see if a non-lazy callback has
 * arrived at a CPU that previously had only lazy callbacks.  The third
 * major task is to accelerate (that is, assign grace-period numbers to)
 * any recently arrived callbacks.
 *
 * The caller must have disabled interrupts.
 */
static void rcu_prepare_for_idle(void)
{
	bool needwake;
	struct rcu_data *rdp;
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
	struct rcu_node *rnp;
	struct rcu_state *rsp;
	int tne;

	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
	    rcu_is_nocb_cpu(smp_processor_id()))
		return;

	/* Handle nohz enablement switches conservatively. */
	tne = READ_ONCE(tick_nohz_active);
	if (tne != rdtp->tick_nohz_enabled_snap) {
		if (rcu_cpu_has_callbacks(NULL))
			invoke_rcu_core(); /* force nohz to see update. */
		rdtp->tick_nohz_enabled_snap = tne;
		return;
	}
	if (!tne)
		return;

	/*
	 * If a non-lazy callback arrived at a CPU having only lazy
	 * callbacks, invoke RCU core for the side-effect of recalculating
	 * idle duration on re-entry to idle.
	 */
	if (rdtp->all_lazy &&
	    rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
		rdtp->all_lazy = false;
		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
		invoke_rcu_core();
		return;
	}

	/*
	 * If we have not yet accelerated this jiffy, accelerate all
	 * callbacks on this CPU.
	 */
	if (rdtp->last_accelerate == jiffies)
		return;
	rdtp->last_accelerate = jiffies;
	for_each_rcu_flavor(rsp) {
		rdp = this_cpu_ptr(rsp->rda);
		if (!*rdp->nxttail[RCU_DONE_TAIL])
			continue;
		rnp = rdp->mynode;
		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
		needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
		if (needwake)
			rcu_gp_kthread_wake(rsp);
	}
}

/*
 * Clean up for exit from idle.  Attempt to advance callbacks based on
 * any grace periods that elapsed while the CPU was idle, and if any
 * callbacks are now ready to invoke, initiate invocation.
 */
static void rcu_cleanup_after_idle(void)
{
	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) ||
	    rcu_is_nocb_cpu(smp_processor_id()))
		return;
	if (rcu_try_advance_all_cbs())
		invoke_rcu_core();
}

/*
 * Keep a running count of the number of non-lazy callbacks posted
 * on this CPU.  This running counter (which is never decremented) allows
 * rcu_prepare_for_idle() to detect when something out of the idle loop
 * posts a callback, even if an equal number of callbacks are invoked.
 * Of course, callbacks should only be posted from within a trace event
 * designed to be called from idle or from within RCU_NONIDLE().
 */
static void rcu_idle_count_callbacks_posted(void)
{
	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
}

/*
 * Data for flushing lazy RCU callbacks at OOM time.
 */
static atomic_t oom_callback_count;
static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);

/*
 * RCU OOM callback -- decrement the outstanding count and deliver the
 * wake-up if we are the last one.
 */
static void rcu_oom_callback(struct rcu_head *rhp)
{
	if (atomic_dec_and_test(&oom_callback_count))
		wake_up(&oom_callback_wq);
}

/*
 * Post an rcu_oom_notify callback on the current CPU if it has at
 * least one lazy callback.  This will unnecessarily post callbacks
 * to CPUs that already have a non-lazy callback at the end of their
 * callback list, but this is an infrequent operation, so accept some
 * extra overhead.
 */
static void rcu_oom_notify_cpu(void *unused)
{
	struct rcu_state *rsp;
	struct rcu_data *rdp;

	for_each_rcu_flavor(rsp) {
		rdp = raw_cpu_ptr(rsp->rda);
		if (rdp->qlen_lazy != 0) {
			atomic_inc(&oom_callback_count);
			rsp->call(&rdp->oom_head, rcu_oom_callback);
		}
	}
}

/*
 * If low on memory, ensure that each CPU has a non-lazy callback.
 * This will wake up CPUs that have only lazy callbacks, in turn
 * ensuring that they free up the corresponding memory in a timely
 * manner.  Because an uncertain amount of memory will be freed in some
 * uncertain timeframe, we do not claim to have freed anything.
 */
static int rcu_oom_notify(struct notifier_block *self,
			  unsigned long notused, void *nfreed)
{
	int cpu;

	/* Wait for callbacks from earlier instance to complete. */
	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
	smp_mb(); /* Ensure callback reuse happens after callback invocation. */

	/*
	 * Prevent premature wakeup: ensure that all increments happen
	 * before there is a chance of the wakeup being needed.
	 */
	atomic_set(&oom_callback_count, 1);

	for_each_online_cpu(cpu) {
		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
		cond_resched_rcu_qs();
	}

	/* Unconditionally decrement: no need to wake ourselves up. */
	atomic_dec(&oom_callback_count);

	return NOTIFY_OK;
}

static struct notifier_block rcu_oom_nb = {
	.notifier_call = rcu_oom_notify
};

static int __init rcu_register_oom_notifier(void)
{
	register_oom_notifier(&rcu_oom_nb);
	return 0;
}
early_initcall(rcu_register_oom_notifier);

#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */

#ifdef CONFIG_RCU_FAST_NO_HZ

static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
	unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;

	sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
		rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
		ulong2long(nlpd),
		rdtp->all_lazy ? 'L' : '.',
		rdtp->tick_nohz_enabled_snap ? '.' : 'D');
}

#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */

static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
{
	*cp = '\0';
}

#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */

/* Initiate the stall-info list. */
static void print_cpu_stall_info_begin(void)
{
	pr_cont("\n");
}

/*
 * Print out diagnostic information for the specified stalled CPU.
 *
 * If the specified CPU is aware of the current RCU grace period
 * (flavor specified by rsp), then print the number of scheduling
 * clock interrupts the CPU has taken during the time that it has
 * been aware.  Otherwise, print the number of RCU grace periods
 * that this CPU is ignorant of, for example, "1" if the CPU was
 * aware of the previous grace period.
 *
 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
 */
static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
{
	char fast_no_hz[72];
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_dynticks *rdtp = rdp->dynticks;
	char *ticks_title;
	unsigned long ticks_value;

	if (rsp->gpnum == rdp->gpnum) {
		ticks_title = "ticks this GP";
		ticks_value = rdp->ticks_this_gp;
	} else {
		ticks_title = "GPs behind";
		ticks_value = rsp->gpnum - rdp->gpnum;
	}
	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
	pr_err("\t%d-%c%c%c: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
	       cpu,
	       "O."[!!cpu_online(cpu)],
	       "o."[!!(rdp->grpmask & rdp->mynode->qsmaskinit)],
	       "N."[!!(rdp->grpmask & rdp->mynode->qsmaskinitnext)],
	       ticks_value, ticks_title,
	       rcu_dynticks_snap(rdtp) & 0xfff,
	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
	       READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
	       fast_no_hz);
}

/* Terminate the stall-info list. */
static void print_cpu_stall_info_end(void)
{
	pr_err("\t");
}

/* Zero ->ticks_this_gp for all flavors of RCU. */
static void zero_cpu_stall_ticks(struct rcu_data *rdp)
{
	rdp->ticks_this_gp = 0;
	rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
}

/* Increment ->ticks_this_gp for all flavors of RCU. */
static void increment_cpu_stall_ticks(void)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		raw_cpu_inc(rsp->rda->ticks_this_gp);
}

#ifdef CONFIG_RCU_NOCB_CPU

/*
 * Offload callback processing from the boot-time-specified set of CPUs
 * specified by rcu_nocb_mask.  For each CPU in the set, there is a
 * kthread created that pulls the callbacks from the corresponding CPU,
 * waits for a grace period to elapse, and invokes the callbacks.  These
 * kthreads are organized into leaders, which accept callbacks from their
 * followers, wait for grace periods, and then hand the callbacks back to
 * the followers for invocation; this arrangement decreases the number of
 * wakeups that the grace-period kthread must do.
 *
 * Offloading callback processing can also in theory serve as an
 * energy-efficiency measure, because CPUs with no RCU callbacks queued
 * are more aggressive about entering dyntick-idle mode.
 */

/* Parse the boot-time rcu_nocbs= CPU list. */
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	have_rcu_nocb_mask = true;
	cpulist_parse(str, rcu_nocb_mask);
	return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);
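
/*
 * Illustrative boot-time usage (the exact CPU list is of course
 * system-specific): booting with
 *
 *	rcu_nocbs=1-7
 *
 * offloads callback invocation for CPUs 1 through 7 onto rcuo kthreads,
 * leaving CPU 0 to process its own callbacks.  Adding rcu_nocb_poll
 * makes those kthreads poll for work instead of waiting for wakeups.
 */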

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = 1;
	return 0;
}
early_param("rcu_nocb_poll", parse_rcu_nocb_poll);

/*
 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
 * grace period.
 */
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

/*
 * Set the root rcu_node structure's ->need_future_gp field
 * based on the sum of those of all rcu_node structures.  This does
 * double-count the root rcu_node structure's requests, but this
 * is necessary to handle the possibility of a rcu_nocb_kthread()
 * calling rcu_start_future_gp() just before the beginning of the
 * grace period.
 */
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rnp->completed & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}

#ifndef CONFIG_RCU_NOCB_CPU_ALL
/* Is the specified CPU a no-CBs CPU? */
bool rcu_is_nocb_cpu(int cpu)
{
	if (have_rcu_nocb_mask)
		return cpumask_test_cpu(cpu, rcu_nocb_mask);
	return false;
}
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */

/*
 * Kick the leader kthread for this NOCB group.
 */
static void wake_nocb_leader(struct rcu_data *rdp, bool force)
{
	struct rcu_data *rdp_leader = rdp->nocb_leader;

	if (!READ_ONCE(rdp_leader->nocb_kthread))
		return;
	if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
		swake_up(&rdp_leader->nocb_wq);
	}
}

/*
 * Does the specified CPU need an RCU callback for the specified flavor
 * of rcu_barrier()?
 */
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	unsigned long ret;
#ifdef CONFIG_PROVE_RCU
	struct rcu_head *rhp;
#endif /* #ifdef CONFIG_PROVE_RCU */

	/*
	 * Check count of all no-CBs callbacks awaiting invocation.
	 * There needs to be a barrier before this function is called,
	 * but associated with a prior determination that no more
	 * callbacks would be posted.  In the worst case, the first
	 * barrier in _rcu_barrier() suffices (but the caller cannot
	 * necessarily rely on this, not a substitute for the caller
	 * getting the concurrency design right!).  There must also be
	 * a barrier between the following load and posting of a callback
	 * (if a callback is in fact needed).  This is associated with an
	 * atomic_inc() in the caller.
	 */
	ret = atomic_long_read(&rdp->nocb_q_count);

#ifdef CONFIG_PROVE_RCU
	rhp = READ_ONCE(rdp->nocb_head);
	if (!rhp)
		rhp = READ_ONCE(rdp->nocb_gp_head);
	if (!rhp)
		rhp = READ_ONCE(rdp->nocb_follower_head);

	/* Having no rcuo kthread but CBs after scheduler starts is bad! */
	if (!READ_ONCE(rdp->nocb_kthread) && rhp &&
	    rcu_scheduler_fully_active) {
		/* RCU callback enqueued before CPU first came online??? */
		pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
		       cpu, rhp->func);
		WARN_ON_ONCE(1);
	}
#endif /* #ifdef CONFIG_PROVE_RCU */

	return !!ret;
}

/*
 * Enqueue the specified string of rcu_head structures onto the specified
 * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
 * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
 * counts are supplied by rhcount and rhcount_lazy.
 *
 * If warranted, also wake up the kthread servicing this CPU's queues.
 */
static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
				    struct rcu_head *rhp,
				    struct rcu_head **rhtp,
				    int rhcount, int rhcount_lazy,
				    unsigned long flags)
{
	int len;
	struct rcu_head **old_rhpp;
	struct task_struct *t;

	/* Enqueue the callback on the nocb list and update counts. */
	atomic_long_add(rhcount, &rdp->nocb_q_count);
	/* rcu_barrier() relies on ->nocb_q_count add before xchg. */
	old_rhpp = xchg(&rdp->nocb_tail, rhtp);
	WRITE_ONCE(*old_rhpp, rhp);
	atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
	smp_mb__after_atomic(); /* Store *old_rhpp before _wakeup test. */

	/* If we are not being polled and there is a kthread, awaken it ... */
	t = READ_ONCE(rdp->nocb_kthread);
	if (rcu_nocb_poll || !t) {
		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
				    TPS("WakeNotPoll"));
		return;
	}
	len = atomic_long_read(&rdp->nocb_q_count);
	if (old_rhpp == &rdp->nocb_head) {
		if (!irqs_disabled_flags(flags)) {
			/* ... if queue was empty ... */
			wake_nocb_leader(rdp, false);
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    TPS("WakeEmpty"));
		} else {
			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    TPS("WakeEmptyIsDeferred"));
		}
		rdp->qlen_last_fqs_check = 0;
	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
		/* ... or if many callbacks queued. */
		if (!irqs_disabled_flags(flags)) {
			wake_nocb_leader(rdp, true);
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    TPS("WakeOvf"));
		} else {
			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    TPS("WakeOvfIsDeferred"));
		}
		rdp->qlen_last_fqs_check = LONG_MAX / 2;
	} else {
		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
	}
	return;
}

/*
 * This is a helper for __call_rcu(), which invokes this when the normal
 * callback queue is inoperable.  If this is not a no-CBs CPU, this
 * function returns failure back to __call_rcu(), which can complain
 * appropriately.
 *
 * Otherwise, this function queues the callback where the corresponding
 * "rcuo" kthread can find it.
 */
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
			    bool lazy, unsigned long flags)
{
	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
	if (!rcu_is_nocb_cpu(rdp->cpu))
		return false;
	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
		trace_rcu_kfree_callback(rdp->rsp->name, rhp,
					 (unsigned long)rhp->func,
					 -atomic_long_read(&rdp->nocb_q_count_lazy),
					 -atomic_long_read(&rdp->nocb_q_count));
	else
		trace_rcu_callback(rdp->rsp->name, rhp,
				   -atomic_long_read(&rdp->nocb_q_count_lazy),
				   -atomic_long_read(&rdp->nocb_q_count));

	/*
	 * If called from an extended quiescent state with interrupts
	 * disabled, invoke the RCU core in order to allow the idle-entry
	 * deferred-wakeup check to function.
	 */
	if (irqs_disabled_flags(flags) &&
	    !rcu_is_watching() &&
	    cpu_online(smp_processor_id()))
		invoke_rcu_core();

	return true;
}

/*
 * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
 * not a no-CBs CPU.
 */
static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
						     struct rcu_data *rdp,
						     unsigned long flags)
{
	long ql = rsp->qlen;
	long qll = rsp->qlen_lazy;

	/* If this is not a no-CBs CPU, nothing to do. */
	if (!rcu_is_nocb_cpu(smp_processor_id()))
		return false;
	rsp->qlen = 0;
	rsp->qlen_lazy = 0;

	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
	if (rsp->orphan_donelist != NULL) {
		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
					rsp->orphan_donetail, ql, qll, flags);
		ql = qll = 0;
		rsp->orphan_donelist = NULL;
		rsp->orphan_donetail = &rsp->orphan_donelist;
	}
	if (rsp->orphan_nxtlist != NULL) {
		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
					rsp->orphan_nxttail, ql, qll, flags);
		ql = qll = 0;
		rsp->orphan_nxtlist = NULL;
		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
	}
	return true;
}

/*
 * If necessary, kick off a new grace period, and either way wait
 * for a subsequent grace period to complete.
 */
static void rcu_nocb_wait_gp(struct rcu_data *rdp)
{
	unsigned long c;
	bool d;
	unsigned long flags;
	bool needwake;
	struct rcu_node *rnp = rdp->mynode;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	needwake = rcu_start_future_gp(rnp, rdp, &c);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	if (needwake)
		rcu_gp_kthread_wake(rdp->rsp);

	/*
	 * Wait for the grace period.  Do so interruptibly to avoid messing
	 * up the load average.
	 */
	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
	for (;;) {
		swait_event_interruptible(
			rnp->nocb_gp_wq[c & 0x1],
			(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
		if (likely(d))
			break;
		WARN_ON(signal_pending(current));
		trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
	}
	trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
	smp_mb(); /* Ensure that CB invocation happens after GP end. */
}
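
/*
 * Worked example for the nocb_gp_wq[c & 0x1] indexing above (illustrative
 * numbers only): a waiter needing grace period c = 42 sleeps on
 * rnp->nocb_gp_wq[0], while waiters for c = 43 sleep on nocb_gp_wq[1].
 * When grace period 42 completes, rcu_nocb_gp_cleanup() is handed
 * nocb_gp_wq[42 & 0x1] and wakes exactly the waiters for that grace
 * period.  Two elements suffice because at most the current and the
 * next grace period can be waited on at any given time.
 */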

/*
 * Leaders come here to wait for additional callbacks to show up.
 * This function does not return until callbacks appear.
 */
static void nocb_leader_wait(struct rcu_data *my_rdp)
{
	bool firsttime = true;
	bool gotcbs;
	struct rcu_data *rdp;
	struct rcu_head **tail;

wait_again:

	/* Wait for callbacks to appear. */
	if (!rcu_nocb_poll) {
		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
		swait_event_interruptible(my_rdp->nocb_wq,
				!READ_ONCE(my_rdp->nocb_leader_sleep));
		/* Memory barrier handled by smp_mb() calls below and repoll. */
	} else if (firsttime) {
		firsttime = false; /* Don't drown trace log with "Poll"! */
		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll");
	}

	/*
	 * Each pass through the following loop checks a follower for CBs.
	 * We are our own first follower.  Any CBs found are moved to
	 * nocb_gp_head, where they await a grace period.
	 */
	gotcbs = false;
	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
		if (!rdp->nocb_gp_head)
			continue;  /* No CBs here, try next follower. */

		/* Move callbacks to wait-for-GP list, which is empty. */
		WRITE_ONCE(rdp->nocb_head, NULL);
		rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
		gotcbs = true;
	}

	/*
	 * If there were no callbacks, sleep a bit, rescan after a
	 * memory barrier, and go retry.
	 */
	if (unlikely(!gotcbs)) {
		if (!rcu_nocb_poll)
			trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
					    "WokeEmpty");
		WARN_ON(signal_pending(current));
		schedule_timeout_interruptible(1);

		/* Rescan in case we were a victim of memory ordering. */
		my_rdp->nocb_leader_sleep = true;
		smp_mb();  /* Ensure _sleep true before scan. */
		for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower)
			if (READ_ONCE(rdp->nocb_head)) {
				/* Found CB, so short-circuit next wait. */
				my_rdp->nocb_leader_sleep = false;
				break;
			}
		goto wait_again;
	}

	/* Wait for one grace period. */
	rcu_nocb_wait_gp(my_rdp);

	/*
	 * We left ->nocb_leader_sleep unset to reduce cache thrashing.
	 * We set it now, but recheck for new callbacks while we wait
	 * for a later grace period to elapse.
	 */
	my_rdp->nocb_leader_sleep = true;
	smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */

	/* Each pass through the following loop wakes a follower, if needed. */
	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
		if (READ_ONCE(rdp->nocb_head))
			my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/
		if (!rdp->nocb_gp_head)
			continue; /* No CBs, so no need to wake follower. */

		/* Append callbacks to follower's "done" list. */
		tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
		*tail = rdp->nocb_gp_head;
		smp_mb__after_atomic(); /* Store *tail before wakeup. */
		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
			/* List was empty, so wake up the follower.  */
			swake_up(&rdp->nocb_wq);
		}
	}

	/* If we (the leader) don't have CBs, go wait some more. */
	if (!my_rdp->nocb_follower_head)
		goto wait_again;
}

/*
 * Followers come here to wait for additional callbacks to show up.
 * This function does not return until callbacks appear.
 */
static void nocb_follower_wait(struct rcu_data *rdp)
{
	bool firsttime = true;

	for (;;) {
		if (!rcu_nocb_poll) {
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    "FollowerSleep");
			swait_event_interruptible(rdp->nocb_wq,
						 READ_ONCE(rdp->nocb_follower_head));
		} else if (firsttime) {
			/* Don't drown trace log with "Poll"! */
			firsttime = false;
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll");
		}
		if (smp_load_acquire(&rdp->nocb_follower_head)) {
			/* ^^^ Ensure CB invocation follows _head test. */
			return;
		}
		if (!rcu_nocb_poll)
			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
					    "WokeEmpty");
		WARN_ON(signal_pending(current));
		schedule_timeout_interruptible(1);
	}
}

/*
 * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
 * callbacks queued by the corresponding no-CBs CPU, however, there is
 * an optional leader-follower relationship so that the grace-period
 * kthreads don't have to do quite so many wakeups.
 */
static int rcu_nocb_kthread(void *arg)
{
	int c, cl;
	struct rcu_head *list;
	struct rcu_head *next;
	struct rcu_head **tail;
	struct rcu_data *rdp = arg;

	/* Each pass through this loop invokes one batch of callbacks. */
	for (;;) {
		/* Wait for callbacks. */
		if (rdp->nocb_leader == rdp)
			nocb_leader_wait(rdp);
		else
			nocb_follower_wait(rdp);

		/* Pull the ready-to-invoke callbacks onto local list. */
		list = READ_ONCE(rdp->nocb_follower_head);
		BUG_ON(!list);
		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
		WRITE_ONCE(rdp->nocb_follower_head, NULL);
		tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);

		/* Each pass through the following loop invokes a callback. */
		trace_rcu_batch_start(rdp->rsp->name,
				      atomic_long_read(&rdp->nocb_q_count_lazy),
				      atomic_long_read(&rdp->nocb_q_count), -1);
		c = cl = 0;
		while (list) {
			next = list->next;
			/* Wait for enqueuing to complete, if needed. */
			while (next == NULL && &list->next != tail) {
				trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
						    TPS("WaitQueue"));
				schedule_timeout_interruptible(1);
				trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
						    TPS("WokeQueue"));
				next = list->next;
			}
			debug_rcu_head_unqueue(list);
			local_bh_disable();
			if (__rcu_reclaim(rdp->rsp->name, list))
				cl++;
			c++;
			local_bh_enable();
			cond_resched_rcu_qs();
			list = next;
		}
		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
		smp_mb__before_atomic();  /* _add after CB invocation. */
		atomic_long_add(-c, &rdp->nocb_q_count);
		atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
		rdp->n_nocbs_invoked += c;
	}
	return 0;
}

/* Is a deferred wakeup of rcu_nocb_kthread() required? */
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
	return READ_ONCE(rdp->nocb_defer_wakeup);
}

/* Do a deferred wakeup of rcu_nocb_kthread(). */
static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	int ndw;

	if (!rcu_nocb_need_deferred_wakeup(rdp))
		return;
	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT);
	wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
}

void __init rcu_init_nohz(void)
{
	int cpu;
	bool need_rcu_nocb_mask = true;
	struct rcu_state *rsp;

#ifdef CONFIG_RCU_NOCB_CPU_NONE
	need_rcu_nocb_mask = false;
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_NONE */

#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
		need_rcu_nocb_mask = true;
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
		if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
			pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
			return;
		}
		have_rcu_nocb_mask = true;
	}
	if (!have_rcu_nocb_mask)
		return;

#ifdef CONFIG_RCU_NOCB_CPU_ZERO
	pr_info("\tOffload RCU callbacks from CPU 0\n");
	cpumask_set_cpu(0, rcu_nocb_mask);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
#ifdef CONFIG_RCU_NOCB_CPU_ALL
	pr_info("\tOffload RCU callbacks from all CPUs\n");
	cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
#if defined(CONFIG_NO_HZ_FULL)
	if (tick_nohz_full_running)
		cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
#endif /* #if defined(CONFIG_NO_HZ_FULL) */

	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
		pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
			    rcu_nocb_mask);
	}
	pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
		cpumask_pr_args(rcu_nocb_mask));
	if (rcu_nocb_poll)
		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");

	for_each_rcu_flavor(rsp) {
		for_each_cpu(cpu, rcu_nocb_mask)
			init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));
		rcu_organize_nocb_kthreads(rsp);
	}
}

/* Initialize per-rcu_data variables for no-CBs CPUs. */
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
	rdp->nocb_tail = &rdp->nocb_head;
	init_swait_queue_head(&rdp->nocb_wq);
	rdp->nocb_follower_tail = &rdp->nocb_follower_head;
}

/*
 * If the specified CPU is a no-CBs CPU that does not already have its
 * rcuo kthread for the specified RCU flavor, spawn it.  If the CPUs are
 * brought online out of order, this can require re-organizing the
 * leader-follower relationships.
 */
static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
{
	struct rcu_data *rdp;
	struct rcu_data *rdp_last;
	struct rcu_data *rdp_old_leader;
	struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
	struct task_struct *t;

	/*
	 * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
	 * then nothing to do.
	 */
	if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
		return;

	/* If we didn't spawn the leader first, reorganize! */
	rdp_old_leader = rdp_spawn->nocb_leader;
	if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
		rdp_last = NULL;
		rdp = rdp_old_leader;
		do {
			rdp->nocb_leader = rdp_spawn;
			if (rdp_last && rdp != rdp_spawn)
				rdp_last->nocb_next_follower = rdp;
			if (rdp == rdp_spawn) {
				rdp = rdp->nocb_next_follower;
			} else {
				rdp_last = rdp;
				rdp = rdp->nocb_next_follower;
				rdp_last->nocb_next_follower = NULL;
			}
		} while (rdp);
		rdp_spawn->nocb_next_follower = rdp_old_leader;
	}

	/* Spawn the kthread for this CPU and RCU flavor. */
	t = kthread_run(rcu_nocb_kthread, rdp_spawn,
			"rcuo%c/%d", rsp->abbr, cpu);
	BUG_ON(IS_ERR(t));
	WRITE_ONCE(rdp_spawn->nocb_kthread, t);
}

/*
 * If the specified CPU is a no-CBs CPU that does not already have its
 * rcuo kthreads, spawn them.
 */
static void rcu_spawn_all_nocb_kthreads(int cpu)
{
	struct rcu_state *rsp;

	if (rcu_scheduler_fully_active)
		for_each_rcu_flavor(rsp)
			rcu_spawn_one_nocb_kthread(rsp, cpu);
}

/*
 * Once the scheduler is running, spawn rcuo kthreads for all online
 * no-CBs CPUs.  This assumes that the early_initcall()s happen before
 * non-boot CPUs come online -- if this changes, we will need to add
 * some mutual exclusion.
 */
static void __init rcu_spawn_nocb_kthreads(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		rcu_spawn_all_nocb_kthreads(cpu);
}

/* How many follower CPU IDs per leader?  Default of -1 for sqrt(nr_cpu_ids). */
static int rcu_nocb_leader_stride = -1;
module_param(rcu_nocb_leader_stride, int, 0444);

/*
 * Initialize leader-follower relationships for all no-CBs CPUs.
 */
static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
{
	int cpu;
	int ls = rcu_nocb_leader_stride;
	int nl = 0;  /* Next leader. */
	struct rcu_data *rdp;
	struct rcu_data *rdp_leader = NULL;  /* Suppress misguided gcc warn. */
	struct rcu_data *rdp_prev = NULL;

	if (!have_rcu_nocb_mask)
		return;
	if (ls == -1) {
		ls = int_sqrt(nr_cpu_ids);
		rcu_nocb_leader_stride = ls;
	}

	/*
	 * Each pass through this loop sets up one rcu_data structure.
	 * Should the corresponding CPU come online in the future, then
	 * we will spawn the needed set of rcu_nocb_kthread() kthreads.
	 */
	for_each_cpu(cpu, rcu_nocb_mask) {
		rdp = per_cpu_ptr(rsp->rda, cpu);
		if (rdp->cpu >= nl) {
			/* New leader, set up for followers & next leader. */
			nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
			rdp->nocb_leader = rdp;
			rdp_leader = rdp;
		} else {
			/* Another follower, link to previous leader. */
			rdp->nocb_leader = rdp_leader;
			rdp_prev->nocb_next_follower = rdp;
		}
		rdp_prev = rdp;
	}
}
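
/*
 * Worked example for the stride logic above (illustrative numbers only):
 * with nr_cpu_ids = 16 and all CPUs in rcu_nocb_mask, the default stride
 * is int_sqrt(16) = 4, so CPUs 0, 4, 8, and 12 become leaders, with CPUs
 * 1-3, 5-7, 9-11, and 13-15 respectively as their followers.  Followers
 * hand their callbacks to their leader, which waits for the grace period
 * and hands the callbacks back for invocation.
 */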

/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs. */
static bool init_nocb_callback_list(struct rcu_data *rdp)
{
	if (!rcu_is_nocb_cpu(rdp->cpu))
		return false;

	/* If there are early-boot callbacks, move them to nocb lists. */
	if (rdp->nxtlist) {
		rdp->nocb_head = rdp->nxtlist;
		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
		rdp->nxtlist = NULL;
		rdp->qlen = 0;
		rdp->qlen_lazy = 0;
	}
	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
	return true;
}

#else /* #ifdef CONFIG_RCU_NOCB_CPU */

static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
{
	WARN_ON_ONCE(1); /* Should be dead code. */
	return false;
}

static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
}

static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
}

static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return NULL;
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}

static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
			    bool lazy, unsigned long flags)
{
	return false;
}

static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
						     struct rcu_data *rdp,
						     unsigned long flags)
{
	return false;
}

static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
{
}

static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
{
	return false;
}

static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
}

static void rcu_spawn_all_nocb_kthreads(int cpu)
{
}

static void __init rcu_spawn_nocb_kthreads(void)
{
}

static bool init_nocb_callback_list(struct rcu_data *rdp)
{
	return false;
}

#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

/*
 * An adaptive-ticks CPU can potentially execute in kernel mode for an
 * arbitrarily long period of time without the scheduling-clock tick.
 * RCU will be paying attention to this CPU because it is in the kernel,
 * but the CPU cannot be guaranteed to be executing the RCU state machine
 * because the scheduling-clock tick has been disabled.  Therefore, if an
 * adaptive-ticks CPU is failing to respond to the current grace period
 * and has not been idle from an RCU perspective, kick it.
 */
static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_cpu(cpu))
		smp_send_reschedule(cpu);
#endif /* #ifdef CONFIG_NO_HZ_FULL */
}

#ifdef CONFIG_NO_HZ_FULL_SYSIDLE

/* Full system-idle state machine. */
static int full_sysidle_state;		/* Current system-idle state. */
#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, ready for sysidle. */
#define RCU_SYSIDLE_FULL_NOTED	4	/* Actually entered sysidle state. */

/*
 * Invoked to note exit from irq or task transition to idle.  Note that
 * usermode execution does -not- count as idle here!  After all, we want
 * to detect full-system idle states, not RCU quiescent states and grace
 * periods.  The caller must have disabled interrupts.
 */
static void rcu_sysidle_enter(int irq)
{
	unsigned long j;
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	/* If there are no nohz_full= CPUs, no need to track this. */
	if (!tick_nohz_full_enabled())
		return;

	/* Adjust nesting, check for fully idle. */
	if (irq) {
		rdtp->dynticks_idle_nesting--;
		WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
		if (rdtp->dynticks_idle_nesting != 0)
			return;  /* Still not fully idle. */
	} else {
		if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
		    DYNTICK_TASK_NEST_VALUE) {
			rdtp->dynticks_idle_nesting = 0;
		} else {
			rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
			WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
			return;  /* Still not fully idle. */
		}
	}

	/* Record start of fully idle period. */
	j = jiffies;
	WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
	smp_mb__before_atomic();
	atomic_inc(&rdtp->dynticks_idle);
	smp_mb__after_atomic();
	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
}

/*
 * Unconditionally force exit from full system-idle state.  This is
 * invoked when a normal CPU exits idle, but must be called separately
 * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
 * is that the timekeeping CPU is permitted to take scheduling-clock
 * interrupts while the system is in system-idle state, and of course
 * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
 * interrupt from any other type of interrupt.
 */
void rcu_sysidle_force_exit(void)
{
	int oldstate = READ_ONCE(full_sysidle_state);
	int newoldstate;

	/*
	 * Each pass through the following loop attempts to exit full
	 * system-idle state.  If contention proves to be a problem,
	 * a trylock-based contention tree could be used here.
	 */
	while (oldstate > RCU_SYSIDLE_SHORT) {
		newoldstate = cmpxchg(&full_sysidle_state,
				      oldstate, RCU_SYSIDLE_NOT);
		if (oldstate == newoldstate &&
		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
			rcu_kick_nohz_cpu(tick_do_timer_cpu);
			return; /* We cleared it, done! */
		}
		oldstate = newoldstate;
	}
	smp_mb(); /* Order initial oldstate read against later clearing. */
}

/*
 * Invoked to note entry to irq or task transition from idle.  Note that
 * usermode execution does -not- count as idle here!  The caller must
 * have disabled interrupts.
 */
static void rcu_sysidle_exit(int irq)
{
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	/* If there are no nohz_full= CPUs, no need to track this. */
	if (!tick_nohz_full_enabled())
		return;

	/* Adjust nesting, check for already non-idle. */
	if (irq) {
		rdtp->dynticks_idle_nesting++;
		WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
		if (rdtp->dynticks_idle_nesting != 1)
			return; /* Already non-idle. */
	} else {

		/*
		 * Allow for irq misnesting.  Yes, it really is possible
		 * to enter an irq handler and then never leave it, and
		 * maybe also vice versa.  Handle both possibilities.
		 */
		if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
			rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
			WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
			return; /* Already non-idle. */
		} else {
			rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
		}
	}

	/* Record end of idle period. */
	smp_mb__before_atomic();
	atomic_inc(&rdtp->dynticks_idle);
	smp_mb__after_atomic();
	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));

	/*
	 * If we are the timekeeping CPU, we are permitted to be non-idle
	 * during a system-idle state.  This must be the case, because
	 * the timekeeping CPU has to take scheduling-clock interrupts
	 * during the time that the system is transitioning to full
	 * system-idle state.  This means that the timekeeping CPU must
	 * invoke rcu_sysidle_force_exit() directly if it does anything
	 * more than take a scheduling-clock interrupt.
	 */
	if (smp_processor_id() == tick_do_timer_cpu)
		return;

	/* Update system-idle state: we are a non-timekeeping CPU. */
	rcu_sysidle_force_exit();
}

/*
 * Check to see if the current CPU is idle.  Note that usermode execution
 * does not count as idle.  The caller must have disabled interrupts,
 * and must be running on tick_do_timer_cpu.
 */
static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
				  unsigned long *maxj)
{
	int cur;
	unsigned long j;
	struct rcu_dynticks *rdtp = rdp->dynticks;

	/* If there are no nohz_full= CPUs, don't check system-wide idleness. */
	if (!tick_nohz_full_enabled())
		return;

	/*
	 * If some other CPU has already reported non-idle, if this is
	 * not the flavor of RCU that tracks sysidle state, or if this
	 * is an offline or the timekeeping CPU, nothing to do.
	 */
	if (!*isidle || rdp->rsp != rcu_state_p ||
	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
		return;
	/* Verify affinity of current kthread: must be on tick_do_timer_cpu. */
	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);

	/* Pick up current idle and NMI-nesting counter and check. */
	cur = atomic_read(&rdtp->dynticks_idle);
	if (cur & 0x1) {
		*isidle = false; /* We are not idle! */
		return;
	}
	smp_mb(); /* Read counters before timestamps. */

	/* Pick up timestamps. */
	j = READ_ONCE(rdtp->dynticks_idle_jiffies);
	/* If this CPU entered idle more recently, update maxj timestamp. */
	if (ULONG_CMP_LT(*maxj, j))
		*maxj = j;
}

/*
 * Is this the flavor of RCU that is handling full-system idle?
 */
static bool is_sysidle_rcu_state(struct rcu_state *rsp)
{
	return rsp == rcu_state_p;
}

/*
 * Return a delay in jiffies based on the number of CPUs, rcu_node
 * leaf fanout, and jiffies tick rate.  The idea is to allow larger
 * systems more time to transition to full-idle state in order to
 * avoid the cache thrashing that otherwise occurs on the state variable.
 * Really small systems (less than a couple of tens of CPUs) should
 * instead use a single global atomically incremented counter, and later
 * versions of this will automatically reconfigure themselves accordingly.
 */
static unsigned long rcu_sysidle_delay(void)
{
	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
		return 0;
	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
}
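
/*
 * Worked example for the delay above (illustrative numbers only): with
 * nr_cpu_ids = 256, HZ = 1000, and rcu_fanout_leaf = 16, the delay is
 * DIV_ROUND_UP(256 * 1000, 16 * 1000) = 16 jiffies per state transition,
 * so bigger systems linger longer in each state before advancing.
 */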

/*
 * Advance the full-system-idle state.  This is invoked when all of
 * the non-timekeeping CPUs are idle.
 */
static void rcu_sysidle(unsigned long j)
{
	/* Check the current state. */
	switch (READ_ONCE(full_sysidle_state)) {
	case RCU_SYSIDLE_NOT:

		/* First time all are idle, so note a short idle period. */
		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
		break;

	case RCU_SYSIDLE_SHORT:

		/*
		 * Idle for a bit, time to advance to next state?
		 * cmpxchg failure means race with non-idle, let them win.
		 */
		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
			(void)cmpxchg(&full_sysidle_state,
				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
		break;

	case RCU_SYSIDLE_LONG:

		/*
		 * Do an additional check pass before advancing to full.
		 * cmpxchg failure means race with non-idle, let them win.
		 */
		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
			(void)cmpxchg(&full_sysidle_state,
				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
		break;

	default:
		break;
	}
}

/*
 * Found a non-idle non-timekeeping CPU, so kick the system-idle state
 * back to the beginning.
 */
static void rcu_sysidle_cancel(void)
{
	smp_mb();
	if (full_sysidle_state > RCU_SYSIDLE_SHORT)
		WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
}

/*
 * Update the sysidle state based on the results of a force-quiescent-state
 * scan of the CPUs' dyntick-idle state.
 */
static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
			       unsigned long maxj, bool gpkt)
{
	if (rsp != rcu_state_p)
		return;  /* Wrong flavor, ignore. */
	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
		return;  /* Running state machine from timekeeping CPU. */
	if (isidle)
		rcu_sysidle(maxj);    /* More idle! */
	else
		rcu_sysidle_cancel(); /* Idle is over. */
}

/*
 * Wrapper for rcu_sysidle_report() when called from the grace-period
 * kthread's context.
 */
static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
				  unsigned long maxj)
{
	/* If there are no nohz_full= CPUs, no need to track this. */
	if (!tick_nohz_full_enabled())
		return;

	rcu_sysidle_report(rsp, isidle, maxj, true);
}

/* Callback and function for forcing an RCU grace period. */
struct rcu_sysidle_head {
	struct rcu_head rh;
	int inuse;
};

static void rcu_sysidle_cb(struct rcu_head *rhp)
{
	struct rcu_sysidle_head *rshp;

	/*
	 * The following memory barrier is needed to replace the
	 * memory barriers that would normally be in the memory
	 * allocator.
	 */
	smp_mb();  /* grace period precedes setting inuse. */

	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
	WRITE_ONCE(rshp->inuse, 0);
}

/*
 * Check to see if the system is fully idle, other than the timekeeping CPU.
 * The caller must have disabled interrupts.  This is not intended to be
 * called unless tick_nohz_full_enabled().
 */
bool rcu_sys_is_idle(void)
{
	static struct rcu_sysidle_head rsh;
	int rss = READ_ONCE(full_sysidle_state);

	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
		return false;

	/* Handle small-system case by doing a full scan of CPUs. */
	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
		int oldrss = rss - 1;

		/*
		 * One pass to advance to each state up to _FULL.
		 * Give up if any pass fails to advance the state.
		 */
		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
			int cpu;
			bool isidle = true;
			unsigned long maxj = jiffies - ULONG_MAX / 4;
			struct rcu_data *rdp;

			/* Scan all the CPUs looking for nonidle CPUs. */
			for_each_possible_cpu(cpu) {
				rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
				if (!isidle)
					break;
			}
			rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
			oldrss = rss;
			rss = READ_ONCE(full_sysidle_state);
		}
	}

	/* If this is the first observation of an idle period, record it. */
	if (rss == RCU_SYSIDLE_FULL) {
		rss = cmpxchg(&full_sysidle_state,
			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
		return rss == RCU_SYSIDLE_FULL;
	}

	smp_mb(); /* ensure rss load happens before later caller actions. */

	/* If already fully idle, tell the caller (in case of races). */
	if (rss == RCU_SYSIDLE_FULL_NOTED)
		return true;

	/*
	 * We aren't fully idle yet.  On large systems, if no grace period
	 * is in progress, kick one off so that its force-quiescent-state
	 * scans will advance the sysidle state machine.  Reuse ->rh only
	 * once the previous callback has recycled it.
	 */
	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
	    !rcu_gp_in_progress(rcu_state_p) &&
	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
		call_rcu(&rsh.rh, rcu_sysidle_cb);
	return false;
}

/*
 * Initialize dynticks sysidle state for CPUs coming online.
 */
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
{
	rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
}

#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */

static void rcu_sysidle_enter(int irq)
{
}

static void rcu_sysidle_exit(int irq)
{
}

static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
				  unsigned long *maxj)
{
}

static bool is_sysidle_rcu_state(struct rcu_state *rsp)
{
	return false;
}

static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
				  unsigned long maxj)
{
}

static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
{
}

#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */

/*
 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
 * grace-period kthread will do force_quiescent_state() processing?
 * The idea is to avoid waking up RCU core processing on such a
 * CPU unless the grace period has extended for too long.
 *
 * This code relies on the fact that all NO_HZ_FULL CPUs are also
 * CONFIG_NO_HZ_COMMON CPUs.
 */
static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_cpu(smp_processor_id()) &&
	    (!rcu_gp_in_progress(rsp) ||
	     ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ)))
		return true;
#endif /* #ifdef CONFIG_NO_HZ_FULL */
	return false;
}

/*
 * Bind the grace-period kthread to the housekeeping CPU, or, if
 * CONFIG_NO_HZ_FULL_SYSIDLE=y, to the timekeeping CPU so that it can
 * run the sysidle state machine.
 */
static void rcu_bind_gp_kthread(void)
{
	int __maybe_unused cpu;

	if (!tick_nohz_full_enabled())
		return;
#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
	cpu = tick_do_timer_cpu;
	if (cpu >= 0 && cpu < nr_cpu_ids)
		set_cpus_allowed_ptr(current, cpumask_of(cpu));
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
	housekeeping_affine(current);
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}

/* Record the current task on dyntick-idle entry. */
static void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Record no current task on dyntick-idle exit. */
static void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}