/*
 * Read-Copy Update mechanism for mutual exclusion, tree-based
 * (hierarchical) implementation.
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/nmi.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/prefetch.h>
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
#include <linux/ftrace_event.h>
#include <linux/suspend.h>

#include "rcutree.h"
#include <trace/events/rcu.h>

#include "rcu.h"
63
64
65
66
67
68
69#define TPS(x) tracepoint_string(x)
70
71
72
73static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
74static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
75
76
77
78
79
80
81
82
83
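/*
 * Define the rcu_state structure and per-CPU rcu_data for one RCU flavor.
 * The flavor's name is also stashed in a __tracepoint_string variable so
 * that tracing can reference it cheaply.
 */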
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
static char sname##_varname[] = #sname; \
static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \
struct rcu_state sname##_state = { \
	.level = { &sname##_state.node[0] }, \
	.call = cr, \
	.fqs_state = RCU_GP_IDLE, \
	.gpnum = 0UL - 300UL, \
	.completed = 0UL - 300UL, \
	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
	.orphan_donetail = &sname##_state.orphan_donelist, \
	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
	.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
	.name = sname##_varname, \
	.abbr = sabbr, \
}; \
DEFINE_PER_CPU(struct rcu_data, sname##_data)

RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
105
106static struct rcu_state *rcu_state;
107LIST_HEAD(rcu_struct_flavors);
108
109
110static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF;
111module_param(rcu_fanout_leaf, int, 0444);
112int rcu_num_lvls __read_mostly = RCU_NUM_LVLS;
113static int num_rcu_lvl[] = {
114 NUM_RCU_LVL_0,
115 NUM_RCU_LVL_1,
116 NUM_RCU_LVL_2,
117 NUM_RCU_LVL_3,
118 NUM_RCU_LVL_4,
119};
120int rcu_num_nodes __read_mostly = NUM_RCU_NODES;
121
122
123
124
125
126
127
128
129
130
131int rcu_scheduler_active __read_mostly;
132EXPORT_SYMBOL_GPL(rcu_scheduler_active);
133
134
135
136
137
138
139
140
141
142
143
144
145
146static int rcu_scheduler_fully_active __read_mostly;
147
148#ifdef CONFIG_RCU_BOOST
149
150
151
152
153
154static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
155DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
156DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
157DEFINE_PER_CPU(char, rcu_cpu_has_work);
158
159#endif
160
161static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
162static void invoke_rcu_core(void);
163static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
164
165
166
167
168
169
170
171
172
173
174unsigned long rcutorture_testseq;
175unsigned long rcutorture_vernum;
176
177
178
179
180
181
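/*
 * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
 * permit this function to be invoked without holding the root rcu_node
 * structure's ->lock.
 */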
182static int rcu_gp_in_progress(struct rcu_state *rsp)
183{
184 return ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum);
185}
186
187
188
189
190
191
192
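/*
 * Record a quiescent state for the specified CPU: rcu_sched_qs() for the
 * rcu_sched flavor and, below, rcu_bh_qs() for the rcu_bh flavor.  The
 * tracepoint fires only on the first quiescent state of a grace period.
 */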
193void rcu_sched_qs(int cpu)
194{
195 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
196
197 if (rdp->passed_quiesce == 0)
198 trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
199 rdp->passed_quiesce = 1;
200}
201
202void rcu_bh_qs(int cpu)
203{
204 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
205
206 if (rdp->passed_quiesce == 0)
207 trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
208 rdp->passed_quiesce = 1;
209}
210
211
212
213
214
215
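/*
 * Note a context switch.  This is a quiescent state for RCU-sched, and
 * requires special handling for preemptible RCU.
 */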
216void rcu_note_context_switch(int cpu)
217{
218 trace_rcu_utilization(TPS("Start context switch"));
219 rcu_sched_qs(cpu);
220 rcu_preempt_note_context_switch(cpu);
221 trace_rcu_utilization(TPS("End context switch"));
222}
223EXPORT_SYMBOL_GPL(rcu_note_context_switch);
224
225DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
226 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
227 .dynticks = ATOMIC_INIT(1),
228#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
229 .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
230 .dynticks_idle = ATOMIC_INIT(1),
231#endif
232};
233
234static long blimit = 10;
235static long qhimark = 10000;
236static long qlowmark = 100;
237
238module_param(blimit, long, 0444);
239module_param(qhimark, long, 0444);
240module_param(qlowmark, long, 0444);
241
242static ulong jiffies_till_first_fqs = ULONG_MAX;
243static ulong jiffies_till_next_fqs = ULONG_MAX;
244
245module_param(jiffies_till_first_fqs, ulong, 0644);
246module_param(jiffies_till_next_fqs, ulong, 0644);
247
248static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
249 struct rcu_data *rdp);
250static void force_qs_rnp(struct rcu_state *rsp,
251 int (*f)(struct rcu_data *rsp, bool *isidle,
252 unsigned long *maxj),
253 bool *isidle, unsigned long *maxj);
254static void force_quiescent_state(struct rcu_state *rsp);
255static int rcu_pending(int cpu);
256
257
258
259
260long rcu_batches_completed_sched(void)
261{
262 return rcu_sched_state.completed;
263}
264EXPORT_SYMBOL_GPL(rcu_batches_completed_sched);
265
266
267
268
269long rcu_batches_completed_bh(void)
270{
271 return rcu_bh_state.completed;
272}
273EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
274
275
276
277
278void rcu_bh_force_quiescent_state(void)
279{
280 force_quiescent_state(&rcu_bh_state);
281}
282EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
283
284
285
286
287
288
289
290
291void rcutorture_record_test_transition(void)
292{
293 rcutorture_testseq++;
294 rcutorture_vernum = 0;
295}
296EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
297
298
299
300
301
302
303void rcutorture_record_progress(unsigned long vernum)
304{
305 rcutorture_vernum++;
306}
307EXPORT_SYMBOL_GPL(rcutorture_record_progress);
308
309
310
311
312void rcu_sched_force_quiescent_state(void)
313{
314 force_quiescent_state(&rcu_sched_state);
315}
316EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
317
318
319
320
321static int
322cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
323{
324 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
325 rdp->nxttail[RCU_DONE_TAIL] != NULL;
326}
327
328
329
330
331
332
333static int
334cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
335{
336 int i;
337
338 if (rcu_gp_in_progress(rsp))
339 return 0;
340 if (rcu_nocb_needs_gp(rsp))
341 return 1;
342 if (!rdp->nxttail[RCU_NEXT_TAIL])
343 return 0;
344 if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
345 return 1;
346 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
347 if (rdp->nxttail[i - 1] != rdp->nxttail[i] &&
348 ULONG_CMP_LT(ACCESS_ONCE(rsp->completed),
349 rdp->nxtcompleted[i]))
350 return 1;
351 return 0;
352}
353
354
355
356
357static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
358{
359 return &rsp->node[0];
360}
361
362
363
364
365
366
367
368
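/*
 * rcu_eqs_enter_common - current CPU is moving towards extended quiescent state
 *
 * If the new value of the ->dynticks_nesting counter now is zero,
 * we really have entered idle, and must do the appropriate accounting.
 * The caller must have disabled interrupts.
 */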
369static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
370 bool user)
371{
372 trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
373 if (!user && !is_idle_task(current)) {
374 struct task_struct *idle = idle_task(smp_processor_id());
375
376 trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
377 ftrace_dump(DUMP_ORIG);
378 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
379 current->pid, current->comm,
380 idle->pid, idle->comm);
381 }
382 rcu_prepare_for_idle(smp_processor_id());
383
384 smp_mb__before_atomic_inc();
385 atomic_inc(&rdtp->dynticks);
386 smp_mb__after_atomic_inc();
387 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
388
389
390
391
392
393 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
394 "Illegal idle entry in RCU read-side critical section.");
395 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
396 "Illegal idle entry in RCU-bh read-side critical section.");
397 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
398 "Illegal idle entry in RCU-sched read-side critical section.");
399}
400
401
402
403
404
405static void rcu_eqs_enter(bool user)
406{
407 long long oldval;
408 struct rcu_dynticks *rdtp;
409
410 rdtp = &__get_cpu_var(rcu_dynticks);
411 oldval = rdtp->dynticks_nesting;
412 WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
413 if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
414 rdtp->dynticks_nesting = 0;
415 else
416 rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
417 rcu_eqs_enter_common(rdtp, oldval, user);
418}
419
420
421
422
423
424
425
426
427
428
429
430
431
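/**
 * rcu_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur.
 */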
432void rcu_idle_enter(void)
433{
434 unsigned long flags;
435
436 local_irq_save(flags);
437 rcu_eqs_enter(false);
438 rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
439 local_irq_restore(flags);
440}
441EXPORT_SYMBOL_GPL(rcu_idle_enter);
442
443#ifdef CONFIG_RCU_USER_QS
444
445
446
447
448
449
450
451
452void rcu_user_enter(void)
453{
454 rcu_eqs_enter(1);
455}
456#endif
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
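/**
 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur.
 */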
474void rcu_irq_exit(void)
475{
476 unsigned long flags;
477 long long oldval;
478 struct rcu_dynticks *rdtp;
479
480 local_irq_save(flags);
481 rdtp = &__get_cpu_var(rcu_dynticks);
482 oldval = rdtp->dynticks_nesting;
483 rdtp->dynticks_nesting--;
484 WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
485 if (rdtp->dynticks_nesting)
486 trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting);
487 else
488 rcu_eqs_enter_common(rdtp, oldval, true);
489 rcu_sysidle_enter(rdtp, 1);
490 local_irq_restore(flags);
491}
492
493
494
495
496
497
498
499
500static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
501 int user)
502{
503 smp_mb__before_atomic_inc();
504 atomic_inc(&rdtp->dynticks);
505
506 smp_mb__after_atomic_inc();
507 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
508 rcu_cleanup_after_idle(smp_processor_id());
509 trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
510 if (!user && !is_idle_task(current)) {
511 struct task_struct *idle = idle_task(smp_processor_id());
512
513 trace_rcu_dyntick(TPS("Error on exit: not idle task"),
514 oldval, rdtp->dynticks_nesting);
515 ftrace_dump(DUMP_ORIG);
516 WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
517 current->pid, current->comm,
518 idle->pid, idle->comm);
519 }
520}
521
522
523
524
525
526static void rcu_eqs_exit(bool user)
527{
528 struct rcu_dynticks *rdtp;
529 long long oldval;
530
531 rdtp = &__get_cpu_var(rcu_dynticks);
532 oldval = rdtp->dynticks_nesting;
533 WARN_ON_ONCE(oldval < 0);
534 if (oldval & DYNTICK_TASK_NEST_MASK)
535 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
536 else
537 rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
538 rcu_eqs_exit_common(rdtp, oldval, user);
539}
540
541
542
543
544
545
546
547
548
549
550
551
552void rcu_idle_exit(void)
553{
554 unsigned long flags;
555
556 local_irq_save(flags);
557 rcu_eqs_exit(false);
558 rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
559 local_irq_restore(flags);
560}
561EXPORT_SYMBOL_GPL(rcu_idle_exit);
562
563#ifdef CONFIG_RCU_USER_QS
564
565
566
567
568
569
570void rcu_user_exit(void)
571{
572 rcu_eqs_exit(1);
573}
574#endif
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595void rcu_irq_enter(void)
596{
597 unsigned long flags;
598 struct rcu_dynticks *rdtp;
599 long long oldval;
600
601 local_irq_save(flags);
602 rdtp = &__get_cpu_var(rcu_dynticks);
603 oldval = rdtp->dynticks_nesting;
604 rdtp->dynticks_nesting++;
605 WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
606 if (oldval)
607 trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
608 else
609 rcu_eqs_exit_common(rdtp, oldval, true);
610 rcu_sysidle_exit(rdtp, 1);
611 local_irq_restore(flags);
612}
613
614
615
616
617
618
619
620
621void rcu_nmi_enter(void)
622{
623 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
624
625 if (rdtp->dynticks_nmi_nesting == 0 &&
626 (atomic_read(&rdtp->dynticks) & 0x1))
627 return;
628 rdtp->dynticks_nmi_nesting++;
629 smp_mb__before_atomic_inc();
630 atomic_inc(&rdtp->dynticks);
631
632 smp_mb__after_atomic_inc();
633 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
634}
635
636
637
638
639
640
641
642
643void rcu_nmi_exit(void)
644{
645 struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
646
647 if (rdtp->dynticks_nmi_nesting == 0 ||
648 --rdtp->dynticks_nmi_nesting != 0)
649 return;
650
651 smp_mb__before_atomic_inc();
652 atomic_inc(&rdtp->dynticks);
653 smp_mb__after_atomic_inc();
654 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
655}
656
657
658
659
660
661
662
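/**
 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
 *
 * If the current CPU is in its idle loop and is neither in an interrupt
 * nor an NMI handler, return true.
 */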
663int rcu_is_cpu_idle(void)
664{
665 int ret;
666
667 preempt_disable();
668 ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
669 preempt_enable();
670 return ret;
671}
672EXPORT_SYMBOL(rcu_is_cpu_idle);
673
674#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697bool rcu_lockdep_current_cpu_online(void)
698{
699 struct rcu_data *rdp;
700 struct rcu_node *rnp;
701 bool ret;
702
703 if (in_nmi())
704 return 1;
705 preempt_disable();
706 rdp = &__get_cpu_var(rcu_sched_data);
707 rnp = rdp->mynode;
708 ret = (rdp->grpmask & rnp->qsmaskinit) ||
709 !rcu_scheduler_fully_active;
710 preempt_enable();
711 return ret;
712}
713EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
714
715#endif
716
717
718
719
720
721
722
723
724static int rcu_is_cpu_rrupt_from_idle(void)
725{
726 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
727}
728
729
730
731
732
733
734static int dyntick_save_progress_counter(struct rcu_data *rdp,
735 bool *isidle, unsigned long *maxj)
736{
737 rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
738 rcu_sysidle_check_cpu(rdp, isidle, maxj);
739 return (rdp->dynticks_snap & 0x1) == 0;
740}
741
742
743
744
745
746
747
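/*
 * Return true if the specified CPU has passed through a quiescent
 * state by virtue of being in or having passed through a dynticks
 * idle state since the last call to dyntick_save_progress_counter()
 * for this same CPU, or by virtue of having been offline.
 */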
748static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
749 bool *isidle, unsigned long *maxj)
750{
751 unsigned int curr;
752 unsigned int snap;
753
754 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
755 snap = (unsigned int)rdp->dynticks_snap;
756
757
758
759
760
761
762
763
764
765 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
766 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
767 rdp->dynticks_fqs++;
768 return 1;
769 }
770
771
772
773
774
775
776
777
778
779
780
781
782 if (ULONG_CMP_GE(rdp->rsp->gp_start + 2, jiffies))
783 return 0;
784 barrier();
785 if (cpu_is_offline(rdp->cpu)) {
786 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
787 rdp->offline_fqs++;
788 return 1;
789 }
790
791
792
793
794
795
796
797
798 rcu_kick_nohz_cpu(rdp->cpu);
799
800 return 0;
801}
802
803static void record_gp_stall_check_time(struct rcu_state *rsp)
804{
805 rsp->gp_start = jiffies;
806 rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
807}
808
809
810
811
812
813
814
815static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
816{
817 int cpu;
818 unsigned long flags;
819 struct rcu_node *rnp;
820
821 rcu_for_each_leaf_node(rsp, rnp) {
822 raw_spin_lock_irqsave(&rnp->lock, flags);
823 if (rnp->qsmask != 0) {
824 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
825 if (rnp->qsmask & (1UL << cpu))
826 dump_cpu_task(rnp->grplo + cpu);
827 }
828 raw_spin_unlock_irqrestore(&rnp->lock, flags);
829 }
830}
831
832static void print_other_cpu_stall(struct rcu_state *rsp)
833{
834 int cpu;
835 long delta;
836 unsigned long flags;
837 int ndetected = 0;
838 struct rcu_node *rnp = rcu_get_root(rsp);
839 long totqlen = 0;
840
841
842
843 raw_spin_lock_irqsave(&rnp->lock, flags);
844 delta = jiffies - rsp->jiffies_stall;
845 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
846 raw_spin_unlock_irqrestore(&rnp->lock, flags);
847 return;
848 }
849 rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
850 raw_spin_unlock_irqrestore(&rnp->lock, flags);
851
852
853
854
855
856
857 pr_err("INFO: %s detected stalls on CPUs/tasks:",
858 rsp->name);
859 print_cpu_stall_info_begin();
860 rcu_for_each_leaf_node(rsp, rnp) {
861 raw_spin_lock_irqsave(&rnp->lock, flags);
862 ndetected += rcu_print_task_stall(rnp);
863 if (rnp->qsmask != 0) {
864 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
865 if (rnp->qsmask & (1UL << cpu)) {
866 print_cpu_stall_info(rsp,
867 rnp->grplo + cpu);
868 ndetected++;
869 }
870 }
871 raw_spin_unlock_irqrestore(&rnp->lock, flags);
872 }
873
874
875
876
877
878 rnp = rcu_get_root(rsp);
879 raw_spin_lock_irqsave(&rnp->lock, flags);
880 ndetected += rcu_print_task_stall(rnp);
881 raw_spin_unlock_irqrestore(&rnp->lock, flags);
882
883 print_cpu_stall_info_end();
884 for_each_possible_cpu(cpu)
885 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
886 pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
887 smp_processor_id(), (long)(jiffies - rsp->gp_start),
888 rsp->gpnum, rsp->completed, totqlen);
889 if (ndetected == 0)
890 pr_err("INFO: Stall ended before state dump start\n");
891 else if (!trigger_all_cpu_backtrace())
892 rcu_dump_cpu_stacks(rsp);
893
894
895
896 rcu_print_detail_task_stall(rsp);
897
898 force_quiescent_state(rsp);
899}
900
901static void print_cpu_stall(struct rcu_state *rsp)
902{
903 int cpu;
904 unsigned long flags;
905 struct rcu_node *rnp = rcu_get_root(rsp);
906 long totqlen = 0;
907
908
909
910
911
912
913 pr_err("INFO: %s self-detected stall on CPU", rsp->name);
914 print_cpu_stall_info_begin();
915 print_cpu_stall_info(rsp, smp_processor_id());
916 print_cpu_stall_info_end();
917 for_each_possible_cpu(cpu)
918 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
919 pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
920 jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
921 if (!trigger_all_cpu_backtrace())
922 dump_stack();
923
924 raw_spin_lock_irqsave(&rnp->lock, flags);
925 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
926 rsp->jiffies_stall = jiffies +
927 3 * rcu_jiffies_till_stall_check() + 3;
928 raw_spin_unlock_irqrestore(&rnp->lock, flags);
929
930 set_need_resched();
931}
932
933static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
934{
935 unsigned long j;
936 unsigned long js;
937 struct rcu_node *rnp;
938
939 if (rcu_cpu_stall_suppress)
940 return;
941 j = ACCESS_ONCE(jiffies);
942 js = ACCESS_ONCE(rsp->jiffies_stall);
943 rnp = rdp->mynode;
944 if (rcu_gp_in_progress(rsp) &&
945 (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
946
947
948 print_cpu_stall(rsp);
949
950 } else if (rcu_gp_in_progress(rsp) &&
951 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY)) {
952
953
954 print_other_cpu_stall(rsp);
955 }
956}
957
958
959
960
961
962
963
964
965
966
967void rcu_cpu_stall_reset(void)
968{
969 struct rcu_state *rsp;
970
971 for_each_rcu_flavor(rsp)
972 rsp->jiffies_stall = jiffies + ULONG_MAX / 2;
973}
974
975
976
977
978static void init_callback_list(struct rcu_data *rdp)
979{
980 int i;
981
982 if (init_nocb_callback_list(rdp))
983 return;
984 rdp->nxtlist = NULL;
985 for (i = 0; i < RCU_NEXT_SIZE; i++)
986 rdp->nxttail[i] = &rdp->nxtlist;
987}
988
989
990
991
992
993
994
995
996
997
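/*
 * Determine the value that ->completed will have at the end of the
 * next subsequent grace period.  This is used to tag callbacks so that
 * a CPU can invoke them in a timely fashion even if that CPU has not
 * yet noticed that a grace period has started or completed.
 *
 * The caller must hold rnp->lock with interrupts disabled.
 */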
998static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
999 struct rcu_node *rnp)
1000{
1001
1002
1003
1004
1005
1006
1007
1008 if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed)
1009 return rnp->completed + 1;
1010
1011
1012
1013
1014
1015 return rnp->completed + 2;
1016}
1017
1018
1019
1020
1021
1022static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1023 unsigned long c, const char *s)
1024{
1025 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
1026 rnp->completed, c, rnp->level,
1027 rnp->grplo, rnp->grphi, s);
1028}
1029
1030
1031
1032
1033
1034
1035
1036
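/*
 * Start some future grace period, as needed to handle newly arrived
 * callbacks.  The required future grace periods are recorded in each
 * rcu_node structure's ->need_future_gp field.
 *
 * The caller must hold the specified rcu_node structure's ->lock.
 */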
1037static unsigned long __maybe_unused
1038rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
1039{
1040 unsigned long c;
1041 int i;
1042 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1043
1044
1045
1046
1047
1048 c = rcu_cbs_completed(rdp->rsp, rnp);
1049 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1050 if (rnp->need_future_gp[c & 0x1]) {
1051 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1052 return c;
1053 }
1054
1055
1056
1057
1058
1059
1060
1061
1062 if (rnp->gpnum != rnp->completed ||
1063 ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
1064 rnp->need_future_gp[c & 0x1]++;
1065 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
1066 return c;
1067 }
1068
1069
1070
1071
1072
1073
1074 if (rnp != rnp_root)
1075 raw_spin_lock(&rnp_root->lock);
1076
1077
1078
1079
1080
1081
1082
1083 c = rcu_cbs_completed(rdp->rsp, rnp_root);
1084 for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
1085 if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
1086 rdp->nxtcompleted[i] = c;
1087
1088
1089
1090
1091
1092 if (rnp_root->need_future_gp[c & 0x1]) {
1093 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
1094 goto unlock_out;
1095 }
1096
1097
1098 rnp_root->need_future_gp[c & 0x1]++;
1099
1100
1101 if (rnp_root->gpnum != rnp_root->completed) {
1102 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1103 } else {
1104 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1105 rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1106 }
1107unlock_out:
1108 if (rnp != rnp_root)
1109 raw_spin_unlock(&rnp_root->lock);
1110 return c;
1111}
1112
1113
1114
1115
1116
1117
1118
1119static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1120{
1121 int c = rnp->completed;
1122 int needmore;
1123 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1124
1125 rcu_nocb_gp_cleanup(rsp, rnp);
1126 rnp->need_future_gp[c & 0x1] = 0;
1127 needmore = rnp->need_future_gp[(c + 1) & 0x1];
1128 trace_rcu_future_gp(rnp, rdp, c,
1129 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1130 return needmore;
1131}
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
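/*
 * If there is room, assign a ->completed number to any callbacks on
 * this CPU that have not already been assigned.  Also accelerate any
 * callbacks that were previously assigned a ->completed number that has
 * since proven to be too conservative.
 *
 * The caller must hold rnp->lock.
 */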
1144static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1145 struct rcu_data *rdp)
1146{
1147 unsigned long c;
1148 int i;
1149
1150
1151 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1152 return;
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168 c = rcu_cbs_completed(rsp, rnp);
1169 for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1170 if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1171 !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1172 break;
1173
1174
1175
1176
1177
1178
1179
1180 if (++i >= RCU_NEXT_TAIL)
1181 return;
1182
1183
1184
1185
1186
1187
1188 for (; i <= RCU_NEXT_TAIL; i++) {
1189 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
1190 rdp->nxtcompleted[i] = c;
1191 }
1192
1193 rcu_start_future_gp(rnp, rdp);
1194
1195
1196 if (!*rdp->nxttail[RCU_WAIT_TAIL])
1197 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1198 else
1199 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
1200}
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1212 struct rcu_data *rdp)
1213{
1214 int i, j;
1215
1216
1217 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1218 return;
1219
1220
1221
1222
1223
1224 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
1225 if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1226 break;
1227 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1228 }
1229
1230 for (j = RCU_WAIT_TAIL; j < i; j++)
1231 rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1232
1233
1234 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1235 if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1236 break;
1237 rdp->nxttail[j] = rdp->nxttail[i];
1238 rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1239 }
1240
1241
1242 rcu_accelerate_cbs(rsp, rnp, rdp);
1243}
1244
1245
1246
1247
1248
1249
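/*
 * Update CPU-local rcu_data state to record the beginnings and ends of
 * grace periods.  The caller must hold the ->lock of the leaf rcu_node
 * structure corresponding to the current CPU, and must have irqs disabled.
 */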
1250static void __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
1251{
1252
1253 if (rdp->completed == rnp->completed) {
1254
1255
1256 rcu_accelerate_cbs(rsp, rnp, rdp);
1257
1258 } else {
1259
1260
1261 rcu_advance_cbs(rsp, rnp, rdp);
1262
1263
1264 rdp->completed = rnp->completed;
1265 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuend"));
1266 }
1267
1268 if (rdp->gpnum != rnp->gpnum) {
1269
1270
1271
1272
1273
1274 rdp->gpnum = rnp->gpnum;
1275 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
1276 rdp->passed_quiesce = 0;
1277 rdp->qs_pending = !!(rnp->qsmask & rdp->grpmask);
1278 zero_cpu_stall_ticks(rdp);
1279 }
1280}
1281
1282static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1283{
1284 unsigned long flags;
1285 struct rcu_node *rnp;
1286
1287 local_irq_save(flags);
1288 rnp = rdp->mynode;
1289 if ((rdp->gpnum == ACCESS_ONCE(rnp->gpnum) &&
1290 rdp->completed == ACCESS_ONCE(rnp->completed)) ||
1291 !raw_spin_trylock(&rnp->lock)) {
1292 local_irq_restore(flags);
1293 return;
1294 }
1295 __note_gp_changes(rsp, rnp, rdp);
1296 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1297}
1298
1299
1300
1301
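/*
 * Initialize a new grace period: advance ->gpnum and set the
 * quiescent-state bitmasks throughout the rcu_node hierarchy.
 */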
1302static int rcu_gp_init(struct rcu_state *rsp)
1303{
1304 struct rcu_data *rdp;
1305 struct rcu_node *rnp = rcu_get_root(rsp);
1306
1307 rcu_bind_gp_kthread();
1308 raw_spin_lock_irq(&rnp->lock);
1309 rsp->gp_flags = 0;
1310
1311 if (rcu_gp_in_progress(rsp)) {
1312
1313 raw_spin_unlock_irq(&rnp->lock);
1314 return 0;
1315 }
1316
1317
1318 rsp->gpnum++;
1319 trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
1320 record_gp_stall_check_time(rsp);
1321 raw_spin_unlock_irq(&rnp->lock);
1322
1323
1324 mutex_lock(&rsp->onoff_mutex);
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339 rcu_for_each_node_breadth_first(rsp, rnp) {
1340 raw_spin_lock_irq(&rnp->lock);
1341 rdp = this_cpu_ptr(rsp->rda);
1342 rcu_preempt_check_blocked_tasks(rnp);
1343 rnp->qsmask = rnp->qsmaskinit;
1344 ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
1345 WARN_ON_ONCE(rnp->completed != rsp->completed);
1346 ACCESS_ONCE(rnp->completed) = rsp->completed;
1347 if (rnp == rdp->mynode)
1348 __note_gp_changes(rsp, rnp, rdp);
1349 rcu_preempt_boost_start_gp(rnp);
1350 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
1351 rnp->level, rnp->grplo,
1352 rnp->grphi, rnp->qsmask);
1353 raw_spin_unlock_irq(&rnp->lock);
1354#ifdef CONFIG_PROVE_RCU_DELAY
1355 if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
1356 system_state == SYSTEM_RUNNING)
1357 udelay(200);
1358#endif
1359 cond_resched();
1360 }
1361
1362 mutex_unlock(&rsp->onoff_mutex);
1363 return 1;
1364}
1365
1366
1367
1368
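/*
 * Do one round of quiescent-state forcing: on the first round snapshot
 * the per-CPU dynticks counters, on subsequent rounds check those
 * snapshots and report quiescent states for CPUs that have been idle
 * or offline.
 */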
1369int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1370{
1371 int fqs_state = fqs_state_in;
1372 bool isidle = false;
1373 unsigned long maxj;
1374 struct rcu_node *rnp = rcu_get_root(rsp);
1375
1376 rsp->n_force_qs++;
1377 if (fqs_state == RCU_SAVE_DYNTICK) {
1378
1379 if (is_sysidle_rcu_state(rsp)) {
1380 isidle = 1;
1381 maxj = jiffies - ULONG_MAX / 4;
1382 }
1383 force_qs_rnp(rsp, dyntick_save_progress_counter,
1384 &isidle, &maxj);
1385 rcu_sysidle_report_gp(rsp, isidle, maxj);
1386 fqs_state = RCU_FORCE_QS;
1387 } else {
1388
1389 isidle = 0;
1390 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1391 }
1392
1393 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1394 raw_spin_lock_irq(&rnp->lock);
1395 rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
1396 raw_spin_unlock_irq(&rnp->lock);
1397 }
1398 return fqs_state;
1399}
1400
1401
1402
1403
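/*
 * Clean up after the old grace period: propagate the new ->completed
 * value through the rcu_node hierarchy, mark the grace period as ended,
 * and start another grace period if callbacks need one.
 */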
1404static void rcu_gp_cleanup(struct rcu_state *rsp)
1405{
1406 unsigned long gp_duration;
1407 int nocb = 0;
1408 struct rcu_data *rdp;
1409 struct rcu_node *rnp = rcu_get_root(rsp);
1410
1411 raw_spin_lock_irq(&rnp->lock);
1412 gp_duration = jiffies - rsp->gp_start;
1413 if (gp_duration > rsp->gp_max)
1414 rsp->gp_max = gp_duration;
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424 raw_spin_unlock_irq(&rnp->lock);
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435 rcu_for_each_node_breadth_first(rsp, rnp) {
1436 raw_spin_lock_irq(&rnp->lock);
1437 ACCESS_ONCE(rnp->completed) = rsp->gpnum;
1438 rdp = this_cpu_ptr(rsp->rda);
1439 if (rnp == rdp->mynode)
1440 __note_gp_changes(rsp, rnp, rdp);
1441 nocb += rcu_future_gp_cleanup(rsp, rnp);
1442 raw_spin_unlock_irq(&rnp->lock);
1443 cond_resched();
1444 }
1445 rnp = rcu_get_root(rsp);
1446 raw_spin_lock_irq(&rnp->lock);
1447 rcu_nocb_gp_set(rnp, nocb);
1448
1449 rsp->completed = rsp->gpnum;
1450 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
1451 rsp->fqs_state = RCU_GP_IDLE;
1452 rdp = this_cpu_ptr(rsp->rda);
1453 rcu_advance_cbs(rsp, rnp, rdp);
1454 if (cpu_needs_another_gp(rsp, rdp))
1455 rsp->gp_flags = 1;
1456 raw_spin_unlock_irq(&rnp->lock);
1457}
1458
1459
1460
1461
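/*
 * Body of kthread that handles grace periods: wait for a grace period to
 * be requested, initialize it, force quiescent states at intervals until
 * it completes, then clean up and repeat.
 */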
1462static int __noreturn rcu_gp_kthread(void *arg)
1463{
1464 int fqs_state;
1465 unsigned long j;
1466 int ret;
1467 struct rcu_state *rsp = arg;
1468 struct rcu_node *rnp = rcu_get_root(rsp);
1469
1470 for (;;) {
1471
1472
1473 for (;;) {
1474 wait_event_interruptible(rsp->gp_wq,
1475 rsp->gp_flags &
1476 RCU_GP_FLAG_INIT);
1477 if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
1478 rcu_gp_init(rsp))
1479 break;
1480 cond_resched();
1481 flush_signals(current);
1482 }
1483
1484
1485 fqs_state = RCU_SAVE_DYNTICK;
1486 j = jiffies_till_first_fqs;
1487 if (j > HZ) {
1488 j = HZ;
1489 jiffies_till_first_fqs = HZ;
1490 }
1491 for (;;) {
1492 rsp->jiffies_force_qs = jiffies + j;
1493 ret = wait_event_interruptible_timeout(rsp->gp_wq,
1494 (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
1495 (!ACCESS_ONCE(rnp->qsmask) &&
1496 !rcu_preempt_blocked_readers_cgp(rnp)),
1497 j);
1498
1499 if (!ACCESS_ONCE(rnp->qsmask) &&
1500 !rcu_preempt_blocked_readers_cgp(rnp))
1501 break;
1502
1503 if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) {
1504 fqs_state = rcu_gp_fqs(rsp, fqs_state);
1505 cond_resched();
1506 } else {
1507
1508 cond_resched();
1509 flush_signals(current);
1510 }
1511 j = jiffies_till_next_fqs;
1512 if (j > HZ) {
1513 j = HZ;
1514 jiffies_till_next_fqs = HZ;
1515 } else if (j < 1) {
1516 j = 1;
1517 jiffies_till_next_fqs = 1;
1518 }
1519 }
1520
1521
1522 rcu_gp_cleanup(rsp);
1523 }
1524}
1525
1526static void rsp_wakeup(struct irq_work *work)
1527{
1528 struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
1529
1530
1531 wake_up(&rsp->gp_wq);
1532}
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543static void
1544rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
1545 struct rcu_data *rdp)
1546{
1547 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
1548
1549
1550
1551
1552
1553
1554 return;
1555 }
1556 rsp->gp_flags = RCU_GP_FLAG_INIT;
1557
1558
1559
1560
1561
1562
1563
1564 if (current != rsp->gp_kthread)
1565 irq_work_queue(&rsp->wakeup_work);
1566}
1567
1568
1569
1570
1571
1572
1573
1574
1575static void
1576rcu_start_gp(struct rcu_state *rsp)
1577{
1578 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1579 struct rcu_node *rnp = rcu_get_root(rsp);
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589 rcu_advance_cbs(rsp, rnp, rdp);
1590 rcu_start_gp_advanced(rsp, rnp, rdp);
1591}
1592
1593
1594
1595
1596
1597
1598
1599
1600static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1601 __releases(rcu_get_root(rsp)->lock)
1602{
1603 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1604 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1605 wake_up(&rsp->gp_wq);
1606}
1607
1608
1609
1610
1611
1612
1613
1614
1615
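/*
 * Report an observed quiescent state for the specified mask of CPUs,
 * clearing bits and walking up the rcu_node hierarchy as needed.  If the
 * grace period has fully ended, report that fact to the rcu_state
 * structure.  This function releases rnp->lock.
 */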
1616static void
1617rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1618 struct rcu_node *rnp, unsigned long flags)
1619 __releases(rnp->lock)
1620{
1621 struct rcu_node *rnp_c;
1622
1623
1624 for (;;) {
1625 if (!(rnp->qsmask & mask)) {
1626
1627
1628 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1629 return;
1630 }
1631 rnp->qsmask &= ~mask;
1632 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1633 mask, rnp->qsmask, rnp->level,
1634 rnp->grplo, rnp->grphi,
1635 !!rnp->gp_tasks);
1636 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
1637
1638
1639 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1640 return;
1641 }
1642 mask = rnp->grpmask;
1643 if (rnp->parent == NULL) {
1644
1645
1646
1647 break;
1648 }
1649 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1650 rnp_c = rnp;
1651 rnp = rnp->parent;
1652 raw_spin_lock_irqsave(&rnp->lock, flags);
1653 WARN_ON_ONCE(rnp_c->qsmask);
1654 }
1655
1656
1657
1658
1659
1660
1661 rcu_report_qs_rsp(rsp, flags);
1662}
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673static void
1674rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
1675{
1676 unsigned long flags;
1677 unsigned long mask;
1678 struct rcu_node *rnp;
1679
1680 rnp = rdp->mynode;
1681 raw_spin_lock_irqsave(&rnp->lock, flags);
1682 if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
1683 rnp->completed == rnp->gpnum) {
1684
1685
1686
1687
1688
1689
1690
1691 rdp->passed_quiesce = 0;
1692 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1693 return;
1694 }
1695 mask = rdp->grpmask;
1696 if ((rnp->qsmask & mask) == 0) {
1697 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1698 } else {
1699 rdp->qs_pending = 0;
1700
1701
1702
1703
1704
1705 rcu_accelerate_cbs(rsp, rnp, rdp);
1706
1707 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1708 }
1709}
1710
1711
1712
1713
1714
1715
1716
1717static void
1718rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1719{
1720
1721 note_gp_changes(rsp, rdp);
1722
1723
1724
1725
1726
1727 if (!rdp->qs_pending)
1728 return;
1729
1730
1731
1732
1733
1734 if (!rdp->passed_quiesce)
1735 return;
1736
1737
1738
1739
1740
1741 rcu_report_qs_rdp(rdp->cpu, rsp, rdp);
1742}
1743
1744#ifdef CONFIG_HOTPLUG_CPU
1745
1746
1747
1748
1749
1750
1751static void
1752rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
1753 struct rcu_node *rnp, struct rcu_data *rdp)
1754{
1755
1756 if (rcu_is_nocb_cpu(rdp->cpu))
1757 return;
1758
1759
1760
1761
1762
1763
1764 if (rdp->nxtlist != NULL) {
1765 rsp->qlen_lazy += rdp->qlen_lazy;
1766 rsp->qlen += rdp->qlen;
1767 rdp->n_cbs_orphaned += rdp->qlen;
1768 rdp->qlen_lazy = 0;
1769 ACCESS_ONCE(rdp->qlen) = 0;
1770 }
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781 if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) {
1782 *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
1783 rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
1784 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1785 }
1786
1787
1788
1789
1790
1791
1792 if (rdp->nxtlist != NULL) {
1793 *rsp->orphan_donetail = rdp->nxtlist;
1794 rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
1795 }
1796
1797
1798 init_callback_list(rdp);
1799}
1800
1801
1802
1803
1804
1805static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
1806{
1807 int i;
1808 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
1809
1810
1811 if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
1812 return;
1813
1814
1815 rdp->qlen_lazy += rsp->qlen_lazy;
1816 rdp->qlen += rsp->qlen;
1817 rdp->n_cbs_adopted += rsp->qlen;
1818 if (rsp->qlen_lazy != rsp->qlen)
1819 rcu_idle_count_callbacks_posted();
1820 rsp->qlen_lazy = 0;
1821 rsp->qlen = 0;
1822
1823
1824
1825
1826
1827
1828
1829
1830 if (rsp->orphan_donelist != NULL) {
1831 *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL];
1832 *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist;
1833 for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--)
1834 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1835 rdp->nxttail[i] = rsp->orphan_donetail;
1836 rsp->orphan_donelist = NULL;
1837 rsp->orphan_donetail = &rsp->orphan_donelist;
1838 }
1839
1840
1841 if (rsp->orphan_nxtlist != NULL) {
1842 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
1843 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
1844 rsp->orphan_nxtlist = NULL;
1845 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
1846 }
1847}
1848
1849
1850
1851
1852static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1853{
1854 RCU_TRACE(unsigned long mask);
1855 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda));
1856 RCU_TRACE(struct rcu_node *rnp = rdp->mynode);
1857
1858 RCU_TRACE(mask = rdp->grpmask);
1859 trace_rcu_grace_period(rsp->name,
1860 rnp->gpnum + 1 - !!(rnp->qsmask & mask),
1861 TPS("cpuofl"));
1862}
1863
1864
1865
1866
1867
1868
1869
1870
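/*
 * The CPU has been completely removed, and some other CPU is reporting
 * this fact from process context.  Do the remainder of the cleanup,
 * including orphaning the outgoing CPU's RCU callbacks and removing the
 * CPU from the rcu_node hierarchy's ->qsmaskinit bitmasks.
 */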
1871static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1872{
1873 unsigned long flags;
1874 unsigned long mask;
1875 int need_report = 0;
1876 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1877 struct rcu_node *rnp = rdp->mynode;
1878
1879
1880 rcu_boost_kthread_setaffinity(rnp, -1);
1881
1882
1883
1884
1885 mutex_lock(&rsp->onoff_mutex);
1886 raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
1887
1888
1889 rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
1890 rcu_adopt_orphan_cbs(rsp);
1891
1892
1893 mask = rdp->grpmask;
1894 do {
1895 raw_spin_lock(&rnp->lock);
1896 rnp->qsmaskinit &= ~mask;
1897 if (rnp->qsmaskinit != 0) {
1898 if (rnp != rdp->mynode)
1899 raw_spin_unlock(&rnp->lock);
1900 break;
1901 }
1902 if (rnp == rdp->mynode)
1903 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1904 else
1905 raw_spin_unlock(&rnp->lock);
1906 mask = rnp->grpmask;
1907 rnp = rnp->parent;
1908 } while (rnp != NULL);
1909
1910
1911
1912
1913
1914
1915
1916 raw_spin_unlock(&rsp->orphan_lock);
1917 rnp = rdp->mynode;
1918 if (need_report & RCU_OFL_TASKS_NORM_GP)
1919 rcu_report_unblock_qs_rnp(rnp, flags);
1920 else
1921 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1922 if (need_report & RCU_OFL_TASKS_EXP_GP)
1923 rcu_report_exp_rnp(rsp, rnp, true);
1924 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
1925 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
1926 cpu, rdp->qlen, rdp->nxtlist);
1927 init_callback_list(rdp);
1928
1929 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
1930 mutex_unlock(&rsp->onoff_mutex);
1931}
1932
1933#else
1934
1935static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1936{
1937}
1938
1939static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1940{
1941}
1942
1943#endif
1944
1945
1946
1947
1948
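/*
 * Invoke any RCU callbacks that have made it to the end of their grace
 * period.  Throttle as specified by rdp->blimit.
 */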
1949static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1950{
1951 unsigned long flags;
1952 struct rcu_head *next, *list, **tail;
1953 long bl, count, count_lazy;
1954 int i;
1955
1956
1957 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1958 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1959 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
1960 need_resched(), is_idle_task(current),
1961 rcu_is_callbacks_kthread());
1962 return;
1963 }
1964
1965
1966
1967
1968
1969 local_irq_save(flags);
1970 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
1971 bl = rdp->blimit;
1972 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
1973 list = rdp->nxtlist;
1974 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1975 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
1976 tail = rdp->nxttail[RCU_DONE_TAIL];
1977 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
1978 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
1979 rdp->nxttail[i] = &rdp->nxtlist;
1980 local_irq_restore(flags);
1981
1982
1983 count = count_lazy = 0;
1984 while (list) {
1985 next = list->next;
1986 prefetch(next);
1987 debug_rcu_head_unqueue(list);
1988 if (__rcu_reclaim(rsp->name, list))
1989 count_lazy++;
1990 list = next;
1991
1992 if (++count >= bl &&
1993 (need_resched() ||
1994 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
1995 break;
1996 }
1997
1998 local_irq_save(flags);
1999 trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
2000 is_idle_task(current),
2001 rcu_is_callbacks_kthread());
2002
2003
2004 if (list != NULL) {
2005 *tail = rdp->nxtlist;
2006 rdp->nxtlist = list;
2007 for (i = 0; i < RCU_NEXT_SIZE; i++)
2008 if (&rdp->nxtlist == rdp->nxttail[i])
2009 rdp->nxttail[i] = tail;
2010 else
2011 break;
2012 }
2013 smp_mb();
2014 rdp->qlen_lazy -= count_lazy;
2015 ACCESS_ONCE(rdp->qlen) -= count;
2016 rdp->n_cbs_invoked += count;
2017
2018
2019 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
2020 rdp->blimit = blimit;
2021
2022
2023 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
2024 rdp->qlen_last_fqs_check = 0;
2025 rdp->n_force_qs_snap = rsp->n_force_qs;
2026 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
2027 rdp->qlen_last_fqs_check = rdp->qlen;
2028 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0));
2029
2030 local_irq_restore(flags);
2031
2032
2033 if (cpu_has_callbacks_ready_to_invoke(rdp))
2034 invoke_rcu_core();
2035}
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
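/*
 * Check to see if this CPU is in a non-context-switch quiescent state
 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
 * Also schedule RCU core processing if this CPU has RCU-related work
 * to do.  Invoked from the scheduling-clock interrupt.
 */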
2046void rcu_check_callbacks(int cpu, int user)
2047{
2048 trace_rcu_utilization(TPS("Start scheduler-tick"));
2049 increment_cpu_stall_ticks();
2050 if (user || rcu_is_cpu_rrupt_from_idle()) {
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064 rcu_sched_qs(cpu);
2065 rcu_bh_qs(cpu);
2066
2067 } else if (!in_softirq()) {
2068
2069
2070
2071
2072
2073
2074
2075
2076 rcu_bh_qs(cpu);
2077 }
2078 rcu_preempt_check_callbacks(cpu);
2079 if (rcu_pending(cpu))
2080 invoke_rcu_core();
2081 trace_rcu_utilization(TPS("End scheduler-tick"));
2082}
2083
2084
2085
2086
2087
2088
2089
2090
2091static void force_qs_rnp(struct rcu_state *rsp,
2092 int (*f)(struct rcu_data *rsp, bool *isidle,
2093 unsigned long *maxj),
2094 bool *isidle, unsigned long *maxj)
2095{
2096 unsigned long bit;
2097 int cpu;
2098 unsigned long flags;
2099 unsigned long mask;
2100 struct rcu_node *rnp;
2101
2102 rcu_for_each_leaf_node(rsp, rnp) {
2103 cond_resched();
2104 mask = 0;
2105 raw_spin_lock_irqsave(&rnp->lock, flags);
2106 if (!rcu_gp_in_progress(rsp)) {
2107 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2108 return;
2109 }
2110 if (rnp->qsmask == 0) {
2111 rcu_initiate_boost(rnp, flags);
2112 continue;
2113 }
2114 cpu = rnp->grplo;
2115 bit = 1;
2116 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2117 if ((rnp->qsmask & bit) != 0) {
2118 if ((rnp->qsmaskinit & bit) != 0)
2119 *isidle = 0;
2120 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2121 mask |= bit;
2122 }
2123 }
2124 if (mask != 0) {
2125
2126
2127 rcu_report_qs_rnp(mask, rsp, rnp, flags);
2128 continue;
2129 }
2130 raw_spin_unlock_irqrestore(&rnp->lock, flags);
2131 }
2132 rnp = rcu_get_root(rsp);
2133 if (rnp->qsmask == 0) {
2134 raw_spin_lock_irqsave(&rnp->lock, flags);
2135 rcu_initiate_boost(rnp, flags);
2136 }
2137}
2138
2139
2140
2141
2142
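/*
 * Force quiescent states on reluctant CPUs, and also detect which
 * CPUs are in dyntick-idle mode.  A funnel of ->fqslock acquisitions
 * limits contention before the grace-period kthread is woken.
 */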
2143static void force_quiescent_state(struct rcu_state *rsp)
2144{
2145 unsigned long flags;
2146 bool ret;
2147 struct rcu_node *rnp;
2148 struct rcu_node *rnp_old = NULL;
2149
2150
2151 rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
2152 for (; rnp != NULL; rnp = rnp->parent) {
2153 ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
2154 !raw_spin_trylock(&rnp->fqslock);
2155 if (rnp_old != NULL)
2156 raw_spin_unlock(&rnp_old->fqslock);
2157 if (ret) {
2158 rsp->n_force_qs_lh++;
2159 return;
2160 }
2161 rnp_old = rnp;
2162 }
2163
2164
2165
2166 raw_spin_lock_irqsave(&rnp_old->lock, flags);
2167 raw_spin_unlock(&rnp_old->fqslock);
2168 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
2169 rsp->n_force_qs_lh++;
2170 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2171 return;
2172 }
2173 rsp->gp_flags |= RCU_GP_FLAG_FQS;
2174 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2175 wake_up(&rsp->gp_wq);
2176}
2177
2178
2179
2180
2181
2182
2183static void
2184__rcu_process_callbacks(struct rcu_state *rsp)
2185{
2186 unsigned long flags;
2187 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2188
2189 WARN_ON_ONCE(rdp->beenonline == 0);
2190
2191
2192 rcu_check_quiescent_state(rsp, rdp);
2193
2194
2195 local_irq_save(flags);
2196 if (cpu_needs_another_gp(rsp, rdp)) {
2197 raw_spin_lock(&rcu_get_root(rsp)->lock);
2198 rcu_start_gp(rsp);
2199 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
2200 } else {
2201 local_irq_restore(flags);
2202 }
2203
2204
2205 if (cpu_has_callbacks_ready_to_invoke(rdp))
2206 invoke_rcu_callbacks(rsp, rdp);
2207}
2208
2209
2210
2211
2212static void rcu_process_callbacks(struct softirq_action *unused)
2213{
2214 struct rcu_state *rsp;
2215
2216 if (cpu_is_offline(smp_processor_id()))
2217 return;
2218 trace_rcu_utilization(TPS("Start RCU core"));
2219 for_each_rcu_flavor(rsp)
2220 __rcu_process_callbacks(rsp);
2221 trace_rcu_utilization(TPS("End RCU core"));
2222}
2223
2224
2225
2226
2227
2228
2229
2230
2231static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
2232{
2233 if (unlikely(!ACCESS_ONCE(rcu_scheduler_fully_active)))
2234 return;
2235 if (likely(!rsp->boost)) {
2236 rcu_do_batch(rsp, rdp);
2237 return;
2238 }
2239 invoke_rcu_callbacks_kthread();
2240}
2241
2242static void invoke_rcu_core(void)
2243{
2244 if (cpu_online(smp_processor_id()))
2245 raise_softirq(RCU_SOFTIRQ);
2246}
2247
2248
2249
2250
2251static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2252 struct rcu_head *head, unsigned long flags)
2253{
2254
2255
2256
2257
2258 if (rcu_is_cpu_idle() && cpu_online(smp_processor_id()))
2259 invoke_rcu_core();
2260
2261
2262 if (irqs_disabled_flags(flags) || cpu_is_offline(smp_processor_id()))
2263 return;
2264
2265
2266
2267
2268
2269
2270
2271
2272 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
2273
2274
2275 note_gp_changes(rsp, rdp);
2276
2277
2278 if (!rcu_gp_in_progress(rsp)) {
2279 struct rcu_node *rnp_root = rcu_get_root(rsp);
2280
2281 raw_spin_lock(&rnp_root->lock);
2282 rcu_start_gp(rsp);
2283 raw_spin_unlock(&rnp_root->lock);
2284 } else {
2285
2286 rdp->blimit = LONG_MAX;
2287 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
2288 *rdp->nxttail[RCU_DONE_TAIL] != head)
2289 force_quiescent_state(rsp);
2290 rdp->n_force_qs_snap = rsp->n_force_qs;
2291 rdp->qlen_last_fqs_check = rdp->qlen;
2292 }
2293 }
2294}
2295
2296
2297
2298
2299static void rcu_leak_callback(struct rcu_head *rhp)
2300{
2301}
2302
2303
2304
2305
2306
2307
2308
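/*
 * Helper function for call_rcu() and friends: queue an RCU callback on
 * the current CPU (or hand it to the no-CBs machinery where applicable),
 * then consider whether a new grace period or quiescent-state forcing
 * is needed.
 */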
2309static void
2310__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
2311 struct rcu_state *rsp, int cpu, bool lazy)
2312{
2313 unsigned long flags;
2314 struct rcu_data *rdp;
2315
2316 WARN_ON_ONCE((unsigned long)head & 0x3);
2317 if (debug_rcu_head_queue(head)) {
2318
2319 ACCESS_ONCE(head->func) = rcu_leak_callback;
2320 WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
2321 return;
2322 }
2323 head->func = func;
2324 head->next = NULL;
2325
2326
2327
2328
2329
2330
2331
2332 local_irq_save(flags);
2333 rdp = this_cpu_ptr(rsp->rda);
2334
2335
2336 if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
2337 int offline;
2338
2339 if (cpu != -1)
2340 rdp = per_cpu_ptr(rsp->rda, cpu);
2341 offline = !__call_rcu_nocb(rdp, head, lazy);
2342 WARN_ON_ONCE(offline);
2343
2344 local_irq_restore(flags);
2345 return;
2346 }
2347 ACCESS_ONCE(rdp->qlen)++;
2348 if (lazy)
2349 rdp->qlen_lazy++;
2350 else
2351 rcu_idle_count_callbacks_posted();
2352 smp_mb();
2353 *rdp->nxttail[RCU_NEXT_TAIL] = head;
2354 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
2355
2356 if (__is_kfree_rcu_offset((unsigned long)func))
2357 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
2358 rdp->qlen_lazy, rdp->qlen);
2359 else
2360 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
2361
2362
2363 __call_rcu_core(rsp, rdp, head, flags);
2364 local_irq_restore(flags);
2365}
2366
2367
2368
2369
2370void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2371{
2372 __call_rcu(head, func, &rcu_sched_state, -1, 0);
2373}
2374EXPORT_SYMBOL_GPL(call_rcu_sched);
2375
2376
2377
2378
2379void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
2380{
2381 __call_rcu(head, func, &rcu_bh_state, -1, 0);
2382}
2383EXPORT_SYMBOL_GPL(call_rcu_bh);
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394static inline int rcu_blocking_is_gp(void)
2395{
2396 int ret;
2397
2398 might_sleep();
2399 preempt_disable();
2400 ret = num_online_cpus() <= 1;
2401 preempt_enable();
2402 return ret;
2403}
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446void synchronize_sched(void)
2447{
2448 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2449 !lock_is_held(&rcu_lock_map) &&
2450 !lock_is_held(&rcu_sched_lock_map),
2451 "Illegal synchronize_sched() in RCU-sched read-side critical section");
2452 if (rcu_blocking_is_gp())
2453 return;
2454 if (rcu_expedited)
2455 synchronize_sched_expedited();
2456 else
2457 wait_rcu_gp(call_rcu_sched);
2458}
2459EXPORT_SYMBOL_GPL(synchronize_sched);
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473void synchronize_rcu_bh(void)
2474{
2475 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
2476 !lock_is_held(&rcu_lock_map) &&
2477 !lock_is_held(&rcu_sched_lock_map),
2478 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
2479 if (rcu_blocking_is_gp())
2480 return;
2481 if (rcu_expedited)
2482 synchronize_rcu_bh_expedited();
2483 else
2484 wait_rcu_gp(call_rcu_bh);
2485}
2486EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
2487
2488static int synchronize_sched_expedited_cpu_stop(void *data)
2489{
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501 smp_mb();
2502 return 0;
2503}
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
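/**
 * synchronize_sched_expedited - Brute-force RCU-sched grace period
 *
 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
 * approach to force the grace period to end quickly.  This consumes
 * significant time on all CPUs and is unfriendly to real-time workloads,
 * so is not recommended for any sort of common-case code.
 */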
2542void synchronize_sched_expedited(void)
2543{
2544 long firstsnap, s, snap;
2545 int trycount = 0;
2546 struct rcu_state *rsp = &rcu_sched_state;
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556 if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
2557 (ulong)atomic_long_read(&rsp->expedited_done) +
2558 ULONG_MAX / 8)) {
2559 synchronize_sched();
2560 atomic_long_inc(&rsp->expedited_wrap);
2561 return;
2562 }
2563
2564
2565
2566
2567
2568 snap = atomic_long_inc_return(&rsp->expedited_start);
2569 firstsnap = snap;
2570 get_online_cpus();
2571 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2572
2573
2574
2575
2576
2577 while (try_stop_cpus(cpu_online_mask,
2578 synchronize_sched_expedited_cpu_stop,
2579 NULL) == -EAGAIN) {
2580 put_online_cpus();
2581 atomic_long_inc(&rsp->expedited_tryfail);
2582
2583
2584 s = atomic_long_read(&rsp->expedited_done);
2585 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2586
2587 smp_mb__before_atomic_inc();
2588 atomic_long_inc(&rsp->expedited_workdone1);
2589 return;
2590 }
2591
2592
2593 if (trycount++ < 10) {
2594 udelay(trycount * num_online_cpus());
2595 } else {
2596 wait_rcu_gp(call_rcu_sched);
2597 atomic_long_inc(&rsp->expedited_normal);
2598 return;
2599 }
2600
2601
2602 s = atomic_long_read(&rsp->expedited_done);
2603 if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
2604
2605 smp_mb__before_atomic_inc();
2606 atomic_long_inc(&rsp->expedited_workdone2);
2607 return;
2608 }
2609
2610
2611
2612
2613
2614
2615
2616
2617 get_online_cpus();
2618 snap = atomic_long_read(&rsp->expedited_start);
2619 smp_mb();
2620 }
2621 atomic_long_inc(&rsp->expedited_stoppedcpus);
2622
2623
2624
2625
2626
2627
2628
2629 do {
2630 atomic_long_inc(&rsp->expedited_done_tries);
2631 s = atomic_long_read(&rsp->expedited_done);
2632 if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
2633
2634 smp_mb__before_atomic_inc();
2635 atomic_long_inc(&rsp->expedited_done_lost);
2636 break;
2637 }
2638 } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
2639 atomic_long_inc(&rsp->expedited_done_exit);
2640
2641 put_online_cpus();
2642}
2643EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
2644
2645
2646
2647
2648
2649
2650
2651
2652static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
2653{
2654 struct rcu_node *rnp = rdp->mynode;
2655
2656 rdp->n_rcu_pending++;
2657
2658
2659 check_cpu_stall(rsp, rdp);
2660
2661
2662 if (rcu_scheduler_fully_active &&
2663 rdp->qs_pending && !rdp->passed_quiesce) {
2664 rdp->n_rp_qs_pending++;
2665 } else if (rdp->qs_pending && rdp->passed_quiesce) {
2666 rdp->n_rp_report_qs++;
2667 return 1;
2668 }
2669
2670
2671 if (cpu_has_callbacks_ready_to_invoke(rdp)) {
2672 rdp->n_rp_cb_ready++;
2673 return 1;
2674 }
2675
2676
2677 if (cpu_needs_another_gp(rsp, rdp)) {
2678 rdp->n_rp_cpu_needs_gp++;
2679 return 1;
2680 }
2681
2682
2683 if (ACCESS_ONCE(rnp->completed) != rdp->completed) {
2684 rdp->n_rp_gp_completed++;
2685 return 1;
2686 }
2687
2688
2689 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) {
2690 rdp->n_rp_gp_started++;
2691 return 1;
2692 }
2693
2694
2695 rdp->n_rp_need_nothing++;
2696 return 0;
2697}
2698
2699
2700
2701
2702
2703
2704static int rcu_pending(int cpu)
2705{
2706 struct rcu_state *rsp;
2707
2708 for_each_rcu_flavor(rsp)
2709 if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu)))
2710 return 1;
2711 return 0;
2712}
2713
2714
2715
2716
2717
2718
2719static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
2720{
2721 bool al = true;
2722 bool hc = false;
2723 struct rcu_data *rdp;
2724 struct rcu_state *rsp;
2725
2726 for_each_rcu_flavor(rsp) {
2727 rdp = per_cpu_ptr(rsp->rda, cpu);
2728 if (rdp->qlen != rdp->qlen_lazy)
2729 al = false;
2730 if (rdp->nxtlist)
2731 hc = true;
2732 }
2733 if (all_lazy)
2734 *all_lazy = al;
2735 return hc;
2736}
2737
2738
2739
2740
2741
2742static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s,
2743 int cpu, unsigned long done)
2744{
2745 trace_rcu_barrier(rsp->name, s, cpu,
2746 atomic_read(&rsp->barrier_cpu_count), done);
2747}
2748
2749
2750
2751
2752
2753static void rcu_barrier_callback(struct rcu_head *rhp)
2754{
2755 struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head);
2756 struct rcu_state *rsp = rdp->rsp;
2757
2758 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
2759 _rcu_barrier_trace(rsp, "LastCB", -1, rsp->n_barrier_done);
2760 complete(&rsp->barrier_completion);
2761 } else {
2762 _rcu_barrier_trace(rsp, "CB", -1, rsp->n_barrier_done);
2763 }
2764}
2765
2766
2767
2768
2769static void rcu_barrier_func(void *type)
2770{
2771 struct rcu_state *rsp = type;
2772 struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
2773
2774 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->n_barrier_done);
2775 atomic_inc(&rsp->barrier_cpu_count);
2776 rsp->call(&rdp->barrier_head, rcu_barrier_callback);
2777}
2778
2779
2780
2781
2782
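/*
 * Orchestrate the specified type of RCU barrier, waiting for all
 * RCU callbacks of the specified type to complete.
 */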
2783static void _rcu_barrier(struct rcu_state *rsp)
2784{
2785 int cpu;
2786 struct rcu_data *rdp;
2787 unsigned long snap = ACCESS_ONCE(rsp->n_barrier_done);
2788 unsigned long snap_done;
2789
2790 _rcu_barrier_trace(rsp, "Begin", -1, snap);
2791
2792
2793 mutex_lock(&rsp->barrier_mutex);
2794
2795
2796
2797
2798
2799 smp_mb();
2800
2801
2802
2803
2804
2805
2806
2807 snap_done = rsp->n_barrier_done;
2808 _rcu_barrier_trace(rsp, "Check", -1, snap_done);
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820 if (ULONG_CMP_GE(snap_done, (snap + 3) & ~0x1)) {
2821 _rcu_barrier_trace(rsp, "EarlyExit", -1, snap_done);
2822 smp_mb();
2823 mutex_unlock(&rsp->barrier_mutex);
2824 return;
2825 }
2826
2827
2828
2829
2830
2831
2832 ACCESS_ONCE(rsp->n_barrier_done)++;
2833 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
2834 _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
2835 smp_mb();
2836
2837
2838
2839
2840
2841
2842
2843 init_completion(&rsp->barrier_completion);
2844 atomic_set(&rsp->barrier_cpu_count, 1);
2845 get_online_cpus();
2846
2847
2848
2849
2850
2851
2852 for_each_possible_cpu(cpu) {
2853 if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
2854 continue;
2855 rdp = per_cpu_ptr(rsp->rda, cpu);
2856 if (rcu_is_nocb_cpu(cpu)) {
2857 _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
2858 rsp->n_barrier_done);
2859 atomic_inc(&rsp->barrier_cpu_count);
2860 __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
2861 rsp, cpu, 0);
2862 } else if (ACCESS_ONCE(rdp->qlen)) {
2863 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
2864 rsp->n_barrier_done);
2865 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
2866 } else {
2867 _rcu_barrier_trace(rsp, "OnlineNQ", cpu,
2868 rsp->n_barrier_done);
2869 }
2870 }
2871 put_online_cpus();
2872
2873
2874
2875
2876
2877 if (atomic_dec_and_test(&rsp->barrier_cpu_count))
2878 complete(&rsp->barrier_completion);
2879
2880
2881 smp_mb();
2882 ACCESS_ONCE(rsp->n_barrier_done)++;
2883 WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
2884 _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
2885 smp_mb();
2886
2887
2888 wait_for_completion(&rsp->barrier_completion);
2889
2890
2891 mutex_unlock(&rsp->barrier_mutex);
2892}

/**
 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
 */
void rcu_barrier_bh(void)
{
	_rcu_barrier(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);

/**
 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
 */
void rcu_barrier_sched(void)
{
	_rcu_barrier(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
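
/*
 * Typical usage sketch for the exported barrier functions, using
 * hypothetical names: a module whose callbacks were queued via
 * call_rcu_sched() should drain them before its code and data go away,
 * for example:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		foo_stop_queueing();	// no new call_rcu_sched() after this
 *		rcu_barrier_sched();	// wait for already-queued callbacks
 *		kmem_cache_destroy(foo_cache);
 *	}
 *
 * rcu_barrier_bh() plays the same role for call_rcu_bh() users.
 */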

/*
 * Do boot-time initialization of a CPU's per-CPU RCU data.
 */
static void __init
rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
	unsigned long flags;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave(&rnp->lock, flags);
	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
	init_callback_list(rdp);
	rdp->qlen_lazy = 0;
	ACCESS_ONCE(rdp->qlen) = 0;
	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
	rdp->cpu = cpu;
	rdp->rsp = rsp;
	rcu_boot_init_nocb_percpu_data(rdp);
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

/*
 * Initialize a CPU's per-CPU RCU data.  Note that only one online or
 * offline event can be happening at a given time.  Note also that we
 * can accept some slop in the rsp->completed access due to the fact
 * that this CPU cannot possibly have any RCU callbacks in flight yet.
 */
static void
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
{
	unsigned long flags;
	unsigned long mask;
	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
	struct rcu_node *rnp = rcu_get_root(rsp);

	/* Exclude new grace periods. */
	mutex_lock(&rsp->onoff_mutex);

	/* Set up local state, ensuring consistent view of global state. */
	raw_spin_lock_irqsave(&rnp->lock, flags);
	rdp->beenonline = 1;	 /* We have now been online. */
	rdp->preemptible = preemptible;
	rdp->qlen_last_fqs_check = 0;
	rdp->n_force_qs_snap = rsp->n_force_qs;
	rdp->blimit = blimit;
	init_callback_list(rdp);  /* Re-enable callbacks on this CPU. */
	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
	rcu_sysidle_init_percpu_data(rdp->dynticks);
	atomic_set(&rdp->dynticks->dynticks,
		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */

	/* Add CPU to rcu_node bitmasks. */
	rnp = rdp->mynode;
	mask = rdp->grpmask;
	do {
		/* Exclude any attempts to start a new GP on small systems. */
		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
		rnp->qsmaskinit |= mask;
		mask = rnp->grpmask;
		if (rnp == rdp->mynode) {
			/*
			 * If there is a grace period in progress, we will
			 * set up to wait for it next time we run the
			 * RCU core code.
			 */
			rdp->gpnum = rnp->completed;
			rdp->completed = rnp->completed;
			rdp->passed_quiesce = 0;
			rdp->qs_pending = 0;
			trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
		}
		raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
		rnp = rnp->parent;
	} while (rnp != NULL && !(rnp->qsmaskinit & mask));
	local_irq_restore(flags);

	mutex_unlock(&rsp->onoff_mutex);
}
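
/*
 * A note on the atomic_set() of ->dynticks above: the dynticks counter is
 * even when the CPU is in dynticks-idle state and odd otherwise, so
 * "(old & ~0x1) + 1" rounds the current value down to even and then forces
 * it odd, marking the incoming CPU as non-idle from RCU's viewpoint without
 * otherwise disturbing the counter.
 */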

static void rcu_prepare_cpu(int cpu)
{
	struct rcu_state *rsp;

	for_each_rcu_flavor(rsp)
		rcu_init_percpu_data(cpu, rsp,
				     strcmp(rsp->name, "rcu_preempt") == 0);
}

/*
 * Handle CPU online/offline notification events.
 */
static int rcu_cpu_notify(struct notifier_block *self,
			  unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
	struct rcu_node *rnp = rdp->mynode;
	struct rcu_state *rsp;

	trace_rcu_utilization(TPS("Start CPU hotplug"));
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		rcu_prepare_cpu(cpu);
		rcu_prepare_kthreads(cpu);
		break;
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
		rcu_boost_kthread_setaffinity(rnp, -1);
		break;
	case CPU_DOWN_PREPARE:
		rcu_boost_kthread_setaffinity(rnp, cpu);
		break;
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		for_each_rcu_flavor(rsp)
			rcu_cleanup_dying_cpu(rsp);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		for_each_rcu_flavor(rsp)
			rcu_cleanup_dead_cpu(cpu, rsp);
		break;
	default:
		break;
	}
	trace_rcu_utilization(TPS("End CPU hotplug"));
	return NOTIFY_OK;
}
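
/*
 * Rough mapping of hotplug phases to the cases above: CPU_UP_PREPARE sets
 * up per-CPU data and kthreads before the incoming CPU runs, CPU_ONLINE
 * and CPU_DOWN_FAILED adjust boost-kthread affinity, CPU_DOWN_PREPARE
 * excludes the outgoing CPU from that affinity mask, CPU_DYING does
 * last-minute bookkeeping on the outgoing CPU, and CPU_DEAD (or a
 * cancelled bring-up) is where the departed CPU's remaining callbacks are
 * handed off to a surviving CPU.
 */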

static int rcu_pm_notify(struct notifier_block *self,
			 unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
			rcu_expedited = 1;
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		rcu_expedited = 0;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
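
/*
 * The nr_cpu_ids <= 256 test above is a heuristic: expedited grace periods
 * shorten the suspend/hibernate path, but they hammer every CPU with IPIs,
 * which scales poorly, so very large systems keep using normal grace
 * periods even during PM transitions.
 */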

/*
 * Spawn the kthread that handles this RCU flavor's grace periods.
 */
static int __init rcu_spawn_gp_kthread(void)
{
	unsigned long flags;
	struct rcu_node *rnp;
	struct rcu_state *rsp;
	struct task_struct *t;

	for_each_rcu_flavor(rsp) {
		t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
		BUG_ON(IS_ERR(t));
		rnp = rcu_get_root(rsp);
		raw_spin_lock_irqsave(&rnp->lock, flags);
		rsp->gp_kthread = t;
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		rcu_spawn_nocb_kthreads(rsp);
	}
	return 0;
}
early_initcall(rcu_spawn_gp_kthread);

/*
 * This function is invoked towards the end of the scheduler's
 * initialization process.  Before this is called, the idle task might
 * contain RCU read-side critical sections (during which time, this idle
 * task is booting the system).  After this function is called, the idle
 * tasks are prohibited from containing RCU read-side critical sections.
 * This function also enables RCU lockdep checking.
 */
void rcu_scheduler_starting(void)
{
	WARN_ON(num_online_cpus() != 1);
	WARN_ON(nr_context_switches() > 0);
	rcu_scheduler_active = 1;
}

/*
 * Compute the per-level fanout, either using the exact fanout specified
 * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
 */
#ifdef CONFIG_RCU_FANOUT_EXACT
static void __init rcu_init_levelspread(struct rcu_state *rsp)
{
	int i;

	for (i = rcu_num_lvls - 1; i > 0; i--)
		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
	rsp->levelspread[0] = rcu_fanout_leaf;
}
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp)
{
	int ccur;
	int cprv;
	int i;

	cprv = nr_cpu_ids;
	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		ccur = rsp->levelcnt[i];
		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
		cprv = ccur;
	}
}
#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
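
/*
 * Example of the balanced (non-exact) calculation above, using made-up
 * numbers: with nr_cpu_ids == 96 and a two-level tree whose levelcnt[]
 * is {1, 6}, the leaf pass computes levelspread[1] = (96 + 6 - 1) / 6
 * = 16 CPUs per leaf rcu_node, and the root pass computes
 * levelspread[0] = (6 + 1 - 1) / 1 = 6 leaves under the single root.
 */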

/*
 * Helper function for rcu_init() that initializes one rcu_state structure.
 */
static void __init rcu_init_one(struct rcu_state *rsp,
		struct rcu_data __percpu *rda)
{
	static char *buf[] = { "rcu_node_0",
			       "rcu_node_1",
			       "rcu_node_2",
			       "rcu_node_3" };  /* Match MAX_RCU_LVLS. */
	static char *fqs[] = { "rcu_node_fqs_0",
			       "rcu_node_fqs_1",
			       "rcu_node_fqs_2",
			       "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS. */
	int cpustride = 1;
	int i;
	int j;
	struct rcu_node *rnp;

	BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf));  /* Fix buf[] init! */

	/* Silence gcc 4.8 false positive about array index out of range. */
	if (rcu_num_lvls > RCU_NUM_LVLS)
		panic("rcu_init_one: rcu_num_lvls overflow");

	/* Initialize the level-tracking arrays. */

	for (i = 0; i < rcu_num_lvls; i++)
		rsp->levelcnt[i] = num_rcu_lvl[i];
	for (i = 1; i < rcu_num_lvls; i++)
		rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
	rcu_init_levelspread(rsp);

	/* Initialize the elements themselves, starting from the leaves. */

	for (i = rcu_num_lvls - 1; i >= 0; i--) {
		cpustride *= rsp->levelspread[i];
		rnp = rsp->level[i];
		for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
			raw_spin_lock_init(&rnp->lock);
			lockdep_set_class_and_name(&rnp->lock,
						   &rcu_node_class[i], buf[i]);
			raw_spin_lock_init(&rnp->fqslock);
			lockdep_set_class_and_name(&rnp->fqslock,
						   &rcu_fqs_class[i], fqs[i]);
			rnp->gpnum = rsp->gpnum;
			rnp->completed = rsp->completed;
			rnp->qsmask = 0;
			rnp->qsmaskinit = 0;
			rnp->grplo = j * cpustride;
			rnp->grphi = (j + 1) * cpustride - 1;
			if (rnp->grphi >= NR_CPUS)
				rnp->grphi = NR_CPUS - 1;
			if (i == 0) {
				rnp->grpnum = 0;
				rnp->grpmask = 0;
				rnp->parent = NULL;
			} else {
				rnp->grpnum = j % rsp->levelspread[i - 1];
				rnp->grpmask = 1UL << rnp->grpnum;
				rnp->parent = rsp->level[i - 1] +
					      j / rsp->levelspread[i - 1];
			}
			rnp->level = i;
			INIT_LIST_HEAD(&rnp->blkd_tasks);
			rcu_init_one_nocb(rnp);
		}
	}

	rsp->rda = rda;
	init_waitqueue_head(&rsp->gp_wq);
	init_irq_work(&rsp->wakeup_work, rsp_wakeup);
	rnp = rsp->level[rcu_num_lvls - 1];
	for_each_possible_cpu(i) {
		while (i > rnp->grphi)
			rnp++;
		per_cpu_ptr(rsp->rda, i)->mynode = rnp;
		rcu_boot_init_percpu_data(i, rsp);
	}
	list_add(&rsp->flavors, &rcu_struct_flavors);
}
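
/*
 * The final loop above walks the leaf level: because leaf rcu_node
 * structures cover consecutive CPU ranges [grplo, grphi], advancing rnp
 * whenever the CPU number exceeds ->grphi assigns each possible CPU's
 * ->mynode to the leaf that owns it, and then performs the boot-time
 * per-CPU initialization for that CPU.
 */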

/*
 * Compute the rcu_node tree geometry from run-time parameters, invoked
 * early in the boot process.
 */
static void __init rcu_init_geometry(void)
{
	ulong d;
	int i;
	int j;
	int n = nr_cpu_ids;
	int rcu_capacity[MAX_RCU_LVLS + 1];

	/*
	 * Initialize any unspecified boot parameters.
	 * The default values of jiffies_till_first_fqs and
	 * jiffies_till_next_fqs are set to the RCU_JIFFIES_TILL_FORCE_QS
	 * value, which is a function of HZ, then adding one for each
	 * RCU_JIFFIES_FQS_DIV CPUs that might be on the system.
	 */
	d = RCU_JIFFIES_TILL_FORCE_QS + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
	if (jiffies_till_first_fqs == ULONG_MAX)
		jiffies_till_first_fqs = d;
	if (jiffies_till_next_fqs == ULONG_MAX)
		jiffies_till_next_fqs = d;

	/* If the compile-time values are accurate, just leave. */
	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
	    nr_cpu_ids == NR_CPUS)
		return;

	/*
	 * Compute the number of CPUs that can be handled by an rcu_node
	 * tree with the given number of levels.  Setting rcu_capacity[0]
	 * makes some of the arithmetic easier.
	 */
	rcu_capacity[0] = 1;
	rcu_capacity[1] = rcu_fanout_leaf;
	for (i = 2; i <= MAX_RCU_LVLS; i++)
		rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT;

	/*
	 * The boot-time rcu_fanout_leaf parameter is only permitted
	 * to increase the leaf-level fanout, not decrease it.  Of course,
	 * the leaf-level fanout cannot exceed the number of bits in
	 * the rcu_node masks.  Finally, the tree must be able to hold
	 * the configured number of CPUs.  Complain and fall back to the
	 * compile-time values if these limits are exceeded.
	 */
	if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF ||
	    rcu_fanout_leaf > sizeof(unsigned long) * 8 ||
	    n > rcu_capacity[MAX_RCU_LVLS]) {
		WARN_ON(1);
		return;
	}

	/* Calculate the number of rcu_nodes at each level of the tree. */
	for (i = 1; i <= MAX_RCU_LVLS; i++)
		if (n <= rcu_capacity[i]) {
			for (j = 0; j <= i; j++)
				num_rcu_lvl[j] =
					DIV_ROUND_UP(n, rcu_capacity[i - j]);
			rcu_num_lvls = i;
			for (j = i + 1; j <= MAX_RCU_LVLS; j++)
				num_rcu_lvl[j] = 0;
			break;
		}

	/* Calculate the total number of rcu_node structures. */
	rcu_num_nodes = 0;
	for (i = 0; i <= MAX_RCU_LVLS; i++)
		rcu_num_nodes += num_rcu_lvl[i];
	rcu_num_nodes -= n;
}
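
/*
 * Worked example with made-up parameters: nr_cpu_ids == 96,
 * rcu_fanout_leaf == 16, CONFIG_RCU_FANOUT == 64.  Then rcu_capacity[]
 * begins {1, 16, 1024, ...}, and 96 first fits at i == 2, giving
 * num_rcu_lvl[] = {1, 6, 96, 0, 0} and rcu_num_lvls == 2.  The final sum
 * is 1 + 6 + 96 = 103, and subtracting n == 96 (the last "level" counts
 * CPUs, not rcu_node structures) leaves rcu_num_nodes == 7: one root plus
 * six leaves.
 */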

void __init rcu_init(void)
{
	int cpu;

	rcu_bootup_announce();
	rcu_init_geometry();
	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
	__rcu_init_preempt();
	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

	/*
	 * We don't need protection against CPU-hotplug here because
	 * this is called early in boot, before either interrupts
	 * or the scheduler are operational.
	 */
	cpu_notifier(rcu_cpu_notify, 0);
	pm_notifier(rcu_pm_notify, 0);
	for_each_online_cpu(cpu)
		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}

#include "rcutree_plugin.h"