// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 *  NOHZ implementation for low and high resolution timers
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/profile.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/sched/loadavg.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

#include <trace/events/timer.h>

/*
 * Per-CPU nohz control structure
 */
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);

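/* Return the per-CPU tick_sched structure for the given CPU. */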
struct tick_sched *tick_get_tick_sched(int cpu)
{
	return &per_cpu(tick_cpu_sched, cpu);
}

#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
 * The time, when the last jiffy update happened. Write access must hold
 * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
 * consistent view of jiffies and last_jiffies_update.
 */
static ktime_t last_jiffies_update;

/*
 * Must be called with interrupts disabled !
 */
static void tick_do_update_jiffies64(ktime_t now)
{
	unsigned long ticks = 1;
	ktime_t delta, nextp;

	/*
	 * 64bit can do a quick check without holding jiffies lock and
	 * without looking at the sequence count. The smp_load_acquire()
	 * pairs with the update done later in this function.
	 *
	 * 32bit cannot do that because the store of tick_next_period
	 * consists of two 32bit stores and the first store could move it
	 * to a random point in the future.
	 */
	if (IS_ENABLED(CONFIG_64BIT)) {
		if (ktime_before(now, smp_load_acquire(&tick_next_period)))
			return;
	} else {
		unsigned int seq;

		/*
		 * Avoid contention on jiffies_lock and protect the quick
		 * check with the sequence count.
		 */
		do {
			seq = read_seqcount_begin(&jiffies_seq);
			nextp = tick_next_period;
		} while (read_seqcount_retry(&jiffies_seq, seq));

		if (ktime_before(now, nextp))
			return;
	}

	/* Quick check failed, i.e. update is required. */
	raw_spin_lock(&jiffies_lock);
	/*
	 * Reevaluate with the lock held. Another CPU might have done the
	 * update already.
	 */
	if (ktime_before(now, tick_next_period)) {
		raw_spin_unlock(&jiffies_lock);
		return;
	}

	write_seqcount_begin(&jiffies_seq);

	delta = ktime_sub(now, tick_next_period);
	if (unlikely(delta >= TICK_NSEC)) {
		/* Slow path for long idle sleep times */
		s64 incr = TICK_NSEC;

		ticks += ktime_divns(delta, incr);

		last_jiffies_update = ktime_add_ns(last_jiffies_update,
						   incr * ticks);
	} else {
		last_jiffies_update = ktime_add_ns(last_jiffies_update,
						   TICK_NSEC);
	}

	/* Advance jiffies to complete the jiffies_seq protected job */
	jiffies_64 += ticks;

	/*
	 * Keep the tick_next_period variable up to date.
	 */
	nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);

	if (IS_ENABLED(CONFIG_64BIT)) {
		/*
		 * Pairs with smp_load_acquire() in the lockless quick check
		 * above: it guarantees that the update to jiffies_64 cannot
		 * be observed after the new tick_next_period value.
		 */
		smp_store_release(&tick_next_period, nextp);
	} else {
		/*
		 * A plain store is good enough on 32bit, as the quick check
		 * above is protected by the sequence count.
		 */
		tick_next_period = nextp;
	}

	/*
	 * Release the sequence count. calc_global_load() below is not
	 * protected by it, but jiffies_lock needs to be held to prevent
	 * concurrent invocations.
	 */
	write_seqcount_end(&jiffies_seq);

	calc_global_load();

	raw_spin_unlock(&jiffies_lock);
	update_wall_time();
}

/*
 * Initialize and retrieve the jiffies update base.
 */
static ktime_t tick_init_jiffy_update(void)
{
	ktime_t period;

	raw_spin_lock(&jiffies_lock);
	write_seqcount_begin(&jiffies_seq);
	/* Did we start the jiffies update yet ? */
	if (last_jiffies_update == 0)
		last_jiffies_update = tick_next_period;
	period = last_jiffies_update;
	write_seqcount_end(&jiffies_seq);
	raw_spin_unlock(&jiffies_lock);
	return period;
}

static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
	int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * Check if the do_timer duty was dropped. We don't care about
	 * concurrency: This happens only when the CPU in charge went
	 * into a long sleep. If two CPUs happen to assign themselves to
	 * this duty, then the jiffies update is still serialized by
	 * jiffies_lock.
	 *
	 * If nohz_full is enabled, this should not happen because the
	 * tick_do_timer_cpu never relinquishes.
	 */
	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
		WARN_ON(tick_nohz_full_running);
#endif
		tick_do_timer_cpu = cpu;
	}
#endif

	/* Check, if the jiffies need an update */
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);

	if (ts->inidle)
		ts->got_idle_tick = 1;
}

static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * When we are idle and the tick is stopped, we have to touch
	 * the watchdog as we might not schedule for a really long
	 * time. This happens on complete idle SMP systems while
	 * waiting on the login prompt. We also increment the "start of
	 * idle" jiffy stamp so the idle accounting adjustment we do
	 * when we go busy again does not account too many ticks.
	 */
	if (ts->tick_stopped) {
		touch_softlockup_watchdog_sched();
		if (is_idle_task(current))
			ts->idle_jiffies++;
		/*
		 * In case the current tick fired too early past its expected
		 * expiration, make sure we don't bypass the next clock reprogramming
		 * to the same deadline.
		 */
		ts->next_tick = 0;
	}
#endif
	update_process_times(user_mode(regs));
	profile_tick(CPU_PROFILING);
}
#endif

#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
bool tick_nohz_full_running;
EXPORT_SYMBOL_GPL(tick_nohz_full_running);
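/* Global dependency mask: any bit set here keeps the tick on for all full dynticks CPUs. */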
static atomic_t tick_dep_mask;

static bool check_tick_dependency(atomic_t *dep)
{
	int val = atomic_read(dep);

	if (val & TICK_DEP_MASK_POSIX_TIMER) {
		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
		return true;
	}

	if (val & TICK_DEP_MASK_PERF_EVENTS) {
		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
		return true;
	}

	if (val & TICK_DEP_MASK_SCHED) {
		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
		return true;
	}

	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
		return true;
	}

	if (val & TICK_DEP_MASK_RCU) {
		trace_tick_stop(0, TICK_DEP_MASK_RCU);
		return true;
	}

	return false;
}

static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
{
	lockdep_assert_irqs_disabled();

	if (unlikely(!cpu_online(cpu)))
		return false;

	if (check_tick_dependency(&tick_dep_mask))
		return false;

	if (check_tick_dependency(&ts->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->signal->tick_dep_mask))
		return false;

	return true;
}

static void nohz_full_kick_func(struct irq_work *work)
{
	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}

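/*
 * The kick work is flagged IRQ_WORK_HARD_IRQ so it runs from hard interrupt
 * context on the target CPU, which then re-evaluates its tick dependencies
 * on interrupt exit.
 */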
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
	.func = nohz_full_kick_func,
	.flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
};

/*
 * Kick this CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
 * is NMI safe.
 */
static void tick_nohz_full_kick(void)
{
	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}

/*
 * Kick the CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 */
void tick_nohz_full_kick_cpu(int cpu)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

static void tick_nohz_kick_task(struct task_struct *tsk)
{
	int cpu;

	/*
	 * If the task is not running, run_posix_cpu_timers()
	 * has nothing to elapse, IPI can then be spared.
	 *
	 * activate_task()                      STORE p->tick_dep_mask
	 *   STORE p->on_rq
	 * __schedule() (switch to task 'p')    smp_mb() (atomic_fetch_or())
	 *   LOCK rq->lock                      LOAD p->on_rq
	 *   smp_mb__after_spin_lock()
	 *   tick_nohz_task_switch()
	 *     LOAD p->tick_dep_mask
	 */
	if (!sched_task_on_rq(tsk))
		return;

	/*
	 * If the task concurrently migrates to another CPU,
	 * we guarantee it sees the new tick dependency upon
	 * schedule.
	 *
	 * set_task_cpu(p, cpu);
	 *   STORE p->cpu = @cpu
	 * __schedule() (switch to task 'p')
	 *   LOCK rq->lock
	 *   smp_mb__after_spin_lock()          STORE p->tick_dep_mask
	 *   tick_nohz_task_switch()            smp_mb() (atomic_fetch_or())
	 *      LOAD p->cpu                     LOAD p->tick_dep_mask
	 */
	cpu = task_cpu(tsk);

	preempt_disable();
	if (cpu_online(cpu))
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}

/*
 * Kick all full dynticks CPUs in order to force these to re-evaluate
 * their dependency on the tick and restart it if necessary.
 */
static void tick_nohz_full_kick_all(void)
{
	int cpu;

	if (!tick_nohz_full_running)
		return;

	preempt_disable();
	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}

static void tick_nohz_dep_set_all(atomic_t *dep,
				  enum tick_dep_bits bit)
{
	int prev;

	prev = atomic_fetch_or(BIT(bit), dep);
	if (!prev)
		tick_nohz_full_kick_all();
}

/*
 * Set a global tick dependency. Used by perf events that rely on freq and
 * by unstable clock.
 */
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
	tick_nohz_dep_set_all(&tick_dep_mask, bit);
}

void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tick_dep_mask);
}

/*
 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
 * manage events throttling.
 */
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	int prev;
	struct tick_sched *ts;

	ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
	if (!prev) {
		preempt_disable();
		/* Perf needs local kick that is NMI safe */
		if (cpu == smp_processor_id()) {
			tick_nohz_full_kick();
		} else {
			/* Remote irq work is not NMI-safe */
			if (!WARN_ON_ONCE(in_nmi()))
				tick_nohz_full_kick_cpu(cpu);
		}
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);

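/*
 * Clearing a dependency needs no kick: the CPU re-evaluates and stops its
 * tick on its own at the next opportunity.
 */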
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);

/*
 * Set a per-task tick dependency. RCU needs this. Also posix CPU timers
 * need it in order to elapse per task timers.
 */
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
		tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

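/* As with the per-CPU variant, clearing a per-task dependency needs no kick. */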
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);

/*
 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order
 * to elapse per process timers.
 */
void tick_nohz_dep_set_signal(struct task_struct *tsk,
			      enum tick_dep_bits bit)
{
	int prev;
	struct signal_struct *sig = tsk->signal;

	prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
	if (!prev) {
		struct task_struct *t;

		lockdep_assert_held(&tsk->sighand->siglock);
		__for_each_thread(sig, t)
			tick_nohz_kick_task(t);
	}
}

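/* Clearing a per-process dependency likewise needs no kick. */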
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
}

/*
 * Re-evaluate the need for the tick as we switch the current task.
 * It might need the tick due to per task/process properties:
 * perf events, posix CPU timers, ...
 */
void __tick_nohz_task_switch(void)
{
	struct tick_sched *ts;

	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped) {
		if (atomic_read(&current->tick_dep_mask) ||
		    atomic_read(&current->signal->tick_dep_mask))
			tick_nohz_full_kick();
	}
}

/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{
	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
	cpumask_copy(tick_nohz_full_mask, cpumask);
	tick_nohz_full_running = true;
}
EXPORT_SYMBOL_GPL(tick_nohz_full_setup);

static int tick_nohz_cpu_down(unsigned int cpu)
{
	/*
	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
	 * timers, workqueues, timekeeping, ...) on behalf of full
	 * dynticks CPUs. It must remain online when nohz full is
	 * enabled.
	 */
	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
		return -EBUSY;
	return 0;
}

void __init tick_nohz_init(void)
{
	int cpu, ret;

	if (!tick_nohz_full_running)
		return;

	/*
	 * Full dynticks uses irq work to drive the tick rescheduling on safe
	 * locking contexts. But then we need irq work to raise its own
	 * interrupts to avoid circular dependency on the tick.
	 */
	if (!arch_irq_work_has_interrupt()) {
		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
		cpumask_clear(tick_nohz_full_mask);
		tick_nohz_full_running = false;
		return;
	}

	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
	    !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
		cpu = smp_processor_id();

		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
			pr_warn("NO_HZ: Clearing %d from nohz_full range "
				"for timekeeping\n", cpu);
			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
		}
	}

	for_each_cpu(cpu, tick_nohz_full_mask)
		context_tracking_cpu_set(cpu);

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"kernel/nohz:predown", NULL,
					tick_nohz_cpu_down);
	WARN_ON(ret < 0);
	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
		cpumask_pr_args(tick_nohz_full_mask));
}
#endif

/*
 * NOHZ - aka dynticks - flag setting
 */
#ifdef CONFIG_NO_HZ_COMMON
/*
 * NO HZ enabled ?
 */
bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;
/*
 * Enable / Disable tickless mode
 */
static int __init setup_tick_nohz(char *str)
{
	return (kstrtobool(str, &tick_nohz_enabled) == 0);
}

__setup("nohz=", setup_tick_nohz);

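/* Check whether the tick is currently stopped on this CPU. */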
bool tick_nohz_tick_stopped(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->tick_stopped;
}

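/* Check whether the tick is currently stopped on @cpu. */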
bool tick_nohz_tick_stopped_cpu(int cpu)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	return ts->tick_stopped;
}

/**
 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
 *
 * Called from interrupt entry when the CPU was idle
 *
 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
 * must be updated. Otherwise an interrupt handler could use a stale jiffy
 * value. We do this unconditionally on any CPU, as we don't know whether the
 * CPU, which has the update task assigned, is in a long sleep.
 */
static void tick_nohz_update_jiffies(ktime_t now)
{
	unsigned long flags;

	__this_cpu_write(tick_cpu_sched.idle_waketime, now);

	local_irq_save(flags);
	tick_do_update_jiffies64(now);
	local_irq_restore(flags);

	touch_softlockup_watchdog_sched();
}

/*
 * Updates the per-CPU time idle statistics counters
 */
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
	ktime_t delta;

	if (ts->idle_active) {
		delta = ktime_sub(now, ts->idle_entrytime);
		if (nr_iowait_cpu(cpu) > 0)
			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
		else
			ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
		ts->idle_entrytime = now;
	}

	if (last_update_time)
		*last_update_time = ktime_to_us(now);
}

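/* End of an idle period: fold the elapsed idle time into the statistics. */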
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
	ts->idle_active = 0;

	sched_clock_idle_wakeup_event();
}

static void tick_nohz_start_idle(struct tick_sched *ts)
{
	ts->idle_entrytime = ktime_get();
	ts->idle_active = 1;
	sched_clock_idle_sleep_event();
}

/**
 * get_cpu_idle_time_us - get the total idle time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative idle time (since boot) for a given
 * CPU, in microseconds.
 *
 * This time is measured via accounting rather than sampling,
 * and is as accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, idle;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		idle = ts->idle_sleeptime;
	} else {
		if (ts->idle_active && !nr_iowait_cpu(cpu)) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			idle = ktime_add(ts->idle_sleeptime, delta);
		} else {
			idle = ts->idle_sleeptime;
		}
	}

	return ktime_to_us(idle);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);

/**
 * get_cpu_iowait_time_us - get the total iowait time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative iowait time (since boot) for a given
 * CPU, in microseconds.
 *
 * This time is measured via accounting rather than sampling,
 * and is as accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, iowait;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		iowait = ts->iowait_sleeptime;
	} else {
		if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			iowait = ktime_add(ts->iowait_sleeptime, delta);
		} else {
			iowait = ts->iowait_sleeptime;
		}
	}

	return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);

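/* Re-arm the tick timer from ts->last_tick, forwarded past @now. */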
static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
	hrtimer_cancel(&ts->sched_timer);
	hrtimer_set_expires(&ts->sched_timer, ts->last_tick);

	/* Forward the time to expire in the future */
	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
		hrtimer_start_expires(&ts->sched_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	} else {
		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
	}

	/*
	 * Reset to make sure the next tick stop doesn't get fooled by past
	 * cached clock deadline.
	 */
	ts->next_tick = 0;
}

static inline bool local_timer_softirq_pending(void)
{
	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}

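/*
 * Compute the time of the next timer event on this CPU. A return value of 0
 * means the tick must be kept running; otherwise the expiry is also cached
 * in ts->timer_expires for a subsequent tick_nohz_stop_tick().
 */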
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
	unsigned long basejiff;
	unsigned int seq;

	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqcount_begin(&jiffies_seq);
		basemono = last_jiffies_update;
		basejiff = jiffies;
	} while (read_seqcount_retry(&jiffies_seq, seq));
	ts->last_jiffies = basejiff;
	ts->timer_expires_base = basemono;

	/*
	 * Keep the periodic tick, when RCU, architecture or irq_work
	 * requests it.
	 * Aside of that, check whether the local timer softirq is
	 * pending. If so, its a bad idea to call get_next_timer_interrupt(),
	 * because there is an already expired timer, so it will request
	 * immediate expiry, which rearms the hardware timer with a
	 * minimal delta, which brings us back to this place
	 * immediately. Lather, rinse and repeat...
	 */
	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
		next_tick = basemono + TICK_NSEC;
	} else {
		/*
		 * Get the next pending timer. If high resolution
		 * timers are enabled this only takes the timer wheel
		 * timers into account. If high resolution timers are
		 * disabled this also looks at the next expiring
		 * hrtimer.
		 */
		next_tmr = get_next_timer_interrupt(basejiff, basemono);
		ts->next_timer = next_tmr;
		/* Take the next rcu event into account */
		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
	}

	/*
	 * If the tick is due in the next period, keep it ticking or
	 * force prod the timer.
	 */
	delta = next_tick - basemono;
	if (delta <= (u64)TICK_NSEC) {
		/*
		 * Tell the timer code that the base is not idle, i.e. undo
		 * the effect of get_next_timer_interrupt():
		 */
		timer_clear_idle();
		/*
		 * We've not stopped the tick yet, and there's a timer in the
		 * next period, so no point in stopping it either, bail.
		 */
		if (!ts->tick_stopped) {
			ts->timer_expires = 0;
			goto out;
		}
	}

	/*
	 * If this CPU is the one which had the do_timer() duty last, we limit
	 * the sleep time to the timekeeping max_deferment value.
	 * Otherwise we can sleep as long as we want.
	 */
	delta = timekeeping_max_deferment();
	if (cpu != tick_do_timer_cpu &&
	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
		delta = KTIME_MAX;

	/* Calculate the next expiry time */
	if (delta < (KTIME_MAX - basemono))
		expires = basemono + delta;
	else
		expires = KTIME_MAX;

	ts->timer_expires = min_t(u64, expires, next_tick);

out:
	return ts->timer_expires;
}

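/* Stop the tick and program the next expiry cached in ts->timer_expires. */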
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	u64 basemono = ts->timer_expires_base;
	u64 expires = ts->timer_expires;
	ktime_t tick = expires;

	/* Make sure we won't be trying to stop it twice in a row. */
	ts->timer_expires_base = 0;

	/*
	 * If this CPU is the one which updates jiffies, then give up
	 * the assignment and let it be taken by the CPU which runs
	 * the tick timer next, which might be this CPU as well. If we
	 * don't drop this here, the jiffies might be stale and
	 * do_timer() never invoked. Keep track of the fact that it
	 * was the one which had the do_timer() duty last.
	 */
	if (cpu == tick_do_timer_cpu) {
		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		ts->do_timer_last = 1;
	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
		ts->do_timer_last = 0;
	}

	/* Skip reprogram of event if it's not changed */
	if (ts->tick_stopped && (expires == ts->next_tick)) {
		/* Sanity check: make sure clockevent is actually programmed */
		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
			return;

		WARN_ON_ONCE(1);
		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
			    basemono, ts->next_tick, dev->next_event,
			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
	}

	/*
	 * nohz_stop_sched_tick() can be called several times before
	 * nohz_restart_sched_tick() is called. This happens when
	 * interrupts arrive which do not cause a reschedule. In the
	 * first call we save the current tick time, so we can restart
	 * the scheduler tick in nohz_restart_sched_tick().
	 */
	if (!ts->tick_stopped) {
		calc_load_nohz_start();
		quiet_vmstat();

		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
		ts->tick_stopped = 1;
		trace_tick_stop(1, TICK_DEP_MASK_NONE);
	}

	ts->next_tick = tick;

	/*
	 * If the expiration time == KTIME_MAX, then we simply stop
	 * the tick timer.
	 */
	if (unlikely(expires == KTIME_MAX)) {
		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
			hrtimer_cancel(&ts->sched_timer);
		return;
	}

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
		hrtimer_start(&ts->sched_timer, tick,
			      HRTIMER_MODE_ABS_PINNED_HARD);
	} else {
		hrtimer_set_expires(&ts->sched_timer, tick);
		tick_program_event(tick, 1);
	}
}

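/* Keep the tick; just reset the expiry base armed by tick_nohz_next_event(). */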
static void tick_nohz_retain_tick(struct tick_sched *ts)
{
	ts->timer_expires_base = 0;
}

#ifdef CONFIG_NO_HZ_FULL
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
	if (tick_nohz_next_event(ts, cpu))
		tick_nohz_stop_tick(ts, cpu);
	else
		tick_nohz_retain_tick(ts);
}
#endif

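/* Bring a stopped tick back: update jiffies, load accounting and the timer. */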
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
	/* Update jiffies first */
	tick_do_update_jiffies64(now);

	/*
	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
	 * the clock forward checks in the enqueue path:
	 */
	timer_clear_idle();

	calc_load_nohz_stop();
	touch_softlockup_watchdog_sched();

	/*
	 * Cancel the scheduled timer and restore the tick
	 */
	ts->tick_stopped = 0;
	tick_nohz_restart(ts, now);
}

static void __tick_nohz_full_update_tick(struct tick_sched *ts,
					 ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
	int cpu = smp_processor_id();

	if (can_stop_full_tick(cpu, ts))
		tick_nohz_stop_sched_tick(ts, cpu);
	else if (ts->tick_stopped)
		tick_nohz_restart_sched_tick(ts, now);
#endif
}

static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
		return;

	__tick_nohz_full_update_tick(ts, ktime_get());
}

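/* Check all conditions which prevent the tick from being stopped on idle entry. */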
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
	/*
	 * If this CPU is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the CPU which runs the tick timer next. If we don't drop
	 * this here, the jiffies might be stale and do_timer() never
	 * invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		/*
		 * Make sure the CPU doesn't get fooled by an obsolete tick
		 * deadline if it comes back online later.
		 */
		ts->next_tick = 0;
		return false;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		return false;

	if (need_resched())
		return false;

	if (unlikely(local_softirq_pending())) {
		static int ratelimit;

		if (ratelimit < 10 &&
		    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
			pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
				(unsigned int) local_softirq_pending());
			ratelimit++;
		}
		return false;
	}

	if (tick_nohz_full_enabled()) {
		/*
		 * Keep the tick alive to guarantee timekeeping progression
		 * if there are full dynticks CPUs around.
		 */
		if (tick_do_timer_cpu == cpu)
			return false;

		/*
		 * Boot safety: make sure the timekeeping duty has been
		 * assigned before entering dyntick-idle mode.
		 */
		if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
			return false;

		/* Should not happen for nohz-full */
		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
			return false;
	}

	return true;
}

static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
{
	ktime_t expires;
	int cpu = smp_processor_id();

	/*
	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
	 * tick timer expiration time is known already.
	 */
	if (ts->timer_expires_base)
		expires = ts->timer_expires;
	else if (can_stop_idle_tick(cpu, ts))
		expires = tick_nohz_next_event(ts, cpu);
	else
		return;

	ts->idle_calls++;

	if (expires > 0LL) {
		int was_stopped = ts->tick_stopped;

		tick_nohz_stop_tick(ts, cpu);

		ts->idle_sleeps++;
		ts->idle_expires = expires;

		if (!was_stopped && ts->tick_stopped) {
			ts->idle_jiffies = ts->last_jiffies;
			nohz_balance_enter_idle(cpu);
		}
	} else {
		tick_nohz_retain_tick(ts);
	}
}

/**
 * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick
 */
void tick_nohz_idle_stop_tick(void)
{
	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}

void tick_nohz_idle_retain_tick(void)
{
	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
	/*
	 * Undo the effect of get_next_timer_interrupt() called from
	 * tick_nohz_next_event().
	 */
	timer_clear_idle();
}

/**
 * tick_nohz_idle_enter - prepare for entering idle on the current CPU
 *
 * Called when we start the idle loop.
 */
void tick_nohz_idle_enter(void)
{
	struct tick_sched *ts;

	lockdep_assert_irqs_enabled();

	local_irq_disable();

	ts = this_cpu_ptr(&tick_cpu_sched);

	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 1;
	tick_nohz_start_idle(ts);

	local_irq_enable();
}

/**
 * tick_nohz_irq_exit - update next tick event from interrupt exit
 *
 * When an interrupt fires while we are idle and it doesn't cause
 * a reschedule, it may still add, modify or delete a timer, enqueue
 * an RCU callback, etc...
 * So we need to re-calculate and reprogram the next tick event.
 */
void tick_nohz_irq_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->inidle)
		tick_nohz_start_idle(ts);
	else
		tick_nohz_full_update_tick(ts);
}

/**
 * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
 */
bool tick_nohz_idle_got_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->got_idle_tick) {
		ts->got_idle_tick = 0;
		return true;
	}
	return false;
}

/**
 * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
 * or the tick, whichever expires first. Note that, if the tick has been
 * stopped, it returns the next hrtimer.
 *
 * Called from power state control code with interrupts disabled
 */
ktime_t tick_nohz_get_next_hrtimer(void)
{
	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
}

/**
 * tick_nohz_get_sleep_length - return the expected length of the current sleep
 * @delta_next: duration until the next event if the tick cannot be stopped
 *
 * Called from power state control code with interrupts disabled
 */
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	int cpu = smp_processor_id();
	/*
	 * The idle entry time is expected to be a sufficient approximation of
	 * the current time at this point.
	 */
	ktime_t now = ts->idle_entrytime;
	ktime_t next_event;

	WARN_ON_ONCE(!ts->inidle);

	*delta_next = ktime_sub(dev->next_event, now);

	if (!can_stop_idle_tick(cpu, ts))
		return *delta_next;

	next_event = tick_nohz_next_event(ts, cpu);
	if (!next_event)
		return *delta_next;

	/*
	 * If the next highres timer to expire is earlier than next_event, the
	 * idle governor needs to know that.
	 */
	next_event = min_t(u64, next_event,
			   hrtimer_next_event_without(&ts->sched_timer));

	return ktime_sub(next_event, now);
}

/**
 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
 * for a given CPU.
 * @cpu: target CPU number
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
	struct tick_sched *ts = tick_get_tick_sched(cpu);

	return ts->idle_calls;
}

/**
 * tick_nohz_get_idle_calls - return the current idle calls counter value
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->idle_calls;
}

static void tick_nohz_account_idle_time(struct tick_sched *ts,
					ktime_t now)
{
	unsigned long ticks;

	ts->idle_exittime = now;

	if (vtime_accounting_enabled_this_cpu())
		return;

	/*
	 * We stopped the tick in idle. update_process_times() would miss the
	 * time we slept, as it does only a 1 tick accounting.
	 * Enforce that this is accounted to idle !
	 */
	ticks = jiffies - ts->idle_jiffies;
	/*
	 * We might be one off. Do not randomly account a huge number of ticks!
	 */
	if (ticks && ticks < LONG_MAX)
		account_idle_ticks(ticks);
}

void tick_nohz_idle_restart_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped) {
		ktime_t now = ktime_get();

		tick_nohz_restart_sched_tick(ts, now);
		tick_nohz_account_idle_time(ts, now);
	}
}

static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
{
	if (tick_nohz_full_cpu(smp_processor_id()))
		__tick_nohz_full_update_tick(ts, now);
	else
		tick_nohz_restart_sched_tick(ts, now);

	tick_nohz_account_idle_time(ts, now);
}

/**
 * tick_nohz_idle_exit - restart the idle tick from the idle task
 *
 * Restart the idle tick when the CPU is woken up from idle
 * This also exits the RCU extended quiescent state. The CPU
 * can use RCU again after this function is called.
 */
void tick_nohz_idle_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	bool idle_active, tick_stopped;
	ktime_t now;

	local_irq_disable();

	WARN_ON_ONCE(!ts->inidle);
	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 0;
	idle_active = ts->idle_active;
	tick_stopped = ts->tick_stopped;

	if (idle_active || tick_stopped)
		now = ktime_get();

	if (idle_active)
		tick_nohz_stop_idle(ts, now);

	if (tick_stopped)
		tick_nohz_idle_update_tick(ts, now);

	local_irq_enable();
}

/*
 * The nohz low res interrupt handler
 */
static void tick_nohz_handler(struct clock_event_device *dev)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	dev->next_event = KTIME_MAX;

	tick_sched_do_timer(ts, now);
	tick_sched_handle(ts, regs);

	/* No need to reprogram if we are running tickless */
	if (unlikely(ts->tick_stopped))
		return;

	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}

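/* Record the nohz mode and, on first activation, switch the timer wheel to nohz. */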
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
{
	if (!tick_nohz_enabled)
		return;
	ts->nohz_mode = mode;
	/* One update is enough */
	if (!test_and_set_bit(0, &tick_nohz_active))
		timers_update_nohz();
}

/*
 * tick_nohz_switch_to_nohz - switch to nohz mode
 */
static void tick_nohz_switch_to_nohz(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t next;

	if (!tick_nohz_enabled)
		return;

	if (tick_switch_to_oneshot(tick_nohz_handler))
		return;

	/*
	 * Recycle the hrtimer in ts, so we can share the
	 * hrtimer_forward() with the highres code.
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	/* Get the next period */
	next = tick_init_jiffy_update();

	hrtimer_set_expires(&ts->sched_timer, next);
	hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}

static inline void tick_nohz_irq_enter(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now;

	if (!ts->idle_active && !ts->tick_stopped)
		return;
	now = ktime_get();
	if (ts->idle_active)
		tick_nohz_stop_idle(ts, now);
	if (ts->tick_stopped)
		tick_nohz_update_jiffies(now);
}

#else

static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }

#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Called from irq_enter() to notify about the possible interruption of idle()
 */
void tick_irq_enter(void)
{
	tick_check_oneshot_broadcast_this_cpu();
	tick_nohz_irq_enter();
}

/*
 * High resolution timer specific code below
 */
#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
	struct tick_sched *ts =
		container_of(timer, struct tick_sched, sched_timer);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	tick_sched_do_timer(ts, now);

	/*
	 * Do not call, when we are not in irq context and have
	 * no valid regs pointer
	 */
	if (regs)
		tick_sched_handle(ts, regs);
	else
		ts->next_tick = 0;

	/* No need to reprogram if we are in idle or full dynticks mode */
	if (unlikely(ts->tick_stopped))
		return HRTIMER_NORESTART;

	hrtimer_forward(timer, now, TICK_NSEC);

	return HRTIMER_RESTART;
}

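/*
 * "skew_tick=" boot option: stagger the per-CPU tick timers so that all
 * CPUs don't contend on jiffies_lock at the same instant.
 */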
static int sched_skew_tick;

static int __init skew_tick(char *str)
{
	get_option(&str, &sched_skew_tick);

	return 0;
}
early_param("skew_tick", skew_tick);

/**
 * tick_setup_sched_timer - setup the tick emulation timer
 */
void tick_setup_sched_timer(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now = ktime_get();

	/*
	 * Emulate tick processing via per-CPU hrtimers:
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ts->sched_timer.function = tick_sched_timer;

	/* Get the next period (per-CPU) */
	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());

	/* Offset the tick to avert jiffies_lock contention. */
	if (sched_skew_tick) {
		u64 offset = TICK_NSEC >> 1;

		do_div(offset, num_possible_cpus());
		offset *= smp_processor_id();
		hrtimer_add_expires_ns(&ts->sched_timer, offset);
	}

	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif

#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

# ifdef CONFIG_HIGH_RES_TIMERS
	if (ts->sched_timer.base)
		hrtimer_cancel(&ts->sched_timer);
# endif

	memset(ts, 0, sizeof(*ts));
}
#endif

/*
 * Async notification about clocksource changes
 */
void tick_clock_notify(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}

/*
 * Async notification about clock event changes
 */
void tick_oneshot_notify(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	set_bit(0, &ts->check_clocks);
}

/*
 * Check if a change happened which makes oneshot mode possible.
 *
 * Called cyclically from the hrtimer softirq (driven by the timer
 * softirq). allow_nohz signals that we can switch into low-res nohz
 * mode, because high resolution timers are disabled (either at compile
 * time or at runtime). Called with interrupts disabled.
 */
int tick_check_oneshot_change(int allow_nohz)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (!test_and_clear_bit(0, &ts->check_clocks))
		return 0;

	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
		return 0;

	if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
		return 0;

	if (!allow_nohz)
		return 1;

	tick_nohz_switch_to_nohz();
	return 0;
}