1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/cpu.h>
15#include <linux/err.h>
16#include <linux/hrtimer.h>
17#include <linux/interrupt.h>
18#include <linux/kernel_stat.h>
19#include <linux/percpu.h>
20#include <linux/profile.h>
21#include <linux/sched.h>
22#include <linux/module.h>
23
24#include <asm/irq_regs.h>
25
26#include "tick-internal.h"
27
28
29
30
/*
 * Per-CPU tick scheduling state; one struct tick_sched per CPU.
 */
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);

/*
 * Time at which jiffies were last advanced. In this file it is only
 * written with the jiffies_lock write side held.
 */
static ktime_t last_jiffies_update;
37
38struct tick_sched *tick_get_tick_sched(int cpu)
39{
40 return &per_cpu(tick_cpu_sched, cpu);
41}
42
43
44
45
46static void tick_do_update_jiffies64(ktime_t now)
47{
48 unsigned long ticks = 0;
49 ktime_t delta;
50
51
52
53
54 delta = ktime_sub(now, last_jiffies_update);
55 if (delta.tv64 < tick_period.tv64)
56 return;
57
58
59 write_seqlock(&jiffies_lock);
60
61 delta = ktime_sub(now, last_jiffies_update);
62 if (delta.tv64 >= tick_period.tv64) {
63
64 delta = ktime_sub(delta, tick_period);
65 last_jiffies_update = ktime_add(last_jiffies_update,
66 tick_period);
67
68
69 if (unlikely(delta.tv64 >= tick_period.tv64)) {
70 s64 incr = ktime_to_ns(tick_period);
71
72 ticks = ktime_divns(delta, incr);
73
74 last_jiffies_update = ktime_add_ns(last_jiffies_update,
75 incr * ticks);
76 }
77 do_timer(++ticks);
78
79
80 tick_next_period = ktime_add(last_jiffies_update, tick_period);
81 }
82 write_sequnlock(&jiffies_lock);
83}
84
85
86
87
88static ktime_t tick_init_jiffy_update(void)
89{
90 ktime_t period;
91
92 write_seqlock(&jiffies_lock);
93
94 if (last_jiffies_update.tv64 == 0)
95 last_jiffies_update = tick_next_period;
96 period = last_jiffies_update;
97 write_sequnlock(&jiffies_lock);
98 return period;
99}
100
101
102static void tick_sched_do_timer(ktime_t now)
103{
104 int cpu = smp_processor_id();
105
106#ifdef CONFIG_NO_HZ
107
108
109
110
111
112
113
114 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
115 tick_do_timer_cpu = cpu;
116#endif
117
118
119 if (tick_do_timer_cpu == cpu)
120 tick_do_update_jiffies64(now);
121}
122
123static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
124{
125#ifdef CONFIG_NO_HZ
126
127
128
129
130
131
132
133
134 if (ts->tick_stopped) {
135 touch_softlockup_watchdog();
136 if (is_idle_task(current))
137 ts->idle_jiffies++;
138 }
139#endif
140 update_process_times(user_mode(regs));
141 profile_tick(CPU_PROFILING);
142}
143
144
145
146
147#ifdef CONFIG_NO_HZ
148
149
150
/*
 * NO_HZ enable switch, on by default. Cleared or set by the "nohz="
 * boot parameter handler below.
 */
int tick_nohz_enabled __read_mostly = 1;
152
153
154
155
156static int __init setup_tick_nohz(char *str)
157{
158 if (!strcmp(str, "off"))
159 tick_nohz_enabled = 0;
160 else if (!strcmp(str, "on"))
161 tick_nohz_enabled = 1;
162 else
163 return 0;
164 return 1;
165}
166
167__setup("nohz=", setup_tick_nohz);
168
169
170
171
172
173
174
175
176
177
178
179static void tick_nohz_update_jiffies(ktime_t now)
180{
181 int cpu = smp_processor_id();
182 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
183 unsigned long flags;
184
185 ts->idle_waketime = now;
186
187 local_irq_save(flags);
188 tick_do_update_jiffies64(now);
189 local_irq_restore(flags);
190
191 touch_softlockup_watchdog();
192}
193
194
195
196
197static void
198update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
199{
200 ktime_t delta;
201
202 if (ts->idle_active) {
203 delta = ktime_sub(now, ts->idle_entrytime);
204 if (nr_iowait_cpu(cpu) > 0)
205 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
206 else
207 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
208 ts->idle_entrytime = now;
209 }
210
211 if (last_update_time)
212 *last_update_time = ktime_to_us(now);
213
214}
215
216static void tick_nohz_stop_idle(int cpu, ktime_t now)
217{
218 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
219
220 update_ts_time_stats(cpu, ts, now, NULL);
221 ts->idle_active = 0;
222
223 sched_clock_idle_wakeup_event(0);
224}
225
226static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
227{
228 ktime_t now = ktime_get();
229
230 ts->idle_entrytime = now;
231 ts->idle_active = 1;
232 sched_clock_idle_sleep_event();
233 return now;
234}
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
251{
252 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
253 ktime_t now, idle;
254
255 if (!tick_nohz_enabled)
256 return -1;
257
258 now = ktime_get();
259 if (last_update_time) {
260 update_ts_time_stats(cpu, ts, now, last_update_time);
261 idle = ts->idle_sleeptime;
262 } else {
263 if (ts->idle_active && !nr_iowait_cpu(cpu)) {
264 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
265
266 idle = ktime_add(ts->idle_sleeptime, delta);
267 } else {
268 idle = ts->idle_sleeptime;
269 }
270 }
271
272 return ktime_to_us(idle);
273
274}
275EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
292{
293 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
294 ktime_t now, iowait;
295
296 if (!tick_nohz_enabled)
297 return -1;
298
299 now = ktime_get();
300 if (last_update_time) {
301 update_ts_time_stats(cpu, ts, now, last_update_time);
302 iowait = ts->iowait_sleeptime;
303 } else {
304 if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
305 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
306
307 iowait = ktime_add(ts->iowait_sleeptime, delta);
308 } else {
309 iowait = ts->iowait_sleeptime;
310 }
311 }
312
313 return ktime_to_us(iowait);
314}
315EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
316
/*
 * Try to defer the next tick of @cpu to the next event that actually
 * needs it, marking the tick as stopped in @ts when that succeeds.
 *
 * Returns the new expiry time when the tick was (re)programmed or
 * switched off entirely (KTIME_MAX), and a zero ktime when nothing was
 * changed. On every path the snapshot of jiffies, the computed next
 * jiffies and the sleep length are stored in @ts.
 */
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
					 ktime_t now, int cpu)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
	ktime_t last_update, expires, ret = { .tv64 = 0 };
	unsigned long rcu_delta_jiffies;
	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
	u64 time_delta;

	/* Take a consistent snapshot of jiffies and their last update time */
	do {
		seq = read_seqbegin(&jiffies_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;
		/* NOTE(review): presumably the longest time timekeeping may go without an update - confirm */
		time_delta = timekeeping_max_deferment();
	} while (read_seqretry(&jiffies_lock, seq));

	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
	    arch_needs_cpu(cpu)) {
		/* Someone still needs this CPU: next event is the very next jiffy */
		next_jiffies = last_jiffies + 1;
		delta_jiffies = 1;
	} else {
		/* Next pending timer, as reported by get_next_timer_interrupt() */
		next_jiffies = get_next_timer_interrupt(last_jiffies);
		delta_jiffies = next_jiffies - last_jiffies;
		/* Clamp to the delay RCU reported via rcu_needs_cpu() */
		if (rcu_delta_jiffies < delta_jiffies) {
			next_jiffies = last_jiffies + rcu_delta_jiffies;
			delta_jiffies = rcu_delta_jiffies;
		}
	}

	/*
	 * The tick is still running and the next event is only one jiffy
	 * away: nothing to gain, leave everything as it is.
	 */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	/* Only schedule the tick if the next event lies in the future */
	if ((long)delta_jiffies >= 1) {

		/*
		 * Decide whether this CPU's sleep must be bounded by
		 * time_delta as sampled above:
		 *  - it held the do_timer duty: drop the duty, remember that
		 *    it was the last holder and keep the bound;
		 *  - another CPU holds the duty: no bound (KTIME_MAX);
		 *  - nobody holds the duty and this CPU was not the last
		 *    holder: no bound either.
		 */
		if (cpu == tick_do_timer_cpu) {
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			ts->do_timer_last = 1;
		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
			time_delta = KTIME_MAX;
			ts->do_timer_last = 0;
		} else if (!ts->do_timer_last) {
			time_delta = KTIME_MAX;
		}

		/*
		 * Convert the jiffies delta to time and take the smaller of
		 * it and the bound above. Skipped when delta_jiffies is at
		 * or beyond NEXT_TIMER_MAX_DELTA, so only the bound applies.
		 */
		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
			time_delta = min_t(u64, time_delta,
					   tick_period.tv64 * delta_jiffies);
		}

		if (time_delta < KTIME_MAX)
			expires = ktime_add_ns(last_update, time_delta);
		else
			expires.tv64 = KTIME_MAX;

		/* Tick already stopped and programmed for this expiry: skip */
		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
			goto out;

		ret = expires;

		/*
		 * First transition to the stopped state: notify the nohz
		 * load balancer and load calculation, and remember the
		 * current sched_timer expiry in last_tick.
		 */
		if (!ts->tick_stopped) {
			nohz_balance_enter_idle(cpu);
			calc_load_enter_idle();

			ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
			ts->tick_stopped = 1;
		}

		/*
		 * No expiry at all (KTIME_MAX): do not program anything; in
		 * highres mode also cancel the tick hrtimer.
		 */
		if (unlikely(expires.tv64 == KTIME_MAX)) {
			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
				hrtimer_cancel(&ts->sched_timer);
			goto out;
		}

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS_PINNED);
			/* Done if the timer is queued; NOTE(review): presumably inactive means it already expired - confirm */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
			goto out;

		/*
		 * Programming did not stick: bring jiffies up to date and
		 * fall through to raise the timer softirq.
		 */
		tick_do_update_jiffies64(ktime_get());
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
	ts->sleep_length = ktime_sub(dev->next_event, now);

	return ret;
}
458
459static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
460{
461
462
463
464
465
466
467
468 if (unlikely(!cpu_online(cpu))) {
469 if (cpu == tick_do_timer_cpu)
470 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
471 }
472
473 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
474 return false;
475
476 if (need_resched())
477 return false;
478
479 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
480 static int ratelimit;
481
482 if (ratelimit < 10 &&
483 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
484 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
485 (unsigned int) local_softirq_pending());
486 ratelimit++;
487 }
488 return false;
489 }
490
491 return true;
492}
493
494static void __tick_nohz_idle_enter(struct tick_sched *ts)
495{
496 ktime_t now, expires;
497 int cpu = smp_processor_id();
498
499 now = tick_nohz_start_idle(cpu, ts);
500
501 if (can_stop_idle_tick(cpu, ts)) {
502 int was_stopped = ts->tick_stopped;
503
504 ts->idle_calls++;
505
506 expires = tick_nohz_stop_sched_tick(ts, now, cpu);
507 if (expires.tv64 > 0LL) {
508 ts->idle_sleeps++;
509 ts->idle_expires = expires;
510 }
511
512 if (!was_stopped && ts->tick_stopped)
513 ts->idle_jiffies = ts->last_jiffies;
514 }
515}
516
517
518
519
520
521
522
523
524
525
526
527
528
529void tick_nohz_idle_enter(void)
530{
531 struct tick_sched *ts;
532
533 WARN_ON_ONCE(irqs_disabled());
534
535
536
537
538
539
540
541 set_cpu_sd_state_idle();
542
543 local_irq_disable();
544
545 ts = &__get_cpu_var(tick_cpu_sched);
546
547
548
549
550
551 ts->inidle = 1;
552 __tick_nohz_idle_enter(ts);
553
554 local_irq_enable();
555}
556
557
558
559
560
561
562
563
564
565void tick_nohz_irq_exit(void)
566{
567 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
568
569 if (!ts->inidle)
570 return;
571
572
573 menu_hrtimer_cancel();
574 __tick_nohz_idle_enter(ts);
575}
576
577
578
579
580
581
582ktime_t tick_nohz_get_sleep_length(void)
583{
584 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
585
586 return ts->sleep_length;
587}
588
589static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
590{
591 hrtimer_cancel(&ts->sched_timer);
592 hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
593
594 while (1) {
595
596 hrtimer_forward(&ts->sched_timer, now, tick_period);
597
598 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
599 hrtimer_start_expires(&ts->sched_timer,
600 HRTIMER_MODE_ABS_PINNED);
601
602 if (hrtimer_active(&ts->sched_timer))
603 break;
604 } else {
605 if (!tick_program_event(
606 hrtimer_get_expires(&ts->sched_timer), 0))
607 break;
608 }
609
610 now = ktime_get();
611 tick_do_update_jiffies64(now);
612 }
613}
614
/*
 * Leave the stopped-tick state: catch up jiffies and load accounting,
 * clear tick_stopped, record the idle exit time and re-arm the tick.
 */
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
	/* Update jiffies first, before the load and watchdog calls below */
	tick_do_update_jiffies64(now);
	update_cpu_load_nohz();

	calc_load_exit_idle();
	touch_softlockup_watchdog();

	/* Mark the tick as running again and remember when idle ended */
	ts->tick_stopped = 0;
	ts->idle_exittime = now;

	tick_nohz_restart(ts, now);
}
631
632static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
633{
634#ifndef CONFIG_VIRT_CPU_ACCOUNTING
635 unsigned long ticks;
636
637
638
639
640
641 ticks = jiffies - ts->idle_jiffies;
642
643
644
645 if (ticks && ticks < LONG_MAX)
646 account_idle_ticks(ticks);
647#endif
648}
649
650
651
652
653
654
655
656
657void tick_nohz_idle_exit(void)
658{
659 int cpu = smp_processor_id();
660 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
661 ktime_t now;
662
663 local_irq_disable();
664
665 WARN_ON_ONCE(!ts->inidle);
666
667 ts->inidle = 0;
668
669
670 menu_hrtimer_cancel();
671 if (ts->idle_active || ts->tick_stopped)
672 now = ktime_get();
673
674 if (ts->idle_active)
675 tick_nohz_stop_idle(cpu, now);
676
677 if (ts->tick_stopped) {
678 tick_nohz_restart_sched_tick(ts, now);
679 tick_nohz_account_idle_ticks(ts);
680 }
681
682 local_irq_enable();
683}
684
685static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
686{
687 hrtimer_forward(&ts->sched_timer, now, tick_period);
688 return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
689}
690
691
692
693
694static void tick_nohz_handler(struct clock_event_device *dev)
695{
696 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
697 struct pt_regs *regs = get_irq_regs();
698 ktime_t now = ktime_get();
699
700 dev->next_event.tv64 = KTIME_MAX;
701
702 tick_sched_do_timer(now);
703 tick_sched_handle(ts, regs);
704
705 while (tick_nohz_reprogram(ts, now)) {
706 now = ktime_get();
707 tick_do_update_jiffies64(now);
708 }
709}
710
711
712
713
714static void tick_nohz_switch_to_nohz(void)
715{
716 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
717 ktime_t next;
718
719 if (!tick_nohz_enabled)
720 return;
721
722 local_irq_disable();
723 if (tick_switch_to_oneshot(tick_nohz_handler)) {
724 local_irq_enable();
725 return;
726 }
727
728 ts->nohz_mode = NOHZ_MODE_LOWRES;
729
730
731
732
733
734 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
735
736 next = tick_init_jiffy_update();
737
738 for (;;) {
739 hrtimer_set_expires(&ts->sched_timer, next);
740 if (!tick_program_event(next, 0))
741 break;
742 next = ktime_add(next, tick_period);
743 }
744 local_irq_enable();
745}
746
747
748
749
750
751
752
753
754
755
756
757
/*
 * Currently a no-op: the whole body is compiled out with "#if 0".
 * The disabled code would restart the tick when the sched_timer expiry
 * is more than one tick period away from @now.
 */
static void tick_nohz_kick_tick(int cpu, ktime_t now)
{
#if 0
	/* Disabled code, kept for reference */

	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t delta;

	/*
	 * Do not touch the tick device when the next expiry is at most
	 * one tick period away.
	 */
	delta =	ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
	if (delta.tv64 <= tick_period.tv64)
		return;

	tick_nohz_restart(ts, now);
#endif
}
777
778static inline void tick_check_nohz(int cpu)
779{
780 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
781 ktime_t now;
782
783 if (!ts->idle_active && !ts->tick_stopped)
784 return;
785 now = ktime_get();
786 if (ts->idle_active)
787 tick_nohz_stop_idle(cpu, now);
788 if (ts->tick_stopped) {
789 tick_nohz_update_jiffies(now);
790 tick_nohz_kick_tick(cpu, now);
791 }
792}
793
794#else
795
/* !CONFIG_NO_HZ: empty stubs so the callers below need no #ifdef */
static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_check_nohz(int cpu) { }
798
799#endif
800
801
802
803
/*
 * tick_check_idle - run the oneshot broadcast check and then the nohz
 * check for @cpu.
 */
void tick_check_idle(int cpu)
{
	tick_check_oneshot_broadcast(cpu);
	tick_check_nohz(cpu);
}
809
810
811
812
813#ifdef CONFIG_HIGH_RES_TIMERS
814
815
816
817
818static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
819{
820 struct tick_sched *ts =
821 container_of(timer, struct tick_sched, sched_timer);
822 struct pt_regs *regs = get_irq_regs();
823 ktime_t now = ktime_get();
824
825 tick_sched_do_timer(now);
826
827
828
829
830
831 if (regs)
832 tick_sched_handle(ts, regs);
833
834 hrtimer_forward(timer, now, tick_period);
835
836 return HRTIMER_RESTART;
837}
838
/* Non-zero requests per-CPU skewing of the tick in tick_setup_sched_timer() */
static int sched_skew_tick;

/*
 * Parse the "skew_tick" early parameter into sched_skew_tick.
 * The result of get_option() is not checked; always returns 0.
 */
static int __init skew_tick(char *str)
{
	get_option(&str, &sched_skew_tick);

	return 0;
}
early_param("skew_tick", skew_tick);
848
849
850
851
852void tick_setup_sched_timer(void)
853{
854 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
855 ktime_t now = ktime_get();
856
857
858
859
860 hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
861 ts->sched_timer.function = tick_sched_timer;
862
863
864 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
865
866
867 if (sched_skew_tick) {
868 u64 offset = ktime_to_ns(tick_period) >> 1;
869 do_div(offset, num_possible_cpus());
870 offset *= smp_processor_id();
871 hrtimer_add_expires_ns(&ts->sched_timer, offset);
872 }
873
874 for (;;) {
875 hrtimer_forward(&ts->sched_timer, now, tick_period);
876 hrtimer_start_expires(&ts->sched_timer,
877 HRTIMER_MODE_ABS_PINNED);
878
879 if (hrtimer_active(&ts->sched_timer))
880 break;
881 now = ktime_get();
882 }
883
884#ifdef CONFIG_NO_HZ
885 if (tick_nohz_enabled)
886 ts->nohz_mode = NOHZ_MODE_HIGHRES;
887#endif
888}
889#endif
890
891#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS
/*
 * Cancel the tick emulation timer of @cpu (when high-resolution timers
 * are configured) and mark nohz as inactive for that CPU.
 */
void tick_cancel_sched_timer(int cpu)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

# ifdef CONFIG_HIGH_RES_TIMERS
	/* NOTE(review): a NULL base presumably means the timer was never initialized - confirm */
	if (ts->sched_timer.base)
		hrtimer_cancel(&ts->sched_timer);
# endif

	ts->nohz_mode = NOHZ_MODE_INACTIVE;
}
903#endif
904
905
906
907
908void tick_clock_notify(void)
909{
910 int cpu;
911
912 for_each_possible_cpu(cpu)
913 set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
914}
915
916
917
918
919void tick_oneshot_notify(void)
920{
921 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
922
923 set_bit(0, &ts->check_clocks);
924}
925
926
927
928
929
930
931
932
933
934int tick_check_oneshot_change(int allow_nohz)
935{
936 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
937
938 if (!test_and_clear_bit(0, &ts->check_clocks))
939 return 0;
940
941 if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
942 return 0;
943
944 if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
945 return 0;
946
947 if (!allow_nohz)
948 return 1;
949
950 tick_nohz_switch_to_nohz();
951 return 0;
952}
953