1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/kernel_stat.h>
22#include <linux/export.h>
23#include <linux/interrupt.h>
24#include <linux/percpu.h>
25#include <linux/init.h>
26#include <linux/mm.h>
27#include <linux/swap.h>
28#include <linux/pid_namespace.h>
29#include <linux/notifier.h>
30#include <linux/thread_info.h>
31#include <linux/time.h>
32#include <linux/jiffies.h>
33#include <linux/posix-timers.h>
34#include <linux/cpu.h>
35#include <linux/syscalls.h>
36#include <linux/delay.h>
37#include <linux/tick.h>
38#include <linux/kallsyms.h>
39#include <linux/irq_work.h>
40#include <linux/sched/signal.h>
41#include <linux/sched/sysctl.h>
42#include <linux/sched/nohz.h>
43#include <linux/sched/debug.h>
44#include <linux/slab.h>
45#include <linux/compat.h>
46
47#include <linux/uaccess.h>
48#include <asm/unistd.h>
49#include <asm/div64.h>
50#include <asm/timex.h>
51#include <asm/io.h>
52
53#include "tick-internal.h"
54
55#define CREATE_TRACE_POINTS
56#include <trace/events/timer.h>
57
/*
 * The timer wheel's low-resolution time base: the 64-bit jiffies counter.
 * NOTE(review): seeded with INITIAL_JIFFIES — presumably a value close to
 * wraparound so that jiffies-wrap bugs surface early after boot; confirm
 * against the INITIAL_JIFFIES definition.
 */
__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*
 * Wheel level clocking: each level above level 0 runs at 1/8th
 * (LVL_CLK_DIV) of the resolution of the level below it, so the
 * granularity of level @n is 8^n jiffies.
 */
#define LVL_CLK_SHIFT	3
#define LVL_CLK_DIV	(1UL << LVL_CLK_SHIFT)
#define LVL_CLK_MASK	(LVL_CLK_DIV - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))

/*
 * Smallest expiry delta (in jiffies) which is handled by wheel level @n;
 * deltas below LVL_START(n + 1) are queued on level @n.
 */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

/* Each wheel level holds LVL_SIZE buckets. */
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

/* Number of wheel levels depends on HZ (higher HZ needs more range). */
#if HZ > 100
# define LVL_DEPTH	9
# else
# define LVL_DEPTH	8
#endif

/*
 * Expiry deltas at or beyond the cutoff cannot be represented by the
 * wheel and are clamped to WHEEL_TIMEOUT_MAX (see calc_wheel_index()).
 */
#define WHEEL_TIMEOUT_CUTOFF	(LVL_START(LVL_DEPTH))
#define WHEEL_TIMEOUT_MAX	(WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))

/* Total number of hash buckets across all levels. */
#define WHEEL_SIZE	(LVL_SIZE * LVL_DEPTH)

/*
 * With NO_HZ a second base (BASE_DEF) carries the deferrable timers so
 * they do not force a dynticks-idle CPU to wake up. Without NO_HZ both
 * indices alias the single standard base.
 */
#ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES	2
# define BASE_STD	0
# define BASE_DEF	1
#else
# define NR_BASES	1
# define BASE_STD	0
# define BASE_DEF	0
#endif

/* Per-CPU timer wheel instance; all members protected by ::lock. */
struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;	/* timer whose callback is currently executing */
#ifdef CONFIG_PREEMPT_RT
	spinlock_t		expiry_lock;	/* held across callback execution (RT) */
	atomic_t		timer_waiters;	/* waiters in del_timer_wait_running() */
#endif
	unsigned long		clk;		/* current wheel time, in jiffies */
	unsigned long		next_expiry;	/* earliest pending expiry (NOHZ idle tracking) */
	unsigned int		cpu;
	bool			is_idle;	/* base regarded idle by the NOHZ code */
	bool			must_forward_clk; /* ::clk must be forwarded before enqueue */
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);	/* one bit per non-empty bucket */
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;

static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
213
#ifdef CONFIG_NO_HZ_COMMON

/* Static key flipped on (once) when NOHZ mode becomes active. */
static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
/* Serializes updates of the static keys below. */
static DEFINE_MUTEX(timer_keys_mutex);

static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);

#ifdef CONFIG_SMP
/* sysctl knob: allow migrating timers away from idle CPUs. */
unsigned int sysctl_timer_migration = 1;

DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);

/*
 * Sync the timers_migration_enabled static key with the sysctl setting
 * and NOHZ state. Caller must hold timer_keys_mutex.
 */
static void timers_update_migration(void)
{
	if (sysctl_timer_migration && tick_nohz_active)
		static_branch_enable(&timers_migration_enabled);
	else
		static_branch_disable(&timers_migration_enabled);
}
#else
static inline void timers_update_migration(void) { }
#endif

/* Work callback: enable the NOHZ key and refresh the migration key. */
static void timer_update_keys(struct work_struct *work)
{
	mutex_lock(&timer_keys_mutex);
	timers_update_migration();
	static_branch_enable(&timers_nohz_active);
	mutex_unlock(&timer_keys_mutex);
}

/*
 * Called when NOHZ becomes active; defers the static key update to a
 * work item (static key updates cannot be done from every context).
 */
void timers_update_nohz(void)
{
	schedule_work(&timer_update_work);
}

/* /proc/sys handler for sysctl_timer_migration. */
int timer_migration_handler(struct ctl_table *table, int write,
			    void __user *buffer, size_t *lenp,
			    loff_t *ppos)
{
	int ret;

	mutex_lock(&timer_keys_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write)
		timers_update_migration();
	mutex_unlock(&timer_keys_mutex);
	return ret;
}

static inline bool is_timers_nohz_active(void)
{
	return static_branch_unlikely(&timers_nohz_active);
}
#else
static inline bool is_timers_nohz_active(void) { return false; }
#endif
272
273static unsigned long round_jiffies_common(unsigned long j, int cpu,
274 bool force_up)
275{
276 int rem;
277 unsigned long original = j;
278
279
280
281
282
283
284
285
286
287 j += cpu * 3;
288
289 rem = j % HZ;
290
291
292
293
294
295
296
297
298 if (rem < HZ/4 && !force_up)
299 j = j - rem;
300 else
301 j = j - rem + HZ;
302
303
304 j -= cpu * 3;
305
306
307
308
309
310 return time_is_after_jiffies(j) ? j : original;
311}
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333unsigned long __round_jiffies(unsigned long j, int cpu)
334{
335 return round_jiffies_common(j, cpu, false);
336}
337EXPORT_SYMBOL_GPL(__round_jiffies);
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359unsigned long __round_jiffies_relative(unsigned long j, int cpu)
360{
361 unsigned long j0 = jiffies;
362
363
364 return round_jiffies_common(j + j0, cpu, false) - j0;
365}
366EXPORT_SYMBOL_GPL(__round_jiffies_relative);
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383unsigned long round_jiffies(unsigned long j)
384{
385 return round_jiffies_common(j, raw_smp_processor_id(), false);
386}
387EXPORT_SYMBOL_GPL(round_jiffies);
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404unsigned long round_jiffies_relative(unsigned long j)
405{
406 return __round_jiffies_relative(j, raw_smp_processor_id());
407}
408EXPORT_SYMBOL_GPL(round_jiffies_relative);
409
410
411
412
413
414
415
416
417
418
419
420unsigned long __round_jiffies_up(unsigned long j, int cpu)
421{
422 return round_jiffies_common(j, cpu, true);
423}
424EXPORT_SYMBOL_GPL(__round_jiffies_up);
425
426
427
428
429
430
431
432
433
434
435
436unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
437{
438 unsigned long j0 = jiffies;
439
440
441 return round_jiffies_common(j + j0, cpu, true) - j0;
442}
443EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
444
445
446
447
448
449
450
451
452
453
454unsigned long round_jiffies_up(unsigned long j)
455{
456 return round_jiffies_common(j, raw_smp_processor_id(), true);
457}
458EXPORT_SYMBOL_GPL(round_jiffies_up);
459
460
461
462
463
464
465
466
467
468
469unsigned long round_jiffies_up_relative(unsigned long j)
470{
471 return __round_jiffies_up_relative(j, raw_smp_processor_id());
472}
473EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
474
475
/* Extract the wheel bucket index stored in timer->flags. */
static inline unsigned int timer_get_idx(struct timer_list *timer)
{
	return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
}

/* Store the wheel bucket index in timer->flags, preserving other bits. */
static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
{
	timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
			idx << TIMER_ARRAYSHIFT;
}
486
487
488
489
490
491static inline unsigned calc_index(unsigned expires, unsigned lvl)
492{
493 expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
494 return LVL_OFFS(lvl) + (expires & LVL_MASK);
495}
496
/*
 * Pick the wheel bucket for @expires given the base's current time @clk.
 * The level is chosen by the size of the delta: the smaller the delta,
 * the finer the level. Deltas that appear negative (already expired)
 * land in the current level-0 bucket so they fire on the next tick.
 */
static int calc_wheel_index(unsigned long expires, unsigned long clk)
{
	unsigned long delta = expires - clk;
	unsigned int idx;

	if (delta < LVL_START(1)) {
		idx = calc_index(expires, 0);
	} else if (delta < LVL_START(2)) {
		idx = calc_index(expires, 1);
	} else if (delta < LVL_START(3)) {
		idx = calc_index(expires, 2);
	} else if (delta < LVL_START(4)) {
		idx = calc_index(expires, 3);
	} else if (delta < LVL_START(5)) {
		idx = calc_index(expires, 4);
	} else if (delta < LVL_START(6)) {
		idx = calc_index(expires, 5);
	} else if (delta < LVL_START(7)) {
		idx = calc_index(expires, 6);
	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
		idx = calc_index(expires, 7);
	} else if ((long) delta < 0) {
		/* Timer is already expired: queue it for the next tick. */
		idx = clk & LVL_MASK;
	} else {
		/*
		 * Deltas beyond the wheel's capacity are forced into the
		 * last level's maximum representable timeout.
		 */
		if (expires >= WHEEL_TIMEOUT_CUTOFF)
			expires = WHEEL_TIMEOUT_MAX;

		idx = calc_index(expires, LVL_DEPTH - 1);
	}
	return idx;
}
532
533
534
535
536
/*
 * Queue @timer on bucket @idx of @base and mark the bucket pending.
 * Caller must hold base->lock.
 */
static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
			  unsigned int idx)
{
	hlist_add_head(&timer->entry, base->vectors + idx);
	__set_bit(idx, base->pending_map);
	timer_set_idx(timer, idx);

	trace_timer_start(timer, timer->expires, timer->flags);
}

/* Compute the bucket from the timer's expiry and enqueue it. */
static void
__internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	unsigned int idx;

	idx = calc_wheel_index(timer->expires, base->clk);
	enqueue_timer(base, timer, idx);
}

/*
 * If NOHZ is active, a newly queued timer may require waking the target
 * CPU so it reprograms its tick for the (possibly earlier) expiry.
 */
static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
	if (!is_timers_nohz_active())
		return;

	/*
	 * Deferrable timers never force an idle CPU out of dynticks, but a
	 * nohz_full CPU still needs a kick so the timer gets serviced.
	 */
	if (timer->flags & TIMER_DEFERRABLE) {
		if (tick_nohz_full_cpu(base->cpu))
			wake_up_nohz_cpu(base->cpu);
		return;
	}

	/* Nothing to do unless the target base is considered idle. */
	if (!base->is_idle)
		return;

	/* The new timer does not expire before the recorded next event. */
	if (time_after_eq(timer->expires, base->next_expiry))
		return;

	/*
	 * Record the earlier expiry and wake the CPU so it can re-evaluate
	 * its next timer event.
	 */
	base->next_expiry = timer->expires;
	wake_up_nohz_cpu(base->cpu);
}

/* Enqueue @timer and give the NOHZ code a chance to wake the CPU. */
static void
internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	__internal_add_timer(base, timer);
	trigger_dyntick_cpu(base, timer);
}
598
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

/* debugobjects hint: identify a timer by its callback function. */
static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

/*
 * Statically initialized timers are recognizable by the TIMER_ENTRY_STATIC
 * marker in their (otherwise unlinked) list entry.
 */
static bool timer_is_static_object(void *addr)
{
	struct timer_list *timer = addr;

	return (timer->entry.pprev == NULL &&
		timer->entry.next == TIMER_ENTRY_STATIC);
}

/*
 * fixup_init: called when an active timer is being re-initialized.
 * Cancel it first, then let the init proceed.
 */
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/* Stub callback installed on uninitialized timers; firing it is a bug. */
static void stub_timer(struct timer_list *unused)
{
	WARN_ON(1);
}

/*
 * fixup_activate: called when an uninitialized or already-active timer is
 * being activated. An uninitialized timer gets the warning stub installed;
 * activating an active timer is a plain bug.
 */
static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);
		/* fallthrough */
	default:
		return false;
	}
}

/*
 * fixup_free: called when an active timer's memory is being freed.
 * Cancel the timer before the object is released.
 */
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_assert_init: called when an uninitialized timer is used.
 * Install the warning stub so later activation is at least harmless.
 */
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.is_static_object	= timer_is_static_object,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key);

/*
 * On-stack timers need explicit debugobjects registration, and must be
 * torn down with destroy_timer_on_stack() before the stack frame goes.
 */
void init_timer_on_stack_key(struct timer_list *timer,
			     void (*func)(struct timer_list *),
			     unsigned int flags,
			     const char *name, struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif
759
/* Pair debugobjects tracking with the corresponding tracepoints. */
static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}

/*
 * Common timer initialization: mark the timer not pending (pprev == NULL),
 * install the callback, seed the flags with the current CPU number, and
 * set up the lockdep class for del_timer_sync() deadlock detection.
 */
static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key)
{
	timer->entry.pprev = NULL;
	timer->function = func;
	timer->flags = flags | raw_smp_processor_id();
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
787
788
789
790
791
792
793
794
795
796
797
798
799
800void init_timer_key(struct timer_list *timer,
801 void (*func)(struct timer_list *), unsigned int flags,
802 const char *name, struct lock_class_key *key)
803{
804 debug_init(timer);
805 do_init_timer(timer, func, flags, name, key);
806}
807EXPORT_SYMBOL(init_timer_key);
808
/*
 * Unlink @timer from its bucket. With @clear_pending the timer becomes
 * "not pending" (pprev == NULL); the forward pointer is poisoned either
 * way to catch use after detach. Caller must hold the base lock.
 */
static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
	struct hlist_node *entry = &timer->entry;

	debug_deactivate(timer);

	__hlist_del(entry);
	if (clear_pending)
		entry->pprev = NULL;
	entry->next = LIST_POISON2;
}

/*
 * Detach @timer from @base if it is queued; returns 1 if it was pending.
 * Clears the bucket's pending bit when the timer was the bucket's only
 * entry. Caller must hold base->lock.
 */
static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
			     bool clear_pending)
{
	unsigned idx = timer_get_idx(timer);

	if (!timer_pending(timer))
		return 0;

	if (hlist_is_singular_node(&timer->entry, base->vectors + idx))
		__clear_bit(idx, base->pending_map);

	detach_timer(timer, clear_pending);
	return 1;
}
835
/*
 * Select @cpu's wheel base for a timer with flags @tflags: deferrable
 * timers go to the separate BASE_DEF wheel when NO_HZ is configured.
 */
static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);

	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
	return base;
}

/* Same selection as above, but for the current CPU. */
static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
	return base;
}

/* Resolve the base a timer is currently homed on from its flags. */
static inline struct timer_base *get_timer_base(u32 tflags)
{
	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}

/*
 * Pick the base a timer should be (re)queued on: with timer migration
 * enabled, non-pinned timers may be placed on a busy CPU chosen by
 * get_nohz_timer_target(); otherwise they stay local.
 */
static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) &&
	    !(tflags & TIMER_PINNED))
		return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
	return get_timer_this_cpu_base(tflags);
}
877
/*
 * Catch a base's clock up towards the current jiffies value before a new
 * timer is enqueued, so the wheel index calculation is not based on stale
 * time. Only done when the base was flagged (must_forward_clk) and only
 * when the base is at least ~2 jiffies behind; the clock must never be
 * forwarded past next_expiry, otherwise queued timers could be missed.
 * Caller must hold base->lock.
 */
static inline void forward_timer_base(struct timer_base *base)
{
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long jnow;

	if (likely(!base->must_forward_clk))
		return;

	jnow = READ_ONCE(jiffies);
	/* Keep forwarding armed only while the base stays idle. */
	base->must_forward_clk = base->is_idle;
	if ((long)(jnow - base->clk) < 2)
		return;

	if (time_after(base->next_expiry, jnow))
		base->clk = jnow;
	else
		base->clk = base->next_expiry;
#endif
}
906
907
908
909
910
911
912
913
914
915
916
917
918
919static struct timer_base *lock_timer_base(struct timer_list *timer,
920 unsigned long *flags)
921 __acquires(timer->base->lock)
922{
923 for (;;) {
924 struct timer_base *base;
925 u32 tf;
926
927
928
929
930
931
932 tf = READ_ONCE(timer->flags);
933
934 if (!(tf & TIMER_MIGRATING)) {
935 base = get_timer_base(tf);
936 raw_spin_lock_irqsave(&base->lock, *flags);
937 if (timer->flags == tf)
938 return base;
939 raw_spin_unlock_irqrestore(&base->lock, *flags);
940 }
941 cpu_relax();
942 }
943}
944
/* __mod_timer() option bits. */
#define MOD_TIMER_PENDING_ONLY		0x01	/* only act if timer is pending */
#define MOD_TIMER_REDUCE		0x02	/* only move expiry earlier */
#define MOD_TIMER_NOTPENDING		0x04	/* caller guarantees not pending */

/*
 * Core of (re)arming a timer. Returns 0 if the timer was inactive,
 * 1 if it was active (or the requested change was a no-op).
 *
 * The fast path avoids taking any lock when the timer is pending and the
 * new expiry would land in the same wheel bucket; otherwise the timer is
 * detached, possibly migrated to a new base, and re-enqueued.
 */
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
	struct timer_base *base, *new_base;
	unsigned int idx = UINT_MAX;
	unsigned long clk = 0, flags;
	int ret = 0;

	BUG_ON(!timer->function);

	/*
	 * Fast path for the common case of re-arming a pending timer with
	 * an expiry in the same bucket.
	 */
	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
		/* Unlocked peek; validated again under the lock below. */
		long diff = timer->expires - expires;

		if (!diff)
			return 1;
		if (options & MOD_TIMER_REDUCE && diff <= 0)
			return 1;

		/*
		 * Lock the base and forward its clock so the bucket index
		 * is computed against up-to-date wheel time.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
		    time_before_eq(timer->expires, expires)) {
			ret = 1;
			goto out_unlock;
		}

		clk = base->clk;
		idx = calc_wheel_index(expires, clk);

		/*
		 * Same bucket: just update the expiry value in place, no
		 * requeue needed.
		 */
		if (idx == timer_get_idx(timer)) {
			if (!(options & MOD_TIMER_REDUCE))
				timer->expires = expires;
			else if (time_after(timer->expires, expires))
				timer->expires = expires;
			ret = 1;
			goto out_unlock;
		}
	} else {
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);
	}

	ret = detach_if_pending(timer, base, false);
	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
		goto out_unlock;

	new_base = get_target_base(base, timer->flags);

	if (base != new_base) {
		/*
		 * Only migrate when the callback is not currently running
		 * on the old base; otherwise del_timer_sync() could not
		 * find the executing timer. TIMER_MIGRATING makes
		 * lock_timer_base() spin while we switch bases.
		 */
		if (likely(base->running_timer != timer)) {
			timer->flags |= TIMER_MIGRATING;

			raw_spin_unlock(&base->lock);
			base = new_base;
			raw_spin_lock(&base->lock);
			WRITE_ONCE(timer->flags,
				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
			forward_timer_base(base);
		}
	}

	debug_timer_activate(timer);

	timer->expires = expires;

	/*
	 * If the precomputed bucket index is still valid (same base clock),
	 * reuse it; otherwise recompute via internal_add_timer().
	 */
	if (idx != UINT_MAX && clk == base->clk) {
		enqueue_timer(base, timer, idx);
		trigger_dyntick_cpu(base, timer);
	} else {
		internal_add_timer(base, timer);
	}

out_unlock:
	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073int mod_timer_pending(struct timer_list *timer, unsigned long expires)
1074{
1075 return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
1076}
1077EXPORT_SYMBOL(mod_timer_pending);
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099int mod_timer(struct timer_list *timer, unsigned long expires)
1100{
1101 return __mod_timer(timer, expires, 0);
1102}
1103EXPORT_SYMBOL(mod_timer);
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114int timer_reduce(struct timer_list *timer, unsigned long expires)
1115{
1116 return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
1117}
1118EXPORT_SYMBOL(timer_reduce);
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134void add_timer(struct timer_list *timer)
1135{
1136 BUG_ON(timer_pending(timer));
1137 __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
1138}
1139EXPORT_SYMBOL(add_timer);
1140
1141
1142
1143
1144
1145
1146
1147
1148void add_timer_on(struct timer_list *timer, int cpu)
1149{
1150 struct timer_base *new_base, *base;
1151 unsigned long flags;
1152
1153 BUG_ON(timer_pending(timer) || !timer->function);
1154
1155 new_base = get_timer_cpu_base(timer->flags, cpu);
1156
1157
1158
1159
1160
1161
1162 base = lock_timer_base(timer, &flags);
1163 if (base != new_base) {
1164 timer->flags |= TIMER_MIGRATING;
1165
1166 raw_spin_unlock(&base->lock);
1167 base = new_base;
1168 raw_spin_lock(&base->lock);
1169 WRITE_ONCE(timer->flags,
1170 (timer->flags & ~TIMER_BASEMASK) | cpu);
1171 }
1172 forward_timer_base(base);
1173
1174 debug_timer_activate(timer);
1175 internal_add_timer(base, timer);
1176 raw_spin_unlock_irqrestore(&base->lock, flags);
1177}
1178EXPORT_SYMBOL_GPL(add_timer_on);
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191int del_timer(struct timer_list *timer)
1192{
1193 struct timer_base *base;
1194 unsigned long flags;
1195 int ret = 0;
1196
1197 debug_assert_init(timer);
1198
1199 if (timer_pending(timer)) {
1200 base = lock_timer_base(timer, &flags);
1201 ret = detach_if_pending(timer, base, true);
1202 raw_spin_unlock_irqrestore(&base->lock, flags);
1203 }
1204
1205 return ret;
1206}
1207EXPORT_SYMBOL(del_timer);
1208
1209
1210
1211
1212
1213
1214
1215
1216int try_to_del_timer_sync(struct timer_list *timer)
1217{
1218 struct timer_base *base;
1219 unsigned long flags;
1220 int ret = -1;
1221
1222 debug_assert_init(timer);
1223
1224 base = lock_timer_base(timer, &flags);
1225
1226 if (base->running_timer != timer)
1227 ret = detach_if_pending(timer, base, true);
1228
1229 raw_spin_unlock_irqrestore(&base->lock, flags);
1230
1231 return ret;
1232}
1233EXPORT_SYMBOL(try_to_del_timer_sync);
1234
#ifdef CONFIG_PREEMPT_RT
/*
 * NOTE(review): this setup helper is tagged __init while its !RT stub
 * below is plain inline — confirm all callers really are boot-time-only
 * init code (caller not visible in this file chunk).
 */
static __init void timer_base_init_expiry_lock(struct timer_base *base)
{
	spin_lock_init(&base->expiry_lock);
}

/* Held across timer callback execution on RT (see __run_timers()). */
static inline void timer_base_lock_expiry(struct timer_base *base)
{
	spin_lock(&base->expiry_lock);
}

static inline void timer_base_unlock_expiry(struct timer_base *base)
{
	spin_unlock(&base->expiry_lock);
}

/*
 * If a del_timer_wait_running() caller is blocked on expiry_lock, drop
 * and immediately re-take it to give the waiter a window to observe the
 * callback's completion. Called between callback invocations with
 * expiry_lock held.
 */
static void timer_sync_wait_running(struct timer_base *base)
{
	if (atomic_read(&base->timer_waiters)) {
		spin_unlock(&base->expiry_lock);
		spin_lock(&base->expiry_lock);
	}
}

/*
 * On RT, del_timer_sync() cannot busy-wait for a running callback (the
 * canceling task might preempt the softirq executing it and deadlock
 * by priority inversion). Instead, block on the base's expiry_lock,
 * which the expiry code drops between callbacks when waiters exist.
 * Skipped while the timer is migrating; the caller retries anyway.
 */
static void del_timer_wait_running(struct timer_list *timer)
{
	u32 tf;

	tf = READ_ONCE(timer->flags);
	if (!(tf & TIMER_MIGRATING)) {
		struct timer_base *base = get_timer_base(tf);

		/*
		 * Advertise ourselves as a waiter, then block until the
		 * expiry code cycles expiry_lock for us.
		 */
		atomic_inc(&base->timer_waiters);
		spin_lock_bh(&base->expiry_lock);
		atomic_dec(&base->timer_waiters);
		spin_unlock_bh(&base->expiry_lock);
	}
}
#else
static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
static inline void timer_base_lock_expiry(struct timer_base *base) { }
static inline void timer_base_unlock_expiry(struct timer_base *base) { }
static inline void timer_sync_wait_running(struct timer_base *base) { }
static inline void del_timer_wait_running(struct timer_list *timer) { }
#endif
1305
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish
 * @timer:	the timer to be deactivated
 *
 * Like del_timer(), but additionally waits until the timer's callback
 * has finished executing on any CPU. Must not be called from interrupt
 * context unless the timer is TIMER_IRQSAFE, and the caller must not
 * hold any lock the timer callback also takes (classic deadlock —
 * lockdep-checked via the timer's lockdep map below).
 *
 * Return: 0 if the timer was inactive, 1 if it was pending and got
 * deactivated.
 */
int del_timer_sync(struct timer_list *timer)
{
	int ret;

#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * Pretend to acquire the timer's "lock" so lockdep can detect a
	 * caller holding a lock that the callback takes. IRQs disabled to
	 * avoid false positives from interrupts during the probe.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif

	/*
	 * Waiting from hardirq context can only work for IRQSAFE timers
	 * (those run with the base lock held / irqs off).
	 */
	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));

	do {
		ret = try_to_del_timer_sync(timer);

		if (unlikely(ret < 0)) {
			/* Callback is running: wait (RT) or spin (non-RT). */
			del_timer_wait_running(timer);
			cpu_relax();
		}
	} while (ret < 0);

	return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
1378
/*
 * Invoke a single expired timer's callback with tracing, lockdep
 * tracking, and a preempt-count leak check.
 */
static void call_timer_fn(struct timer_list *timer,
			  void (*fn)(struct timer_list *),
			  unsigned long baseclk)
{
	int count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * The callback may free or re-init the timer, so its lockdep map
	 * cannot be used directly — work on a private copy instead.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
#endif

	/*
	 * Couple with the lockdep probe in del_timer_sync(): treat the
	 * callback as running under the timer's virtual lock.
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer, baseclk);
	fn(timer);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (count != preempt_count()) {
		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
			  fn, count, preempt_count());
		/*
		 * Restore the preempt count to keep the system limping
		 * along instead of crashing on the imbalance.
		 */
		preempt_count_set(count);
	}
}
1422
/*
 * Run all timers collected on @head. Called with base->lock held; the
 * lock is dropped around each callback. Non-IRQSAFE callbacks run with
 * interrupts enabled.
 */
static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
	/*
	 * base->clk was already incremented by the caller, so the bucket
	 * that is being expired belongs to the previous clock value.
	 */
	unsigned long baseclk = base->clk - 1;

	while (!hlist_empty(head)) {
		struct timer_list *timer;
		void (*fn)(struct timer_list *);

		timer = hlist_entry(head->first, struct timer_list, entry);

		/* Mark as running so del_timer_sync() can detect it. */
		base->running_timer = timer;
		detach_timer(timer, true);

		fn = timer->function;

		if (timer->flags & TIMER_IRQSAFE) {
			raw_spin_unlock(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			raw_spin_lock(&base->lock);
		} else {
			raw_spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			/* RT: give del_timer_sync() waiters a chance. */
			timer_sync_wait_running(base);
			raw_spin_lock_irq(&base->lock);
		}
	}
}
1457
/*
 * Move every bucket that expires at base->clk onto the @heads array,
 * one list per wheel level. A higher level only needs checking when the
 * lower level's clock rolled over (clk bits at that level are zero).
 * Returns the number of levels with expired timers. Caller holds
 * base->lock.
 */
static int __collect_expired_timers(struct timer_base *base,
				    struct hlist_head *heads)
{
	unsigned long clk = base->clk;
	struct hlist_head *vec;
	int i, levels = 0;
	unsigned int idx;

	for (i = 0; i < LVL_DEPTH; i++) {
		idx = (clk & LVL_MASK) + i * LVL_SIZE;

		if (__test_and_clear_bit(idx, base->pending_map)) {
			vec = base->vectors + idx;
			hlist_move_list(vec, heads++);
			levels++;
		}

		/* Is it time to look at the next level? */
		if (clk & LVL_CLK_MASK)
			break;

		/* Shift clock for the next level granularity */
		clk >>= LVL_CLK_SHIFT;
	}
	return levels;
}
1482
1483#ifdef CONFIG_NO_HZ_COMMON
1484
1485
1486
1487
1488
1489static int next_pending_bucket(struct timer_base *base, unsigned offset,
1490 unsigned clk)
1491{
1492 unsigned pos, start = offset + clk;
1493 unsigned end = offset + LVL_SIZE;
1494
1495 pos = find_next_bit(base->pending_map, end, start);
1496 if (pos < end)
1497 return pos - start;
1498
1499 pos = find_next_bit(base->pending_map, start, offset);
1500 return pos < start ? pos + LVL_SIZE - start : -1;
1501}
1502
1503
1504
1505
1506
/*
 * Search the whole wheel for the next expiring timer. Returns the next
 * expiry in absolute jiffies, or base->clk + NEXT_TIMER_MAX_DELTA when
 * no timer is pending. Caller holds base->lock.
 */
static unsigned long __next_timer_interrupt(struct timer_base *base)
{
	unsigned long clk, next, adj;
	unsigned lvl, offset = 0;

	next = base->clk + NEXT_TIMER_MAX_DELTA;
	clk = base->clk;
	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
		int pos = next_pending_bucket(base, offset, clk & LVL_MASK);

		if (pos >= 0) {
			unsigned long tmp = clk + (unsigned long) pos;

			/* Scale the level-local distance back to jiffies. */
			tmp <<= LVL_SHIFT(lvl);
			if (time_before(tmp, next))
				next = tmp;
		}

		/*
		 * Move up one level: shift the clock to that level's
		 * granularity, rounding up when the lower-level bits were
		 * non-zero. The next level can only hold an earlier expiry
		 * than what we already found if that rounding applies, but
		 * all levels are always scanned — NOTE(review): the
		 * original upstream code documents why an unconditional
		 * scan is required (bucket granularity vs. expiry rounding);
		 * that rationale is not derivable from this stripped copy.
		 */
		adj = clk & LVL_CLK_MASK ? 1 : 0;
		clk >>= LVL_CLK_SHIFT;
		clk += adj;
	}
	return next;
}
1566
1567
1568
1569
1570
1571static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
1572{
1573 u64 nextevt = hrtimer_get_next_event();
1574
1575
1576
1577
1578
1579 if (expires <= nextevt)
1580 return expires;
1581
1582
1583
1584
1585
1586 if (nextevt <= basem)
1587 return basem;
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597 return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
1598}
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1609{
1610 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1611 u64 expires = KTIME_MAX;
1612 unsigned long nextevt;
1613 bool is_max_delta;
1614
1615
1616
1617
1618
1619 if (cpu_is_offline(smp_processor_id()))
1620 return expires;
1621
1622 raw_spin_lock(&base->lock);
1623 nextevt = __next_timer_interrupt(base);
1624 is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
1625 base->next_expiry = nextevt;
1626
1627
1628
1629
1630
1631 if (time_after(basej, base->clk)) {
1632 if (time_after(nextevt, basej))
1633 base->clk = basej;
1634 else if (time_after(nextevt, base->clk))
1635 base->clk = nextevt;
1636 }
1637
1638 if (time_before_eq(nextevt, basej)) {
1639 expires = basem;
1640 base->is_idle = false;
1641 } else {
1642 if (!is_max_delta)
1643 expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
1644
1645
1646
1647
1648
1649
1650
1651 if ((expires - basem) > TICK_NSEC) {
1652 base->must_forward_clk = true;
1653 base->is_idle = true;
1654 }
1655 }
1656 raw_spin_unlock(&base->lock);
1657
1658 return cmp_next_hrtimer_event(basem, expires);
1659}
1660
1661
1662
1663
1664
1665
1666void timer_clear_idle(void)
1667{
1668 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1669
1670
1671
1672
1673
1674
1675
1676 base->is_idle = false;
1677}
1678
1679static int collect_expired_timers(struct timer_base *base,
1680 struct hlist_head *heads)
1681{
1682 unsigned long now = READ_ONCE(jiffies);
1683
1684
1685
1686
1687
1688
1689 if ((long)(now - base->clk) > 2) {
1690 unsigned long next = __next_timer_interrupt(base);
1691
1692
1693
1694
1695
1696 if (time_after(next, now)) {
1697
1698
1699
1700
1701 base->clk = now;
1702 return 0;
1703 }
1704 base->clk = next;
1705 }
1706 return __collect_expired_timers(base, heads);
1707}
1708#else
1709static inline int collect_expired_timers(struct timer_base *base,
1710 struct hlist_head *heads)
1711{
1712 return __collect_expired_timers(base, heads);
1713}
1714#endif
1715
1716
1717
1718
1719
1720void update_process_times(int user_tick)
1721{
1722 struct task_struct *p = current;
1723
1724
1725 account_process_tick(p, user_tick);
1726 run_local_timers();
1727 rcu_sched_clock_irq(user_tick);
1728#ifdef CONFIG_IRQ_WORK
1729 if (in_irq())
1730 irq_work_tick();
1731#endif
1732 scheduler_tick();
1733 if (IS_ENABLED(CONFIG_POSIX_TIMERS))
1734 run_posix_cpu_timers();
1735}
1736
1737
1738
1739
1740
1741static inline void __run_timers(struct timer_base *base)
1742{
1743 struct hlist_head heads[LVL_DEPTH];
1744 int levels;
1745
1746 if (!time_after_eq(jiffies, base->clk))
1747 return;
1748
1749 timer_base_lock_expiry(base);
1750 raw_spin_lock_irq(&base->lock);
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766 base->must_forward_clk = false;
1767
1768 while (time_after_eq(jiffies, base->clk)) {
1769
1770 levels = collect_expired_timers(base, heads);
1771 base->clk++;
1772
1773 while (levels--)
1774 expire_timers(base, heads + levels);
1775 }
1776 raw_spin_unlock_irq(&base->lock);
1777 timer_base_unlock_expiry(base);
1778}
1779
1780
1781
1782
1783static __latent_entropy void run_timer_softirq(struct softirq_action *h)
1784{
1785 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1786
1787 __run_timers(base);
1788 if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
1789 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
1790}
1791
1792
1793
1794
1795void run_local_timers(void)
1796{
1797 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1798
1799 hrtimer_run_queues();
1800
1801 if (time_before(jiffies, base->clk)) {
1802 if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
1803 return;
1804
1805 base++;
1806 if (time_before(jiffies, base->clk))
1807 return;
1808 }
1809 raise_softirq(TIMER_SOFTIRQ);
1810}
1811
1812
1813
1814
1815
/*
 * Since schedule_timeout()'s timer lives on the stack, the task to wake
 * must be carried alongside it on the stack as well.
 */
struct process_timer {
	struct timer_list timer;
	struct task_struct *task;
};

/* Timer callback: wake the task that armed the on-stack timer. */
static void process_timeout(struct timer_list *t)
{
	struct process_timer *timeout = from_timer(timeout, t, timer);

	wake_up_process(timeout->task);
}
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859signed long __sched schedule_timeout(signed long timeout)
1860{
1861 struct process_timer timer;
1862 unsigned long expire;
1863
1864 switch (timeout)
1865 {
1866 case MAX_SCHEDULE_TIMEOUT:
1867
1868
1869
1870
1871
1872
1873
1874 schedule();
1875 goto out;
1876 default:
1877
1878
1879
1880
1881
1882
1883
1884 if (timeout < 0) {
1885 printk(KERN_ERR "schedule_timeout: wrong timeout "
1886 "value %lx\n", timeout);
1887 dump_stack();
1888 current->state = TASK_RUNNING;
1889 goto out;
1890 }
1891 }
1892
1893 expire = timeout + jiffies;
1894
1895 timer.task = current;
1896 timer_setup_on_stack(&timer.timer, process_timeout, 0);
1897 __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
1898 schedule();
1899 del_singleshot_timer_sync(&timer.timer);
1900
1901
1902 destroy_timer_on_stack(&timer.timer);
1903
1904 timeout = expire - jiffies;
1905
1906 out:
1907 return timeout < 0 ? 0 : timeout;
1908}
1909EXPORT_SYMBOL(schedule_timeout);
1910
1911
1912
1913
1914
/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);
1921
/* Sleep for @timeout jiffies; only fatal signals can wake us early. */
signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);
1928
/* Sleep for @timeout jiffies; signals are ignored. */
signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1935
1936
1937
1938
1939
/*
 * Like schedule_timeout_uninterruptible(), except this task will not
 * contribute to load average.
 */
signed long __sched schedule_timeout_idle(signed long timeout)
{
	__set_current_state(TASK_IDLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_idle);
1946
1947#ifdef CONFIG_HOTPLUG_CPU
1948static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
1949{
1950 struct timer_list *timer;
1951 int cpu = new_base->cpu;
1952
1953 while (!hlist_empty(head)) {
1954 timer = hlist_entry(head->first, struct timer_list, entry);
1955 detach_timer(timer, false);
1956 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
1957 internal_add_timer(new_base, timer);
1958 }
1959}
1960
1961int timers_prepare_cpu(unsigned int cpu)
1962{
1963 struct timer_base *base;
1964 int b;
1965
1966 for (b = 0; b < NR_BASES; b++) {
1967 base = per_cpu_ptr(&timer_bases[b], cpu);
1968 base->clk = jiffies;
1969 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
1970 base->is_idle = false;
1971 base->must_forward_clk = true;
1972 }
1973 return 0;
1974}
1975
/*
 * CPU-hotplug dead callback: migrate all pending timers from the dead
 * CPU's wheel bases to this CPU's corresponding bases.
 */
int timers_dead_cpu(unsigned int cpu)
{
	struct timer_base *old_base;
	struct timer_base *new_base;
	int b, i;

	BUG_ON(cpu_online(cpu));

	for (b = 0; b < NR_BASES; b++) {
		old_base = per_cpu_ptr(&timer_bases[b], cpu);
		new_base = get_cpu_ptr(&timer_bases[b]);
		/*
		 * The caller is globally serialized and nobody else
		 * takes two locks at once, deadlock is not possible.
		 */
		raw_spin_lock_irq(&new_base->lock);
		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

		/*
		 * The current CPU's base clock might be stale. Update it
		 * before moving the timers over.
		 */
		forward_timer_base(new_base);

		/* The dead CPU cannot be executing a timer callback. */
		BUG_ON(old_base->running_timer);

		for (i = 0; i < WHEEL_SIZE; i++)
			migrate_timer_list(new_base, old_base->vectors + i);

		raw_spin_unlock(&old_base->lock);
		raw_spin_unlock_irq(&new_base->lock);
		put_cpu_ptr(&timer_bases);
	}
	return 0;
}
2011
2012#endif
2013
2014static void __init init_timer_cpu(int cpu)
2015{
2016 struct timer_base *base;
2017 int i;
2018
2019 for (i = 0; i < NR_BASES; i++) {
2020 base = per_cpu_ptr(&timer_bases[i], cpu);
2021 base->cpu = cpu;
2022 raw_spin_lock_init(&base->lock);
2023 base->clk = jiffies;
2024 timer_base_init_expiry_lock(base);
2025 }
2026}
2027
/* Initialize the timer bases of every possible CPU at boot. */
static void __init init_timer_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		init_timer_cpu(cpu);
}
2035
/* Boot-time timer subsystem setup: per-CPU bases and the timer softirq. */
void __init init_timers(void)
{
	init_timer_cpus();
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
2041
2042
2043
2044
2045
/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	/*
	 * Re-arm until the full timeout has elapsed; spurious wakeups
	 * return the remaining jiffies. Initial timeout is always >= 1,
	 * so do/while is safe.
	 */
	do {
		timeout = schedule_timeout_uninterruptible(timeout);
	} while (timeout);
}
2053
2054EXPORT_SYMBOL(msleep);
2055
2056
2057
2058
2059
2060unsigned long msleep_interruptible(unsigned int msecs)
2061{
2062 unsigned long timeout = msecs_to_jiffies(msecs) + 1;
2063
2064 while (timeout && !signal_pending(current))
2065 timeout = schedule_timeout_interruptible(timeout);
2066 return jiffies_to_msecs(timeout);
2067}
2068
2069EXPORT_SYMBOL(msleep_interruptible);
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082void __sched usleep_range(unsigned long min, unsigned long max)
2083{
2084 ktime_t exp = ktime_add_us(ktime_get(), min);
2085 u64 delta = (u64)(max - min) * NSEC_PER_USEC;
2086
2087 for (;;) {
2088 __set_current_state(TASK_UNINTERRUPTIBLE);
2089
2090 if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
2091 break;
2092 }
2093}
2094EXPORT_SYMBOL(usleep_range);
2095