// SPDX-License-Identifier: GPL-2.0
/*
 * Kernel internal timers
 *
 * This file implements the per-CPU timer wheel behind add_timer(),
 * mod_timer() and del_timer_sync(), plus the sleeping helpers
 * schedule_timeout(), msleep() and usleep_range() built on top of it.
 */
#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);
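
/*
 * Usage note (illustrative, not part of the original file): jiffies_64
 * cannot be read atomically on 32-bit architectures, so code outside the
 * timekeeping core reads it via get_jiffies_64(), which wraps the access
 * in the jiffies seqlock:
 *
 *	u64 now = get_jiffies_64();
 *
 * On 64-bit kernels this reduces to a plain load.
 */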

/*
 * The timer wheel hierarchy:
 *
 * The wheel consists of LVL_DEPTH levels of LVL_SIZE (64) hash buckets
 * each. Level 0 has a granularity of one jiffy; every further level is
 * coarser by a factor of LVL_CLK_DIV (8), so level N groups expiry times
 * into buckets of 8^N jiffies. A timer is sorted into a level according
 * to how far in the future it expires: the further out, the coarser the
 * bucket. That keeps arming and expiring timers O(1), at the price of
 * long timeouts firing up to one bucket granularity late - acceptable,
 * because the long-out timeouts (networking, storage, etc.) are
 * overwhelmingly canceled or rearmed before they expire.
 *
 * For HZ=1000 (LVL_DEPTH = 9) the levels work out to:
 *
 * Level Offset   Granularity            Range
 *  0       0         1 ms               0 ms -         63 ms
 *  1      64         8 ms              64 ms -        511 ms
 *  2     128        64 ms             512 ms -       4095 ms (~4s)
 *  3     192       512 ms            4096 ms -      32767 ms (~32s)
 *  4     256      4096 ms (~4s)     32768 ms -     262143 ms (~4m)
 *  5     320     32768 ms (~32s)   262144 ms -    2097151 ms (~34m)
 *  6     384    262144 ms (~4m)   2097152 ms -   16777215 ms (~4h)
 *  7     448   2097152 ms (~34m) 16777216 ms -  134217727 ms (~1d)
 *  8     512  16777216 ms (~4h) 134217728 ms - 1073741822 ms (~12d)
 *
 * For HZ values of 100 and below the wheel gets by with LVL_DEPTH = 8,
 * since the coarser base tick already covers a comparable total range.
 */
#define LVL_CLK_SHIFT	3
#define LVL_CLK_DIV	(1UL << LVL_CLK_SHIFT)
#define LVL_CLK_MASK	(LVL_CLK_DIV - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))

/*
 * The time start value for each level to select the bucket at enqueue
 * time.
 */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
/* Size of each clock level */
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

/* Level depth */
#if HZ > 100
# define LVL_DEPTH	9
# else
# define LVL_DEPTH	8
#endif

/* The cutoff (max. capacity of the wheel) */
#define WHEEL_TIMEOUT_CUTOFF	(LVL_START(LVL_DEPTH))
#define WHEEL_TIMEOUT_MAX	(WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))

/*
 * The resulting wheel size. If NOHZ is configured we allocate two
 * wheels so we have a separate storage for the deferrable timers.
 */
#define WHEEL_SIZE	(LVL_SIZE * LVL_DEPTH)

#ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES	2
# define BASE_STD	0
# define BASE_DEF	1
#else
# define NR_BASES	1
# define BASE_STD	0
# define BASE_DEF	0
#endif
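
/*
 * Worked example (illustrative): with HZ=1000 and thus LVL_DEPTH = 9,
 * WHEEL_TIMEOUT_CUTOFF = LVL_START(9) = 63 << (8 * 3) = 1056964608
 * jiffies, i.e. a bit over 12 days, and WHEEL_TIMEOUT_MAX backs off from
 * that by LVL_GRAN(8) = 1 << 24 jiffies (~4.7 hours), the bucket
 * granularity of the last level. Longer timeouts are clamped to
 * WHEEL_TIMEOUT_MAX in calc_wheel_index() below.
 */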

struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;
#ifdef CONFIG_PREEMPT_RT
	spinlock_t		expiry_lock;
	atomic_t		timer_waiters;
#endif
	unsigned long		clk;
	unsigned long		next_expiry;
	unsigned int		cpu;
	bool			next_expiry_recalc;
	bool			is_idle;
	bool			timers_pending;
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;

static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);

#ifdef CONFIG_NO_HZ_COMMON

static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
static DEFINE_MUTEX(timer_keys_mutex);

static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);

#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;

DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);

static void timers_update_migration(void)
{
	if (sysctl_timer_migration && tick_nohz_active)
		static_branch_enable(&timers_migration_enabled);
	else
		static_branch_disable(&timers_migration_enabled);
}
#else
static inline void timers_update_migration(void) { }
#endif

static void timer_update_keys(struct work_struct *work)
{
	mutex_lock(&timer_keys_mutex);
	timers_update_migration();
	static_branch_enable(&timers_nohz_active);
	mutex_unlock(&timer_keys_mutex);
}

void timers_update_nohz(void)
{
	schedule_work(&timer_update_work);
}

int timer_migration_handler(struct ctl_table *table, int write,
			    void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&timer_keys_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write)
		timers_update_migration();
	mutex_unlock(&timer_keys_mutex);
	return ret;
}

static inline bool is_timers_nohz_active(void)
{
	return static_branch_unlikely(&timers_nohz_active);
}
#else
static inline bool is_timers_nohz_active(void) { return false; }
#endif

static unsigned long round_jiffies_common(unsigned long j, int cpu,
					  bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * 3*cpunr again to get the original value back.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffie is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for what is considered "close enough" to a whole second.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up)
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	/*
	 * Make sure j is still in the future. Otherwise return the
	 * unmodified value.
	 */
	return time_is_after_jiffies(j) ? j : original;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long
 * as they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in
 * jiffies) up or down to (approximately) full seconds, analogous to
 * __round_jiffies() above.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds, using the skew of the
 * local CPU.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in
 * jiffies) up or down to (approximately) full seconds, using the skew
 * of the local CPU.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
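
/*
 * Example (illustrative sketch, not from the original source): a driver
 * that polls roughly once a second and does not care about the exact
 * firing time can batch its wakeup with other timers:
 *
 *	mod_timer(&priv->poll_timer, round_jiffies(jiffies + HZ));
 *
 * or, for a relative timeout:
 *
 *	schedule_delayed_work(&priv->poll_work, round_jiffies_relative(HZ));
 *
 * "priv" and the timer/work member names are hypothetical.
 */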

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

static inline unsigned int timer_get_idx(struct timer_list *timer)
{
	return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
}

static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
{
	timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
			idx << TIMER_ARRAYSHIFT;
}

/*
 * Helper function to calculate the array index for a given expiry
 * time.
 */
static inline unsigned calc_index(unsigned long expires, unsigned lvl,
				  unsigned long *bucket_expiry)
{
	/*
	 * The timer wheel has to guarantee that a timer does not fire
	 * early. Early expiry can happen due to:
	 * - Timer is armed at the edge of a tick
	 * - Truncation of the expiry time in the outer wheel levels
	 *
	 * Round up with level granularity to prevent this.
	 */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

static int calc_wheel_index(unsigned long expires, unsigned long clk,
			    unsigned long *bucket_expiry)
{
	unsigned long delta = expires - clk;
	unsigned int idx;

	if (delta < LVL_START(1)) {
		idx = calc_index(expires, 0, bucket_expiry);
	} else if (delta < LVL_START(2)) {
		idx = calc_index(expires, 1, bucket_expiry);
	} else if (delta < LVL_START(3)) {
		idx = calc_index(expires, 2, bucket_expiry);
	} else if (delta < LVL_START(4)) {
		idx = calc_index(expires, 3, bucket_expiry);
	} else if (delta < LVL_START(5)) {
		idx = calc_index(expires, 4, bucket_expiry);
	} else if (delta < LVL_START(6)) {
		idx = calc_index(expires, 5, bucket_expiry);
	} else if (delta < LVL_START(7)) {
		idx = calc_index(expires, 6, bucket_expiry);
	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
		idx = calc_index(expires, 7, bucket_expiry);
	} else if ((long) delta < 0) {
		idx = clk & LVL_MASK;
		*bucket_expiry = clk;
	} else {
		/*
		 * Force expire obscene large timeouts to expire at the
		 * capacity limit of the wheel.
		 */
		if (delta >= WHEEL_TIMEOUT_CUTOFF)
			expires = clk + WHEEL_TIMEOUT_MAX;

		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
	}
	return idx;
}
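
/*
 * Worked example (illustrative): with HZ=1000, clk = 1000 and
 * expires = 1100, delta = 100 lies in [LVL_START(1) = 63,
 * LVL_START(2) = 504), so level 1 is used. calc_index() then rounds up
 * by one level granularity: (1100 + 8) >> 3 = 138, which gives
 * *bucket_expiry = 138 << 3 = 1104 and idx = 64 + (138 & 63) = 74.
 * The timer thus fires up to 4 jiffies late, within the documented
 * slack of that wheel level.
 */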

static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
	if (!is_timers_nohz_active())
		return;

	/*
	 * A deferrable timer must not wake an otherwise idle CPU out of
	 * nohz sleep; only poke nohz_full CPUs, which have no tick that
	 * would pick the timer up otherwise.
	 */
	if (timer->flags & TIMER_DEFERRABLE) {
		if (tick_nohz_full_cpu(base->cpu))
			wake_up_nohz_cpu(base->cpu);
		return;
	}

	/*
	 * The remote CPU set base->is_idle under base->lock before
	 * stopping its tick. As we hold that lock here, kick the CPU so
	 * it reevaluates its wheel now that this timer is the new first
	 * expiring timer.
	 */
	if (base->is_idle)
		wake_up_nohz_cpu(base->cpu);
}

/*
 * Enqueue the timer into the hash bucket, mark it pending in
 * the bitmap, store the index in the timer flags then wake up
 * the target CPU if needed.
 */
static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
			  unsigned int idx, unsigned long bucket_expiry)
{
	hlist_add_head(&timer->entry, base->vectors + idx);
	__set_bit(idx, base->pending_map);
	timer_set_idx(timer, idx);

	trace_timer_start(timer, timer->expires, timer->flags);

	/*
	 * Check whether this is the new first expiring timer. The
	 * effective expiry time of the timer is required here
	 * (bucket_expiry) instead of timer->expires.
	 */
	if (time_before(bucket_expiry, base->next_expiry)) {
		/*
		 * Set the next expiry time and kick the CPU so it
		 * can reevaluate the wheel:
		 */
		base->next_expiry = bucket_expiry;
		base->timers_pending = true;
		base->next_expiry_recalc = false;
		trigger_dyntick_cpu(base, timer);
	}
}

static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	unsigned long bucket_expiry;
	unsigned int idx;

	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
	enqueue_timer(base, timer, idx, bucket_expiry);
}

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static const struct debug_obj_descr timer_debug_descr;

static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

static bool timer_is_static_object(void *addr)
{
	struct timer_list *timer = addr;

	return (timer->entry.pprev == NULL &&
		timer->entry.next == TIMER_ENTRY_STATIC);
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/* Stub timer callback for improperly used timers. */
static void stub_timer(struct timer_list *unused)
{
	WARN_ON(1);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);
		fallthrough;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_assert_init is called when:
 * - an untracked/uninitialized object is found
 */
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.is_static_object	= timer_is_static_object,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     void (*func)(struct timer_list *),
			     unsigned int flags,
			     const char *name, struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key)
{
	timer->entry.pprev = NULL;
	timer->function = func;
	if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
		flags &= TIMER_INIT_FLAGS;
	timer->flags = flags | raw_smp_processor_id();
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @func: timer callback function
 * @flags: timer flags
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of
 * the other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    void (*func)(struct timer_list *), unsigned int flags,
		    const char *name, struct lock_class_key *key)
{
	debug_init(timer);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);
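
/*
 * Example (illustrative sketch, not part of this file): init_timer_key()
 * is normally reached through the timer_setup() wrapper. A typical
 * pattern, with hypothetical names, is:
 *
 *	struct foo {
 *		struct timer_list timer;
 *	};
 *
 *	static void foo_timeout(struct timer_list *t)
 *	{
 *		struct foo *foo = from_timer(foo, t, timer);
 *		// handle the timeout, possibly re-arm with mod_timer()
 *	}
 *
 *	timer_setup(&foo->timer, foo_timeout, 0);
 *	mod_timer(&foo->timer, jiffies + HZ);
 */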

static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
	struct hlist_node *entry = &timer->entry;

	debug_deactivate(timer);

	__hlist_del(entry);
	if (clear_pending)
		entry->pprev = NULL;
	entry->next = LIST_POISON2;
}

static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
			     bool clear_pending)
{
	unsigned idx = timer_get_idx(timer);

	if (!timer_pending(timer))
		return 0;

	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
		__clear_bit(idx, base->pending_map);
		base->next_expiry_recalc = true;
	}

	detach_timer(timer, clear_pending);
	return 1;
}

static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
	return base;
}

static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
	return base;
}

static inline struct timer_base *get_timer_base(u32 tflags)
{
	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}

static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) &&
	    !(tflags & TIMER_PINNED))
		return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
	return get_timer_this_cpu_base(tflags);
}

static inline void forward_timer_base(struct timer_base *base)
{
	unsigned long jnow = READ_ONCE(jiffies);

	/*
	 * No need to forward if we are close enough below jiffies.
	 * Also while executing timers, base->clk is 1 offset ahead
	 * of jiffies to avoid endless requeuing to current jiffies.
	 */
	if ((long)(jnow - base->clk) < 1)
		return;

	/*
	 * If the next expiry value is > jiffies, then we fast forward to
	 * jiffies otherwise we forward to the next expiry value.
	 */
	if (time_after(base->next_expiry, jnow)) {
		base->clk = jnow;
	} else {
		if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
			return;
		base->clk = base->next_expiry;
	}
}

/*
 * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
 * that all timers which are tied to this base are locked, and the base
 * itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found in the base->vectors array.
 *
 * When a timer is migrating then the TIMER_MIGRATING flag is set and we
 * need to wait until the migration is done.
 */
static struct timer_base *lock_timer_base(struct timer_list *timer,
					  unsigned long *flags)
	__acquires(timer->base->lock)
{
	for (;;) {
		struct timer_base *base;
		u32 tf;

		/*
		 * We need to use READ_ONCE() here, otherwise the compiler
		 * might re-read @tf between the check for TIMER_MIGRATING
		 * and spin_lock().
		 */
		tf = READ_ONCE(timer->flags);

		if (!(tf & TIMER_MIGRATING)) {
			base = get_timer_base(tf);
			raw_spin_lock_irqsave(&base->lock, *flags);
			if (timer->flags == tf)
				return base;
			raw_spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

#define MOD_TIMER_PENDING_ONLY		0x01
#define MOD_TIMER_REDUCE		0x02
#define MOD_TIMER_NOTPENDING		0x04

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
	unsigned long clk = 0, flags, bucket_expiry;
	struct timer_base *base, *new_base;
	unsigned int idx = UINT_MAX;
	int ret = 0;

	BUG_ON(!timer->function);

	/*
	 * This is a common optimization triggered by the networking code - if
	 * the timer is re-modified to have the same timeout or ends up in the
	 * same array bucket then just return:
	 */
	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
		/*
		 * The downside of this optimization is that it can result in
		 * larger granularity than you would get from adding a new
		 * timer with this expiry.
		 */
		long diff = timer->expires - expires;

		if (!diff)
			return 1;
		if (options & MOD_TIMER_REDUCE && diff <= 0)
			return 1;

		/*
		 * We lock timer base and calculate the bucket index right
		 * here. If the timer ends up in the same bucket, then we
		 * just update the expiry time and avoid the whole
		 * dequeue/enqueue dance.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
		    time_before_eq(timer->expires, expires)) {
			ret = 1;
			goto out_unlock;
		}

		clk = base->clk;
		idx = calc_wheel_index(expires, clk, &bucket_expiry);

		/*
		 * Retrieve and compare the array index of the pending
		 * timer. If it matches, set the expiry to the new value so a
		 * subsequent call will exit in the expires check above.
		 */
		if (idx == timer_get_idx(timer)) {
			if (!(options & MOD_TIMER_REDUCE))
				timer->expires = expires;
			else if (time_after(timer->expires, expires))
				timer->expires = expires;
			ret = 1;
			goto out_unlock;
		}
	} else {
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);
	}

	ret = detach_if_pending(timer, base, false);
	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
		goto out_unlock;

	new_base = get_target_base(base, timer->flags);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the new base.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler yet has not finished. This also guarantees that the
		 * timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer->flags |= TIMER_MIGRATING;

			raw_spin_unlock(&base->lock);
			base = new_base;
			raw_spin_lock(&base->lock);
			WRITE_ONCE(timer->flags,
				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
			forward_timer_base(base);
		}
	}

	debug_timer_activate(timer);

	timer->expires = expires;
	/*
	 * If 'idx' was calculated above and the base time did not advance
	 * between calculating 'idx' and possibly switching the base, only
	 * enqueue_timer() is required. Otherwise we need to (re)calculate
	 * the wheel index via internal_add_timer().
	 */
	if (idx != UINT_MAX && clk == base->clk)
		enqueue_timer(base, timer, idx, bucket_expiry);
	else
		internal_add_timer(base, timer);

out_unlock:
	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
}
EXPORT_SYMBOL(mod_timer_pending);

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_timer);
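
/*
 * Usage note (illustrative): because of the same-bucket optimization in
 * __mod_timer(), frequently pushing a timer ahead of itself, e.g. a
 * hypothetical watchdog doing
 *
 *	mod_timer(&wd_timer, jiffies + 2 * HZ);
 *
 * on every packet, is cheap: once the new expiry maps to the bucket the
 * timer already occupies, mod_timer() only updates ->expires and returns
 * without the dequeue/enqueue dance.
 */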

/**
 * timer_reduce - Modify a timer's timeout if it would reduce the timeout
 * @timer:	The timer to be modified
 * @expires:	New timeout in jiffies
 *
 * timer_reduce() is very similar to mod_timer(), except that it will only
 * modify a pending timer if that would reduce the expiration time (it will
 * start a timer that isn't pending).
 */
int timer_reduce(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
}
EXPORT_SYMBOL(timer_reduce);

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(@timer) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function fields must be set prior calling this
 * function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	__mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct timer_base *new_base, *base;
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	new_base = get_timer_cpu_base(timer->flags, cpu);

	/*
	 * If @timer was on a different CPU, it must be migrated with the
	 * old base locked to prevent other operations proceeding with the
	 * wrong base locked.  See lock_timer_base().
	 */
	base = lock_timer_base(timer, &flags);
	if (base != new_base) {
		timer->flags |= TIMER_MIGRATING;

		raw_spin_unlock(&base->lock);
		base = new_base;
		raw_spin_lock(&base->lock);
		WRITE_ONCE(timer->flags,
			   (timer->flags & ~TIMER_BASEMASK) | cpu);
	}
	forward_timer_base(base);

	debug_timer_activate(timer);
	internal_add_timer(base, timer);
	raw_spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
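
/*
 * Example (illustrative sketch): per-CPU housekeeping code that must run
 * its callback on a specific CPU arms the timer with add_timer_on()
 * instead of add_timer():
 *
 *	timer_setup(&work->timer, collect_stats, TIMER_PINNED);
 *	work->timer.expires = jiffies + HZ;
 *	add_timer_on(&work->timer, cpu);
 *
 * "work" and collect_stats() are hypothetical; TIMER_PINNED additionally
 * keeps later mod_timer() calls from migrating the timer to another CPU.
 */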

/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = 0;

	debug_assert_init(timer);

	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		ret = detach_if_pending(timer, base, true);
		raw_spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: timer to delete
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 * A return value of -1 means the timer callback is currently running on
 * another CPU.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = -1;

	debug_assert_init(timer);

	base = lock_timer_base(timer, &flags);

	if (base->running_timer != timer)
		ret = detach_if_pending(timer, base, true);

	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

#ifdef CONFIG_PREEMPT_RT
static void timer_base_init_expiry_lock(struct timer_base *base)
{
	spin_lock_init(&base->expiry_lock);
}

static inline void timer_base_lock_expiry(struct timer_base *base)
{
	spin_lock(&base->expiry_lock);
}

static inline void timer_base_unlock_expiry(struct timer_base *base)
{
	spin_unlock(&base->expiry_lock);
}

/*
 * The counterpart to del_timer_wait_running().
 *
 * If there is a waiter for base->expiry_lock, then it was waiting for the
 * timer callback to finish. Drop expiry_lock and reacquire it. That allows
 * the waiter to acquire the lock and make progress.
 */
static void timer_sync_wait_running(struct timer_base *base)
{
	if (atomic_read(&base->timer_waiters)) {
		raw_spin_unlock_irq(&base->lock);
		spin_unlock(&base->expiry_lock);
		spin_lock(&base->expiry_lock);
		raw_spin_lock_irq(&base->lock);
	}
}

/*
 * This function is called on PREEMPT_RT kernels when the fast path
 * deletion of a timer failed because the timer callback function was
 * running.
 *
 * This prevents priority inversion, if the softirq thread on a remote CPU
 * got preempted, and it prevents a live lock when the task which tries to
 * delete a timer preempted the softirq thread running the timer callback
 * function.
 */
static void del_timer_wait_running(struct timer_list *timer)
{
	u32 tf;

	tf = READ_ONCE(timer->flags);
	if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) {
		struct timer_base *base = get_timer_base(tf);

		/*
		 * Mark the base as contended and grab the expiry lock,
		 * which is held by the softirq across the timer
		 * callback. Drop the lock immediately so the softirq can
		 * expire the next timer. In theory the timer could already
		 * be running again, but that's more than unlikely and just
		 * causes another wait loop.
		 */
		atomic_inc(&base->timer_waiters);
		spin_lock_bh(&base->expiry_lock);
		atomic_dec(&base->timer_waiters);
		spin_unlock_bh(&base->expiry_lock);
	}
}
#else
static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
static inline void timer_base_lock_expiry(struct timer_base *base) { }
static inline void timer_base_unlock_expiry(struct timer_base *base) { }
static inline void timer_sync_wait_running(struct timer_base *base) { }
static inline void del_timer_wait_running(struct timer_list *timer) { }
#endif

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() on SMP: besides deactivating
 * the timer it also makes sure the handler has finished executing on other
 * CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts unless the timer is an irqsafe one. The caller must
 * not hold locks which would prevent completion of the timer's
 * handler. The timer's handler must not call add_timer_on(). Upon exit the
 * timer is not queued and the handler is not running on any CPU.
 *
 * Note: For !irqsafe timers, you must not hold locks that are held in
 *   interrupt context while calling this function. Even if the lock has
 *   nothing to do with the timer in question.  Here's why:
 *
 *    CPU0                             CPU1
 *    ----                             ----
 *                                     <SOFTIRQ>
 *                                       call_timer_fn();
 *                                       base->running_timer = mytimer;
 *    spin_lock_irq(somelock);
 *                                     <IRQ>
 *                                        spin_lock(somelock);
 *    del_timer_sync(mytimer);
 *    while (base->running_timer == mytimer);
 *
 *   Now del_timer_sync() will never return and never release somelock.
 *   The interrupt on the other CPU is waiting to grab somelock but
 *   it has interrupted the softirq that CPU0 is waiting to finish.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	int ret;

#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * If lockdep gives a backtrace here, please reference
	 * the synchronization rules above.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif
	/*
	 * don't use it in hardirq context, because it
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));

	/*
	 * Must be able to sleep on PREEMPT_RT because of the slowpath in
	 * del_timer_wait_running().
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE))
		lockdep_assert_preemption_enabled();

	do {
		ret = try_to_del_timer_sync(timer);

		if (unlikely(ret < 0)) {
			del_timer_wait_running(timer);
			cpu_relax();
		}
	} while (ret < 0);

	return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
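
/*
 * Example (illustrative sketch): the canonical teardown sequence for a
 * timer whose callback can re-arm it is to first set a (hypothetical)
 * shutdown flag that the callback checks, then call del_timer_sync():
 *
 *	priv->shutdown = true;		// callback must not re-arm now
 *	del_timer_sync(&priv->timer);	// wait for a running callback
 *	kfree(priv);			// safe: handler cannot run anymore
 *
 * Without the flag, a callback running concurrently could re-arm the
 * timer between the deactivation and the free.
 */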

static void call_timer_fn(struct timer_list *timer,
			  void (*fn)(struct timer_list *),
			  unsigned long baseclk)
{
	int count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, this we need to take into
	 * account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer, baseclk);
	fn(timer);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (count != preempt_count()) {
		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
			  fn, count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count_set(count);
	}
}

static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
	/*
	 * This value is required only for tracing. base->clk was
	 * incremented directly before expire_timers was called. But expiry
	 * is related to the old base->clk value.
	 */
	unsigned long baseclk = base->clk - 1;

	while (!hlist_empty(head)) {
		struct timer_list *timer;
		void (*fn)(struct timer_list *);

		timer = hlist_entry(head->first, struct timer_list, entry);

		base->running_timer = timer;
		detach_timer(timer, true);

		fn = timer->function;

		if (timer->flags & TIMER_IRQSAFE) {
			raw_spin_unlock(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			raw_spin_lock(&base->lock);
			base->running_timer = NULL;
		} else {
			raw_spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			raw_spin_lock_irq(&base->lock);
			base->running_timer = NULL;
			timer_sync_wait_running(base);
		}
	}
}

static int collect_expired_timers(struct timer_base *base,
				  struct hlist_head *heads)
{
	unsigned long clk = base->clk = base->next_expiry;
	struct hlist_head *vec;
	int i, levels = 0;
	unsigned int idx;

	for (i = 0; i < LVL_DEPTH; i++) {
		idx = (clk & LVL_MASK) + i * LVL_SIZE;

		if (__test_and_clear_bit(idx, base->pending_map)) {
			vec = base->vectors + idx;
			hlist_move_list(vec, heads++);
			levels++;
		}
		/* Is it time to look at the next level? */
		if (clk & LVL_CLK_MASK)
			break;
		/* Shift clock for the next level granularity */
		clk >>= LVL_CLK_SHIFT;
	}
	return levels;
}

/*
 * Find the next pending bucket of a level. Search from level start (@offset)
 * + @clk upwards and if nothing there, search from start of the level
 * (@offset) up to @clk.
 */
static int next_pending_bucket(struct timer_base *base, unsigned offset,
			       unsigned clk)
{
	unsigned pos, start = offset + clk;
	unsigned end = offset + LVL_SIZE;

	pos = find_next_bit(base->pending_map, end, start);
	if (pos < end)
		return pos - start;

	pos = find_next_bit(base->pending_map, start, offset);
	return pos < start ? pos + LVL_SIZE - start : -1;
}

/*
 * Search the first expiring timer in the various clock levels. Caller must
 * hold base->lock.
 */
static unsigned long __next_timer_interrupt(struct timer_base *base)
{
	unsigned long clk, next, adj;
	unsigned lvl, offset = 0;

	next = base->clk + NEXT_TIMER_MAX_DELTA;
	clk = base->clk;
	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
		int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
		unsigned long lvl_clk = clk & LVL_CLK_MASK;

		if (pos >= 0) {
			unsigned long tmp = clk + (unsigned long) pos;

			tmp <<= LVL_SHIFT(lvl);
			if (time_before(tmp, next))
				next = tmp;

			/*
			 * If the next expiration happens before we reach
			 * the next level, no need to check further.
			 */
			if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
				break;
		}

		/*
		 * Clock for the next level. If the current level clock lower
		 * bits are zero, we look at the next level as is. If not we
		 * need to advance it by one, because that's going to be the
		 * next expiring bucket in that level: a non-zero remainder
		 * means the next wrap of this level already falls into the
		 * following bucket of the next level.
		 */
		adj = lvl_clk ? 1 : 0;
		clk >>= LVL_CLK_SHIFT;
		clk += adj;
	}

	base->next_expiry_recalc = false;
	base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);

	return next;
}

#ifdef CONFIG_NO_HZ_COMMON
/*
 * Check, if the next hrtimer event is before the next timer wheel
 * event:
 */
static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
{
	u64 nextevt = hrtimer_get_next_event();

	/*
	 * If high resolution timers are enabled
	 * hrtimer_get_next_event() returns KTIME_MAX.
	 */
	if (expires <= nextevt)
		return expires;

	/*
	 * If the next timer is already expired, return the tick base
	 * time so the tick is fired immediately.
	 */
	if (nextevt <= basem)
		return basem;

	/*
	 * Round up to the next jiffie. High resolution timers are
	 * off, so the hrtimers are expired in the tick and we need to
	 * make sure that this tick really expires the timer to avoid
	 * a ping pong of the nohz stop code.
	 *
	 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
	 */
	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}

/**
 * get_next_timer_interrupt - return the time (clock mono) of the next timer
 * @basej:	base time jiffies
 * @basem:	base time clock monotonic
 *
 * Returns the tick aligned clock monotonic time of the next pending
 * timer or KTIME_MAX if no timer is pending.
 */
u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
	u64 expires = KTIME_MAX;
	unsigned long nextevt;

	/*
	 * Pretend that there is no timer pending if the cpu is offline.
	 * Possible pending timers will be migrated later to an active cpu.
	 */
	if (cpu_is_offline(smp_processor_id()))
		return expires;

	raw_spin_lock(&base->lock);
	if (base->next_expiry_recalc)
		base->next_expiry = __next_timer_interrupt(base);
	nextevt = base->next_expiry;

	/*
	 * We have a fresh next event. Check whether we can forward the
	 * base. We can only do that when @basej is past base->clk,
	 * otherwise we might rewind base->clk.
	 */
	if (time_after(basej, base->clk)) {
		if (time_after(nextevt, basej))
			base->clk = basej;
		else if (time_after(nextevt, base->clk))
			base->clk = nextevt;
	}

	if (time_before_eq(nextevt, basej)) {
		expires = basem;
		base->is_idle = false;
	} else {
		if (base->timers_pending)
			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
		/*
		 * If we expect to sleep more than a tick, mark the base
		 * idle. Any timer added later must then forward the base
		 * clock itself to keep granularity small, since the tick
		 * is stopped.
		 */
		if ((expires - basem) > TICK_NSEC)
			base->is_idle = true;
	}
	raw_spin_unlock(&base->lock);

	return cmp_next_hrtimer_event(basem, expires);
}

/**
 * timer_clear_idle - Clear the idle state of the timer base
 *
 * Called with interrupts disabled
 */
void timer_clear_idle(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * We do this unlocked. The worst outcome is a remote enqueue sending
	 * a pointless IPI, but taking the lock would just make the window for
	 * sending the IPI a few instructions smaller for the cost of taking
	 * the lock in the exit from idle path.
	 */
	base->is_idle = false;
}
#endif

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 */
static inline void __run_timers(struct timer_base *base)
{
	struct hlist_head heads[LVL_DEPTH];
	int levels;

	if (time_before(jiffies, base->next_expiry))
		return;

	timer_base_lock_expiry(base);
	raw_spin_lock_irq(&base->lock);

	while (time_after_eq(jiffies, base->clk) &&
	       time_after_eq(jiffies, base->next_expiry)) {
		levels = collect_expired_timers(base, heads);
		/*
		 * The two possible reasons for not finding any expired
		 * timer at this clk are that all matching timers have been
		 * dequeued or no timer has been queued since
		 * base::next_expiry was set to base::clk +
		 * NEXT_TIMER_MAX_DELTA.
		 */
		WARN_ON_ONCE(!levels && !base->next_expiry_recalc
			     && base->timers_pending);
		base->clk++;
		base->next_expiry = __next_timer_interrupt(base);

		while (levels--)
			expire_timers(base, heads + levels);
	}
	raw_spin_unlock_irq(&base->lock);
	timer_base_unlock_expiry(base);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	__run_timers(base);
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
static void run_local_timers(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	hrtimer_run_queues();
	/* Raise the softirq only if required. */
	if (time_before(jiffies, base->next_expiry)) {
		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
			return;
		/* CPU is awake, so check the deferrable base. */
		base++;
		if (time_before(jiffies, base->next_expiry))
			return;
	}
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;

	PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_tick();
#endif
	scheduler_tick();
	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
		run_posix_cpu_timers();
}

/*
 * Since schedule_timeout()'s timer is defined on the stack, it must store
 * the target task on the stack as well.
 */
struct process_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void process_timeout(struct timer_list *t)
{
	struct process_timer *timeout = from_timer(timeout, t, timer);

	wake_up_process(timeout->task);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have elapsed.
 * The function behavior depends on the current task state
 * (see also set_current_state() description):
 *
 * %TASK_RUNNING - the scheduler is called, but the task does not sleep
 * at all. That happens because sched_submit_work() does nothing for
 * tasks in %TASK_RUNNING state.
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * Returns 0 when the timer has expired, otherwise the remaining time in
 * jiffies will be returned. In all cases the return value is guaranteed
 * to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct process_timer timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative values
		 * but it is nicer to return a valid offset (>= 0) to
		 * allow the caller to do everything it wants with the
		 * retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happen anyway). You just have the printk()
		 * that will tell you if something has gone wrong and where.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			__set_current_state(TASK_RUNNING);
			goto out;
		}
	}

	expire = timeout + jiffies;

	timer.task = current;
	timer_setup_on_stack(&timer.timer, process_timeout, 0);
	__mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
	schedule();
	del_singleshot_timer_sync(&timer.timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer.timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
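
/*
 * Example (illustrative): the task state must be set before calling
 * schedule_timeout(), otherwise the task is not put to sleep:
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(msecs_to_jiffies(100));
 *
 * remaining is 0 on timeout, or the jiffies left if the task was woken
 * early. The helpers below bundle the state setting with the call.
 */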

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/*
 * Like schedule_timeout_uninterruptible(), except this task will not
 * contribute to load average.
 */
signed long __sched schedule_timeout_idle(signed long timeout)
{
	__set_current_state(TASK_IDLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_idle);

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
{
	struct timer_list *timer;
	int cpu = new_base->cpu;

	while (!hlist_empty(head)) {
		timer = hlist_entry(head->first, struct timer_list, entry);
		detach_timer(timer, false);
		timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
		internal_add_timer(new_base, timer);
	}
}

int timers_prepare_cpu(unsigned int cpu)
{
	struct timer_base *base;
	int b;

	for (b = 0; b < NR_BASES; b++) {
		base = per_cpu_ptr(&timer_bases[b], cpu);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		base->timers_pending = false;
		base->is_idle = false;
	}
	return 0;
}

int timers_dead_cpu(unsigned int cpu)
{
	struct timer_base *old_base;
	struct timer_base *new_base;
	int b, i;

	BUG_ON(cpu_online(cpu));

	for (b = 0; b < NR_BASES; b++) {
		old_base = per_cpu_ptr(&timer_bases[b], cpu);
		new_base = get_cpu_ptr(&timer_bases[b]);
		/*
		 * The caller is globally serialized and nobody else
		 * takes two locks at once, deadlock is not possible.
		 */
		raw_spin_lock_irq(&new_base->lock);
		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

		/*
		 * The current CPUs base clock might be stale. Update it
		 * before moving the timers over.
		 */
		forward_timer_base(new_base);

		BUG_ON(old_base->running_timer);

		for (i = 0; i < WHEEL_SIZE; i++)
			migrate_timer_list(new_base, old_base->vectors + i);

		raw_spin_unlock(&old_base->lock);
		raw_spin_unlock_irq(&new_base->lock);
		put_cpu_ptr(&timer_bases);
	}
	return 0;
}

#endif

static void __init init_timer_cpu(int cpu)
{
	struct timer_base *base;
	int i;

	for (i = 0; i < NR_BASES; i++) {
		base = per_cpu_ptr(&timer_bases[i], cpu);
		base->cpu = cpu;
		raw_spin_lock_init(&base->lock);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		timer_base_init_expiry_lock(base);
	}
}

static void __init init_timer_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		init_timer_cpu(cpu);
}

void __init init_timers(void)
{
	init_timer_cpus();
	posix_cputimers_init_work();
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 *
 * Returns the remaining time in milliseconds if interrupted by a
 * signal, zero otherwise.
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);

/**
 * usleep_range - Sleep for an approximate time
 * @min: Minimum time in usecs to sleep
 * @max: Maximum time in usecs to sleep
 *
 * In non-atomic context where the exact wakeup time is flexible, use
 * usleep_range() instead of udelay(). The sleep improves responsiveness
 * by avoiding the CPU-hogging busy-wait of udelay(), and the range
 * reduces power usage by allowing hrtimers to take advantage of an
 * already scheduled interrupt instead of scheduling a new one just for
 * this sleep.
 */
void __sched usleep_range(unsigned long min, unsigned long max)
{
	ktime_t exp = ktime_add_us(ktime_get(), min);
	u64 delta = (u64)(max - min) * NSEC_PER_USEC;

	for (;;) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		/* Do not return before the requested sleep time has elapsed */
		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
			break;
	}
}
EXPORT_SYMBOL(usleep_range);
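
/*
 * Usage note (illustrative): for sleeps in the microsecond range prefer
 * usleep_range() over msleep(), since msleep() is jiffy based and can
 * oversleep considerably for small values (msleep(1) may sleep for many
 * milliseconds at low HZ). For example:
 *
 *	usleep_range(100, 200);		// 100-200us, hrtimer based
 *	msleep(20);			// >= 20ms, timer-wheel based
 *
 * The min/max slack in usleep_range() lets the hrtimer subsystem
 * coalesce the wakeup with other pending timers.
 */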