// SPDX-License-Identifier: GPL-2.0
/*
 *  Kernel internal timers
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  This file implements the per-CPU hierarchical timer wheel that backs
 *  add_timer(), mod_timer() and friends.
 */
#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);
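/*
 * The timer wheel has LVL_DEPTH array levels. Each level provides an
 * array of LVL_SIZE buckets. Each level is driven by its own clock and
 * therefore each level has a different granularity.
 *
 * The level granularity is:		LVL_CLK_DIV ^ level
 * The level clock frequency is:	HZ / (LVL_CLK_DIV ^ level)
 *
 * The array level of a newly armed timer depends on the relative expiry
 * time. The farther the expiry time is away, the higher the array level
 * and therefore the granularity becomes.
 *
 * Contrary to the original timer wheel implementation, which aims for
 * 'exact' expiry of the timers, this implementation removes the need for
 * recascading the timers into the lower array levels. The granularity
 * levels provide implicit batching of timers with close expiry times.
 *
 * This is an optimization for the majority of timer wheel use cases:
 * timeouts. The vast majority of timeout timers (networking, disk I/O,
 * ...) are canceled before expiry. If a timeout expires it usually
 * indicates that normal operation is disturbed, so it does not matter
 * much whether the timeout comes with a slight delay.
 */
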
#define LVL_CLK_SHIFT	3
#define LVL_CLK_DIV	(1UL << LVL_CLK_SHIFT)
#define LVL_CLK_MASK	(LVL_CLK_DIV - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))

/*
 * The time start value for each level to select the bucket at enqueue
 * time.
 */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

/* Size of each clock level */
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

/* Level depth */
#if HZ > 100
# define LVL_DEPTH	9
#else
# define LVL_DEPTH	8
#endif

/* The cutoff (max. capacity of the wheel) */
#define WHEEL_TIMEOUT_CUTOFF	(LVL_START(LVL_DEPTH))
#define WHEEL_TIMEOUT_MAX	(WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))

/*
 * The resulting wheel size. If NOHZ is configured we allocate two
 * wheels, one for the deferrable and one for the standard timers.
 */
#define WHEEL_SIZE	(LVL_SIZE * LVL_DEPTH)

#ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES	2
# define BASE_STD	0
# define BASE_DEF	1
#else
# define NR_BASES	1
# define BASE_STD	0
# define BASE_DEF	0
#endif

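/*
 * struct timer_base - per-CPU timer wheel instance
 * @lock:		Lock protecting the timer base
 * @running_timer:	Timer whose callback is currently executed, if any
 * @expiry_lock:	PREEMPT_RT only: synchronizes callback execution
 *			against waiters in del_timer_sync()
 * @timer_waiters:	PREEMPT_RT only: number of del_timer_sync() waiters
 * @clk:		Clock of the wheel (in jiffies); bucket indices are
 *			computed relative to this value
 * @next_expiry:	Earliest bucket expiry of any pending timer
 * @cpu:		CPU number this base belongs to
 * @next_expiry_recalc:	True when @next_expiry is stale and must be
 *			recomputed via __next_timer_interrupt()
 * @is_idle:		True when the CPU went (NOHZ) idle with no timer
 *			due in the next tick
 * @pending_map:	Bitmap of wheel buckets which contain timers
 * @vectors:		The wheel buckets themselves
 */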
struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;
#ifdef CONFIG_PREEMPT_RT
	spinlock_t		expiry_lock;
	atomic_t		timer_waiters;
#endif
	unsigned long		clk;
	unsigned long		next_expiry;
	unsigned int		cpu;
	bool			next_expiry_recalc;
	bool			is_idle;
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;

static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);

#ifdef CONFIG_NO_HZ_COMMON

static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
static DEFINE_MUTEX(timer_keys_mutex);

static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);

#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;

DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);

static void timers_update_migration(void)
{
	if (sysctl_timer_migration && tick_nohz_active)
		static_branch_enable(&timers_migration_enabled);
	else
		static_branch_disable(&timers_migration_enabled);
}
#else
static inline void timers_update_migration(void) { }
#endif

static void timer_update_keys(struct work_struct *work)
{
	mutex_lock(&timer_keys_mutex);
	timers_update_migration();
	static_branch_enable(&timers_nohz_active);
	mutex_unlock(&timer_keys_mutex);
}

void timers_update_nohz(void)
{
	schedule_work(&timer_update_work);
}

int timer_migration_handler(struct ctl_table *table, int write,
			    void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&timer_keys_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write)
		timers_update_migration();
	mutex_unlock(&timer_keys_mutex);
	return ret;
}

static inline bool is_timers_nohz_active(void)
{
	return static_branch_unlikely(&timers_nohz_active);
}
#else
static inline bool is_timers_nohz_active(void) { return false; }
#endif

static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * 3*cpunr again to get the "true" skewed value.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding, as an extreme upper bound for it.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up)	/* round down */
		j = j - rem;
	else				/* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	/*
	 * Make sure j is still in the future. Otherwise return the
	 * unmodified value.
	 */
	return time_is_after_jiffies(j) ? j : original;
}
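/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long
 * as they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead to lock
 * contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */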
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

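/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * Like __round_jiffies(), but @j is a relative timeout: the current
 * jiffies value is added before rounding and subtracted again
 * afterwards, and the per-CPU skew applies in the same way.
 *
 * The return value is the rounded version of the @j parameter.
 */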
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

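/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * Rounds for the local CPU; see __round_jiffies() for details.
 *
 * The return value is the rounded version of the @j parameter.
 */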
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

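/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * Rounds a relative timeout for the local CPU; see
 * __round_jiffies_relative() for details.
 *
 * The return value is the rounded version of the @j parameter.
 */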
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

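/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */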
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

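/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will
 * never round down.
 */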
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

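/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down.
 */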
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

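/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will
 * never round down.
 */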
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);

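/*
 * The wheel array index a pending timer is queued in is cached in the
 * upper bits of timer->flags (TIMER_ARRAYMASK), so detach_if_pending()
 * can clear the bucket's pending bit without recomputing the index.
 */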
static inline unsigned int timer_get_idx(struct timer_list *timer)
{
	return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
}

static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
{
	timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
			idx << TIMER_ARRAYSHIFT;
}

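/*
 * Helper function to calculate the array index for a given expiry
 * time.
 */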
static inline unsigned calc_index(unsigned long expires, unsigned lvl,
				  unsigned long *bucket_expiry)
{
	/*
	 * The timer wheel has to guarantee that a timer does not fire
	 * early. Early expiry can happen due to:
	 * - Timer is armed at the edge of a tick
	 * - Truncation of the expiry time in the outer wheel levels
	 *
	 * Round up with level granularity to prevent this.
	 */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

static int calc_wheel_index(unsigned long expires, unsigned long clk,
			    unsigned long *bucket_expiry)
{
	unsigned long delta = expires - clk;
	unsigned int idx;

	if (delta < LVL_START(1)) {
		idx = calc_index(expires, 0, bucket_expiry);
	} else if (delta < LVL_START(2)) {
		idx = calc_index(expires, 1, bucket_expiry);
	} else if (delta < LVL_START(3)) {
		idx = calc_index(expires, 2, bucket_expiry);
	} else if (delta < LVL_START(4)) {
		idx = calc_index(expires, 3, bucket_expiry);
	} else if (delta < LVL_START(5)) {
		idx = calc_index(expires, 4, bucket_expiry);
	} else if (delta < LVL_START(6)) {
		idx = calc_index(expires, 5, bucket_expiry);
	} else if (delta < LVL_START(7)) {
		idx = calc_index(expires, 6, bucket_expiry);
	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
		idx = calc_index(expires, 7, bucket_expiry);
	} else if ((long) delta < 0) {
		idx = clk & LVL_MASK;
		*bucket_expiry = clk;
	} else {
		/*
		 * Force expire obscene large timeouts to expire at the
		 * capacity limit of the wheel.
		 */
		if (delta >= WHEEL_TIMEOUT_CUTOFF)
			expires = clk + WHEEL_TIMEOUT_MAX;

		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
	}
	return idx;
}

static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
	if (!is_timers_nohz_active())
		return;

	/*
	 * Deferrable timers do not prevent the CPU from entering dyntick
	 * idle; only a NOHZ-full CPU needs a kick so its housekeeping can
	 * pick the timer up.
	 */
	if (timer->flags & TIMER_DEFERRABLE) {
		if (tick_nohz_full_cpu(base->cpu))
			wake_up_nohz_cpu(base->cpu);
		return;
	}

	/*
	 * We might have to IPI the remote CPU if the base is idle and the
	 * timer is not deferrable. If the other CPU is on the way to idle
	 * then it can't set base->is_idle as we hold the base lock:
	 */
	if (base->is_idle)
		wake_up_nohz_cpu(base->cpu);
}
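/*
 * Enqueue the timer into the hash bucket, mark it pending in
 * the bitmap, store the index in the timer flags then wake up
 * the target CPU if needed.
 */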
static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
			  unsigned int idx, unsigned long bucket_expiry)
{
	hlist_add_head(&timer->entry, base->vectors + idx);
	__set_bit(idx, base->pending_map);
	timer_set_idx(timer, idx);

	trace_timer_start(timer, timer->expires, timer->flags);

	/*
	 * Check whether this is the new first expiring timer. The
	 * effective expiry time of the timer is required here
	 * (bucket_expiry) instead of timer->expires.
	 */
	if (time_before(bucket_expiry, base->next_expiry)) {
		/*
		 * Set the next expiry time and kick the CPU so it
		 * can reevaluate the wheel:
		 */
		base->next_expiry = bucket_expiry;
		base->next_expiry_recalc = false;
		trigger_dyntick_cpu(base, timer);
	}
}

static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	unsigned long bucket_expiry;
	unsigned int idx;

	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
	enqueue_timer(base, timer, idx, bucket_expiry);
}

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr timer_debug_descr;

static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

static bool timer_is_static_object(void *addr)
{
	struct timer_list *timer = addr;

	return (timer->entry.pprev == NULL &&
		timer->entry.next == TIMER_ENTRY_STATIC);
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/* Stub timer callback for improperly used timers. */
static void stub_timer(struct timer_list *unused)
{
	WARN_ON(1);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);
		fallthrough;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_assert_init is called when:
 * - an untracked/uninitialized object is found
 */
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.is_static_object	= timer_is_static_object,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_free(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     void (*func)(struct timer_list *),
			     unsigned int flags,
			     const char *name, struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key)
{
	timer->entry.pprev = NULL;
	timer->function = func;
	timer->flags = flags | raw_smp_processor_id();
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
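/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @func: timer callback function
 * @flags: timer flags
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of
 * the other timer functions.
 */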
void init_timer_key(struct timer_list *timer,
		    void (*func)(struct timer_list *), unsigned int flags,
		    const char *name, struct lock_class_key *key)
{
	debug_init(timer);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);

static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
	struct hlist_node *entry = &timer->entry;

	debug_deactivate(timer);

	__hlist_del(entry);
	if (clear_pending)
		entry->pprev = NULL;
	entry->next = LIST_POISON2;
}

static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
			     bool clear_pending)
{
	unsigned idx = timer_get_idx(timer);

	if (!timer_pending(timer))
		return 0;

	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
		__clear_bit(idx, base->pending_map);
		base->next_expiry_recalc = true;
	}

	detach_timer(timer, clear_pending);
	return 1;
}

static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
	return base;
}

static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
	return base;
}

static inline struct timer_base *get_timer_base(u32 tflags)
{
	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}

static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) &&
	    !(tflags & TIMER_PINNED))
		return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
	return get_timer_this_cpu_base(tflags);
}

static inline void forward_timer_base(struct timer_base *base)
{
	unsigned long jnow = READ_ONCE(jiffies);

	/*
	 * No need to forward if we are close enough below jiffies.
	 * Also while executing timers, base->clk is 1 offset ahead
	 * of jiffies to avoid endless requeuing to current jiffies.
	 */
	if ((long)(jnow - base->clk) < 1)
		return;

	/*
	 * If the next expiry value is > jiffies, then we fast forward to
	 * jiffies otherwise we forward to the next expiry value.
	 */
	if (time_after(base->next_expiry, jnow)) {
		base->clk = jnow;
	} else {
		if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
			return;
		base->clk = base->next_expiry;
	}
}
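/*
 * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
 * that all timers which are tied to this base are locked, and the base
 * itself is locked too.
 *
 * So __run_timers()/migrate_timer_list() can safely modify all timers
 * which could be found in the base->vectors array.
 *
 * When a timer is migrating then the TIMER_MIGRATING flag is set and we
 * need to wait until it is released and then lock the base again.
 */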
static struct timer_base *lock_timer_base(struct timer_list *timer,
					  unsigned long *flags)
	__acquires(timer->base->lock)
{
	for (;;) {
		struct timer_base *base;
		u32 tf;

		/*
		 * We need to use READ_ONCE() here, otherwise the compiler
		 * might re-read @tf between the check for TIMER_MIGRATING
		 * and spin_lock().
		 */
		tf = READ_ONCE(timer->flags);

		if (!(tf & TIMER_MIGRATING)) {
			base = get_timer_base(tf);
			raw_spin_lock_irqsave(&base->lock, *flags);
			if (timer->flags == tf)
				return base;
			raw_spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

#define MOD_TIMER_PENDING_ONLY		0x01
#define MOD_TIMER_REDUCE		0x02
#define MOD_TIMER_NOTPENDING		0x04

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
	unsigned long clk = 0, flags, bucket_expiry;
	struct timer_base *base, *new_base;
	unsigned int idx = UINT_MAX;
	int ret = 0;

	BUG_ON(!timer->function);

	/*
	 * This is a common optimization triggered by the networking code - if
	 * the timer is re-modified to have the same timeout or ends up in the
	 * same array bucket then just return:
	 */
	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
		/*
		 * The downside of this optimization is that it can result in
		 * larger granularity than you would get from adding a new
		 * timer with this expiry.
		 */
		long diff = timer->expires - expires;

		if (!diff)
			return 1;
		if (options & MOD_TIMER_REDUCE && diff <= 0)
			return 1;

		/*
		 * We lock timer base and calculate the bucket index right
		 * here. If the timer ends up in the same bucket, then we
		 * just update the expiry time and avoid the whole
		 * dequeue/enqueue dance.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
		    time_before_eq(timer->expires, expires)) {
			ret = 1;
			goto out_unlock;
		}

		clk = base->clk;
		idx = calc_wheel_index(expires, clk, &bucket_expiry);

		/*
		 * Retrieve and compare the array index of the pending
		 * timer. If it matches, set the expiry to the new value so a
		 * subsequent call will exit in the expires check above.
		 */
		if (idx == timer_get_idx(timer)) {
			if (!(options & MOD_TIMER_REDUCE))
				timer->expires = expires;
			else if (time_after(timer->expires, expires))
				timer->expires = expires;
			ret = 1;
			goto out_unlock;
		}
	} else {
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);
	}

	ret = detach_if_pending(timer, base, false);
	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
		goto out_unlock;

	new_base = get_target_base(base, timer->flags);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the new base.
		 * However we can't change timer's base while it is running,
		 * otherwise del_timer_sync() can't detect that the timer's
		 * handler yet has not finished. This also guarantees that
		 * the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer->flags |= TIMER_MIGRATING;

			raw_spin_unlock(&base->lock);
			base = new_base;
			raw_spin_lock(&base->lock);
			WRITE_ONCE(timer->flags,
				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
			forward_timer_base(base);
		}
	}

	debug_timer_activate(timer);

	timer->expires = expires;
	/*
	 * If 'idx' was calculated above and the base time did not advance
	 * between calculating 'idx' and possibly switching the base, only
	 * enqueue_timer() is required. Otherwise we need to (re)calculate
	 * the wheel index via internal_add_timer().
	 */
	if (idx != UINT_MAX && clk == base->clk)
		enqueue_timer(base, timer, idx, bucket_expiry);
	else
		internal_add_timer(base, timer);

out_unlock:
	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
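/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(), but
 * will not activate inactive timers.
 *
 * Return:
 * * %0 - The timer was inactive and not modified
 * * %1 - The timer was active and requeued to expire at @expires
 */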
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
}
EXPORT_SYMBOL(mod_timer_pending);

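/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated). It is
 * a shortcut for del_timer(); timer->expires = expires; add_timer();
 * except that it goes to great lengths to avoid taking the base lock
 * when the timer would not change its bucket.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */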
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_timer);

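/**
 * timer_reduce - modify a timer's timeout if it would reduce the timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * timer_reduce() is very similar to mod_timer(), except that it will
 * only modify a pending timer if that would reduce the expiration time;
 * an inactive timer is started unconditionally.
 */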
int timer_reduce(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
}
EXPORT_SYMBOL(timer_reduce);

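/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(@timer) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires and ->function fields must be set prior to
 * calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the
 * next timer tick.
 */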
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	__mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
}
EXPORT_SYMBOL(add_timer);

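/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */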
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct timer_base *new_base, *base;
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	new_base = get_timer_cpu_base(timer->flags, cpu);

	/*
	 * If @timer was on a different CPU, it must be migrated with the
	 * old base locked to prevent other operations proceeding with the
	 * wrong base locked. See lock_timer_base().
	 */
	base = lock_timer_base(timer, &flags);
	if (base != new_base) {
		timer->flags |= TIMER_MIGRATING;

		raw_spin_unlock(&base->lock);
		base = new_base;
		raw_spin_lock(&base->lock);
		WRITE_ONCE(timer->flags,
			   (timer->flags & ~TIMER_BASEMASK) | cpu);
	}
	forward_timer_base(base);

	debug_timer_activate(timer);
	internal_add_timer(base, timer);
	raw_spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

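/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and
 * inactive timers.
 *
 * The function returns whether it has deactivated a pending timer or
 * not. (ie. del_timer() of an inactive timer returns 0, del_timer() of
 * an active timer returns 1.)
 */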
int del_timer(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = 0;

	debug_assert_init(timer);

	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		ret = detach_if_pending(timer, base, true);
		raw_spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

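/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: timer to delete
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any
 * CPU. A negative return value means the timer's callback was running
 * and nothing was changed.
 */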
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = -1;

	debug_assert_init(timer);

	base = lock_timer_base(timer, &flags);

	if (base->running_timer != timer)
		ret = detach_if_pending(timer, base, true);

	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

#ifdef CONFIG_PREEMPT_RT
static __init void timer_base_init_expiry_lock(struct timer_base *base)
{
	spin_lock_init(&base->expiry_lock);
}

static inline void timer_base_lock_expiry(struct timer_base *base)
{
	spin_lock(&base->expiry_lock);
}

static inline void timer_base_unlock_expiry(struct timer_base *base)
{
	spin_unlock(&base->expiry_lock);
}

/*
 * The counterpart to del_timer_wait_running().
 *
 * If there is a waiter for base->expiry_lock, then it was waiting for
 * the timer callback to finish. Drop expiry_lock and reacquire it. That
 * allows the waiter to acquire the lock and make progress.
 */
static void timer_sync_wait_running(struct timer_base *base)
{
	if (atomic_read(&base->timer_waiters)) {
		spin_unlock(&base->expiry_lock);
		spin_lock(&base->expiry_lock);
	}
}

/*
 * This function is called on PREEMPT_RT kernels when the fast path
 * deletion of a timer failed because the timer callback function was
 * running.
 *
 * This prevents priority inversion, if the softirq thread on a remote
 * CPU got preempted, and it prevents a live lock when the task which
 * tries to delete a timer preempted the softirq thread running the
 * timer callback function.
 */
static void del_timer_wait_running(struct timer_list *timer)
{
	u32 tf;

	tf = READ_ONCE(timer->flags);
	if (!(tf & TIMER_MIGRATING)) {
		struct timer_base *base = get_timer_base(tf);

		/*
		 * Mark the base as contended and grab the expiry lock,
		 * which is held by the softirq across the timer
		 * callback. Drop the lock immediately so the softirq can
		 * expire the next timer. In theory the timer could already
		 * be running again, but that's more than unlikely and just
		 * causes another wait loop.
		 */
		atomic_inc(&base->timer_waiters);
		spin_lock_bh(&base->expiry_lock);
		atomic_dec(&base->timer_waiters);
		spin_unlock_bh(&base->expiry_lock);
	}
}
#else
static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
static inline void timer_base_lock_expiry(struct timer_base *base) { }
static inline void timer_base_unlock_expiry(struct timer_base *base) { }
static inline void timer_sync_wait_running(struct timer_base *base) { }
static inline void del_timer_wait_running(struct timer_list *timer) { }
#endif

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
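/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() in that it also waits for
 * the handler of the timer to finish on other CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts unless the timer is an irqsafe one. The caller must
 * not hold locks which would prevent completion of the timer's handler.
 * The timer's handler must not call add_timer_on(). Upon exit the timer
 * is not queued and the handler is not running on any CPU.
 *
 * Note: For !irqsafe timers, you must not hold locks that are held in
 * interrupt context while calling this function, because a deadlock can
 * occur if the interrupted context is waiting on such a lock.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */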
int del_timer_sync(struct timer_list *timer)
{
	int ret;

#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * If lockdep gives a backtrace here, please reference
	 * the synchronization rules above.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif
	/*
	 * don't use it in hardirq context, because it
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));

	do {
		ret = try_to_del_timer_sync(timer);

		if (unlikely(ret < 0)) {
			del_timer_wait_running(timer);
			cpu_relax();
		}
	} while (ret < 0);

	return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif

static void call_timer_fn(struct timer_list *timer,
			  void (*fn)(struct timer_list *),
			  unsigned long baseclk)
{
	int count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it. For lockdep this means
	 * that accessing timer->lockdep_map after fn() might be a
	 * use after free, so make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer, baseclk);
	fn(timer);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (count != preempt_count()) {
		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
			  fn, count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count_set(count);
	}
}

static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
	/*
	 * This value is required only for tracing. base->clk was
	 * incremented directly before expire_timers was called. But expiry
	 * is related to the old base->clk value.
	 */
	unsigned long baseclk = base->clk - 1;

	while (!hlist_empty(head)) {
		struct timer_list *timer;
		void (*fn)(struct timer_list *);

		timer = hlist_entry(head->first, struct timer_list, entry);

		base->running_timer = timer;
		detach_timer(timer, true);

		fn = timer->function;

		if (timer->flags & TIMER_IRQSAFE) {
			raw_spin_unlock(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			raw_spin_lock(&base->lock);
		} else {
			raw_spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			timer_sync_wait_running(base);
			raw_spin_lock_irq(&base->lock);
		}
	}
}

static int collect_expired_timers(struct timer_base *base,
				  struct hlist_head *heads)
{
	unsigned long clk = base->clk = base->next_expiry;
	struct hlist_head *vec;
	int i, levels = 0;
	unsigned int idx;

	for (i = 0; i < LVL_DEPTH; i++) {
		idx = (clk & LVL_MASK) + i * LVL_SIZE;

		if (__test_and_clear_bit(idx, base->pending_map)) {
			vec = base->vectors + idx;
			hlist_move_list(vec, heads++);
			levels++;
		}

		/* Is it time to look at the next level? */
		if (clk & LVL_CLK_MASK)
			break;

		/* Shift clock for the next level granularity */
		clk >>= LVL_CLK_SHIFT;
	}
	return levels;
}

/*
 * Find the next pending bucket of a level. Search from level start
 * (@offset) + @clk upwards and if nothing there, search from start of
 * the level (@offset) up to @offset + @clk.
 */
static int next_pending_bucket(struct timer_base *base, unsigned offset,
			       unsigned clk)
{
	unsigned pos, start = offset + clk;
	unsigned end = offset + LVL_SIZE;

	pos = find_next_bit(base->pending_map, end, start);
	if (pos < end)
		return pos - start;

	pos = find_next_bit(base->pending_map, start, offset);
	return pos < start ? pos + LVL_SIZE - start : -1;
}
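/*
 * Search the first expiring timer in the various clock levels. Caller
 * must hold base->lock.
 */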
static unsigned long __next_timer_interrupt(struct timer_base *base)
{
	unsigned long clk, next, adj;
	unsigned lvl, offset = 0;

	next = base->clk + NEXT_TIMER_MAX_DELTA;
	clk = base->clk;
	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
		int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
		unsigned long lvl_clk = clk & LVL_CLK_MASK;

		if (pos >= 0) {
			unsigned long tmp = clk + (unsigned long) pos;

			tmp <<= LVL_SHIFT(lvl);
			if (time_before(tmp, next))
				next = tmp;

			/*
			 * If the next expiration happens before we reach
			 * the next level, no need to check further.
			 */
			if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
				break;
		}
		/*
		 * Clock for the next level. If the current level clock lower
		 * bits are zero, we look at the next level as is. If not we
		 * need to advance it by one because that's going to be the
		 * next expiring bucket in that level.
		 */
		adj = lvl_clk ? 1 : 0;
		clk >>= LVL_CLK_SHIFT;
		clk += adj;
	}

	base->next_expiry_recalc = false;

	return next;
}

#ifdef CONFIG_NO_HZ_COMMON
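/*
 * Check, if the next hrtimer event is before the next timer wheel
 * event:
 */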
static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
{
	u64 nextevt = hrtimer_get_next_event();

	/*
	 * If high resolution timers are enabled
	 * hrtimer_get_next_event() returns KTIME_MAX.
	 */
	if (expires <= nextevt)
		return expires;

	/*
	 * If the next timer is already expired, return the tick base
	 * time so the tick is fired immediately.
	 */
	if (nextevt <= basem)
		return basem;

	/*
	 * Round up to the next jiffie. High resolution timers are
	 * off, so the hrtimers are expired in the tick and we need to
	 * make sure that this tick really expires the timer to avoid
	 * a ping pong of the nohz stop code.
	 *
	 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
	 */
	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
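/**
 * get_next_timer_interrupt - return the time (clock mono) of the next timer
 * @basej:	base time jiffies
 * @basem:	base time clock monotonic
 *
 * Returns the tick aligned clock monotonic time of the next pending
 * timer or KTIME_MAX if no timer is pending.
 */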
u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
	u64 expires = KTIME_MAX;
	unsigned long nextevt;
	bool is_max_delta;

	/*
	 * Pretend that there is no timer pending if the cpu is offline.
	 * Possible pending timers will be migrated later to an active cpu.
	 */
	if (cpu_is_offline(smp_processor_id()))
		return expires;

	raw_spin_lock(&base->lock);
	if (base->next_expiry_recalc)
		base->next_expiry = __next_timer_interrupt(base);
	nextevt = base->next_expiry;
	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);

	/*
	 * We have a fresh next event. Check whether we can forward the
	 * base. We can only do that when @basej is past base->clk
	 * otherwise we might rewind base->clk.
	 */
	if (time_after(basej, base->clk)) {
		if (time_after(nextevt, basej))
			base->clk = basej;
		else if (time_after(nextevt, base->clk))
			base->clk = nextevt;
	}

	if (time_before_eq(nextevt, basej)) {
		expires = basem;
		base->is_idle = false;
	} else {
		if (!is_max_delta)
			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
		/*
		 * If we expect to sleep more than a tick, mark the base idle.
		 * Also the tick is stopped so any added timer must forward
		 * the base clk itself to keep granularity small. This idle
		 * logic is only maintained for the BASE_STD base, deferrable
		 * timers may still see large granularity skew (by design).
		 */
		if ((expires - basem) > TICK_NSEC)
			base->is_idle = true;
	}
	raw_spin_unlock(&base->lock);

	return cmp_next_hrtimer_event(basem, expires);
}
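/**
 * timer_clear_idle - Clear the idle state of the timer base
 *
 * Called with interrupts disabled.
 */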
void timer_clear_idle(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * We do this unlocked. The worst outcome is a remote enqueue sending
	 * a pointless IPI, but taking the lock would just make the window
	 * for sending the IPI a few instructions smaller for the cost of
	 * taking the lock in the exit from idle path.
	 */
	base->is_idle = false;
}
#endif

/*
 * Called from the timer interrupt handler to charge one tick to the
 * current process. user_tick is 1 if the tick is user time, 0 for
 * system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_tick();
#endif
	scheduler_tick();
	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
		run_posix_cpu_timers();

	/*
	 * The current CPU might make use of net randoms without receiving
	 * IRQs to renew them often enough. Let's update the net_rand_state
	 * from a non-constant value that's not affine to the number of
	 * calls to make sure it's updated when there's some activity.
	 */
	this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
}

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 */
static inline void __run_timers(struct timer_base *base)
{
	struct hlist_head heads[LVL_DEPTH];
	int levels;

	if (time_before(jiffies, base->next_expiry))
		return;

	timer_base_lock_expiry(base);
	raw_spin_lock_irq(&base->lock);

	while (time_after_eq(jiffies, base->clk) &&
	       time_after_eq(jiffies, base->next_expiry)) {
		levels = collect_expired_timers(base, heads);
		/*
		 * The only possible reason for not finding any expired
		 * timer at this clk is that all matching timers have been
		 * dequeued.
		 */
		WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
		base->clk++;
		base->next_expiry = __next_timer_interrupt(base);

		while (levels--)
			expire_timers(base, heads + levels);
	}
	raw_spin_unlock_irq(&base->lock);
	timer_base_unlock_expiry(base);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	__run_timers(base);
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
void run_local_timers(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	hrtimer_run_queues();
	/* Raise the softirq only if required. */
	if (time_before(jiffies, base->next_expiry)) {
		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
			return;
		/* CPU is awake, so check the deferrable base. */
		base++;
		if (time_before(jiffies, base->next_expiry))
			return;
	}
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * Since schedule_timeout()'s timer is defined on the stack, it must
 * store the target task on the stack as well.
 */
struct process_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void process_timeout(struct timer_list *t)
{
	struct process_timer *timeout = from_timer(timeout, t, timer);

	wake_up_process(timeout->task);
}
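/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have elapsed.
 * The function behavior depends on the current task state:
 *
 * %TASK_RUNNING - the scheduler is called, but the task does not sleep
 * at all. That happens because sched_submit_work() does nothing for
 * tasks in %TASK_RUNNING state.
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * Returns 0 when the timer has expired, otherwise the remaining time in
 * jiffies will be returned. In all cases the return value is guaranteed
 * to be non-negative.
 */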
signed long __sched schedule_timeout(signed long timeout)
{
	struct process_timer timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * This special case is useful for callers: we could pick
		 * MAX_SCHEDULE_TIMEOUT from the negative range, but
		 * returning a valid offset (>= 0) lets the caller do
		 * whatever it wants with the return value.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Paranoia: no kernel code is supposed to pass a negative
		 * timeout, so complain loudly, stay runnable and return 0
		 * instead of sleeping forever.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	timer.task = current;
	timer_setup_on_stack(&timer.timer, process_timeout, 0);
	__mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
	schedule();
	del_singleshot_timer_sync(&timer.timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer.timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);

/*
 * We can use __set_current_state() here because schedule_timeout()
 * calls schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/*
 * Like schedule_timeout_uninterruptible(), except this task will not
 * contribute to load average.
 */
signed long __sched schedule_timeout_idle(signed long timeout)
{
	__set_current_state(TASK_IDLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_idle);

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
{
	struct timer_list *timer;
	int cpu = new_base->cpu;

	while (!hlist_empty(head)) {
		timer = hlist_entry(head->first, struct timer_list, entry);
		detach_timer(timer, false);
		timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
		internal_add_timer(new_base, timer);
	}
}

int timers_prepare_cpu(unsigned int cpu)
{
	struct timer_base *base;
	int b;

	for (b = 0; b < NR_BASES; b++) {
		base = per_cpu_ptr(&timer_bases[b], cpu);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		base->is_idle = false;
	}
	return 0;
}

int timers_dead_cpu(unsigned int cpu)
{
	struct timer_base *old_base;
	struct timer_base *new_base;
	int b, i;

	BUG_ON(cpu_online(cpu));

	for (b = 0; b < NR_BASES; b++) {
		old_base = per_cpu_ptr(&timer_bases[b], cpu);
		new_base = get_cpu_ptr(&timer_bases[b]);
		/*
		 * The caller is globally serialized and nobody else
		 * takes two locks at once, deadlock is not possible.
		 */
		raw_spin_lock_irq(&new_base->lock);
		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

		/*
		 * The current CPUs base clock might be stale. Update it
		 * before moving the timers over.
		 */
		forward_timer_base(new_base);

		BUG_ON(old_base->running_timer);

		for (i = 0; i < WHEEL_SIZE; i++)
			migrate_timer_list(new_base, old_base->vectors + i);

		raw_spin_unlock(&old_base->lock);
		raw_spin_unlock_irq(&new_base->lock);
		put_cpu_ptr(&timer_bases);
	}
	return 0;
}

#endif /* CONFIG_HOTPLUG_CPU */

static void __init init_timer_cpu(int cpu)
{
	struct timer_base *base;
	int i;

	for (i = 0; i < NR_BASES; i++) {
		base = per_cpu_ptr(&timer_bases[i], cpu);
		base->cpu = cpu;
		raw_spin_lock_init(&base->lock);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		timer_base_init_expiry_lock(base);
	}
}

static void __init init_timer_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		init_timer_cpu(cpu);
}

void __init init_timers(void)
{
	init_timer_cpus();
	posix_cputimers_init_work();
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}
EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: time in milliseconds to sleep for
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}
EXPORT_SYMBOL(msleep_interruptible);
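/**
 * usleep_range - Sleep for an approximate time
 * @min: Minimum time in usecs to sleep
 * @max: Maximum time in usecs to sleep
 *
 * In non-atomic context where the exact wakeup time is flexible, use
 * usleep_range() instead of udelay(). The sleep improves responsiveness
 * by avoiding the CPU-hogging busy-wait of udelay(), and the range
 * reduces power usage by allowing hrtimers to coalesce wakeups with any
 * other wakeup that may happen in that range.
 */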
void __sched usleep_range(unsigned long min, unsigned long max)
{
	ktime_t exp = ktime_add_us(ktime_get(), min);
	u64 delta = (u64)(max - min) * NSEC_PER_USEC;

	for (;;) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		/* Do not return before the requested sleep time has elapsed */
		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
			break;
	}
}
EXPORT_SYMBOL(usleep_range);