// SPDX-License-Identifier: GPL-2.0
/*
 *  Kernel internal timers
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

/* The global jiffies counter, advanced once per tick. */
__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);
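
/*
 * Illustrative (not part of the original file): jiffies values wrap, so
 * raw comparisons are wrong; use the time_after()/time_before() helpers,
 * which are wrap-safe:
 *
 *	unsigned long deadline = jiffies + 10 * HZ;	// ten seconds out
 *	if (time_after(jiffies, deadline))
 *		;	// the deadline has passed
 */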

/*
 * The timer wheel has LVL_DEPTH array levels, each providing LVL_SIZE
 * buckets. Each level is driven by its own clock and therefore has its
 * own granularity: level 0 resolves single jiffies and every further
 * level is coarser by a factor of LVL_CLK_DIV (8). A timer is queued
 * into the lowest level whose range still covers its expiry delta, so
 * distant timers are batched into coarse buckets. Because the enqueue
 * index is rounded up, a timer never fires early; it may fire up to one
 * bucket granularity late, which is the deliberate accuracy/overhead
 * trade-off of the wheel.
 */

/* Clock divisor for the next level */
#define LVL_CLK_SHIFT	3
#define LVL_CLK_DIV	(1UL << LVL_CLK_SHIFT)
#define LVL_CLK_MASK	(LVL_CLK_DIV - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))

/*
 * The time start value for each level to select the bucket at enqueue
 * time.
 */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

/* Size of each clock level */
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

/* Level depth */
#if HZ > 100
# define LVL_DEPTH	9
# else
# define LVL_DEPTH	8
#endif

/* The cutoff (max. capacity of the wheel) */
#define WHEEL_TIMEOUT_CUTOFF	(LVL_START(LVL_DEPTH))
#define WHEEL_TIMEOUT_MAX	(WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))

/*
 * The resulting wheel size. If NOHZ is configured we allocate two
 * wheels so the deferrable timers can be kept separate.
 */
#define WHEEL_SIZE	(LVL_SIZE * LVL_DEPTH)

#ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES	2
# define BASE_STD	0
# define BASE_DEF	1
#else
# define NR_BASES	1
# define BASE_STD	0
# define BASE_DEF	0
#endif
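
/*
 * Worked numbers (illustrative, not from the original file): with
 * LVL_BITS = 6 and LVL_CLK_SHIFT = 3, LVL_GRAN(n) = 8^n jiffies and
 * LVL_START(n) = 63 << (3 * (n - 1)). So level 0 serves deltas below
 * LVL_START(1) = 63 jiffies at 1-jiffy granularity, level 1 serves
 * deltas below LVL_START(2) = 504 at 8-jiffy granularity, level 2 below
 * LVL_START(3) = 4032 at 64-jiffy granularity, and so on up to
 * WHEEL_TIMEOUT_CUTOFF.
 */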

struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;
#ifdef CONFIG_PREEMPT_RT
	spinlock_t		expiry_lock;
	atomic_t		timer_waiters;
#endif
	unsigned long		clk;
	unsigned long		next_expiry;
	unsigned int		cpu;
	bool			next_expiry_recalc;
	bool			is_idle;
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;

static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);

#ifdef CONFIG_NO_HZ_COMMON

static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
static DEFINE_MUTEX(timer_keys_mutex);

static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);

#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;

DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);

static void timers_update_migration(void)
{
	if (sysctl_timer_migration && tick_nohz_active)
		static_branch_enable(&timers_migration_enabled);
	else
		static_branch_disable(&timers_migration_enabled);
}
#else
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */

static void timer_update_keys(struct work_struct *work)
{
	mutex_lock(&timer_keys_mutex);
	timers_update_migration();
	static_branch_enable(&timers_nohz_active);
	mutex_unlock(&timer_keys_mutex);
}

void timers_update_nohz(void)
{
	schedule_work(&timer_update_work);
}

int timer_migration_handler(struct ctl_table *table, int write,
			    void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&timer_keys_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write)
		timers_update_migration();
	mutex_unlock(&timer_keys_mutex);
	return ret;
}

static inline bool is_timers_nohz_active(void)
{
	return static_branch_unlikely(&timers_nohz_active);
}
#else
static inline bool is_timers_nohz_active(void) { return false; }
#endif /* NO_HZ_COMMON */

static unsigned long round_jiffies_common(unsigned long j, int cpu,
		bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all cpus firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra cpu with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * 3*cpunr again to get back to the original value.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffie is just after a whole second (which can
	 * happen due to delays of the timer irq, long irq off times etc etc)
	 * then we should round down to the whole second, not up. Use 1/4th
	 * second as cutoff for this rounding as an extreme upper bound for
	 * this. But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up)
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	/*
	 * Make sure j is still in the future. Otherwise return the
	 * unmodified value.
	 */
	return time_is_after_jiffies(j) ? j : original;
}

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long
 * as they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead to lock
 * contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in
 * jiffies) up or down to (approximately) full seconds, with the same
 * power-saving rationale and per-CPU skew as __round_jiffies().
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds, using the local CPU's
 * skew. See __round_jiffies() for details.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in
 * jiffies) up or down to (approximately) full seconds, using the local
 * CPU's skew. See __round_jiffies_relative() for details.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
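
/*
 * Illustrative usage (not part of the original file; "my_timer" is a
 * hypothetical timer set up elsewhere): a periodic housekeeping timer
 * that only needs ~1 second resolution can let its wakeups coalesce on
 * full seconds:
 *
 *	mod_timer(&my_timer, round_jiffies(jiffies + HZ));
 */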

static inline unsigned int timer_get_idx(struct timer_list *timer)
{
	return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
}

static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
{
	timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
			idx << TIMER_ARRAYSHIFT;
}

/*
 * Helper function to calculate the array index for a given expiry
 * time.
 */
static inline unsigned calc_index(unsigned long expires, unsigned lvl,
				  unsigned long *bucket_expiry)
{
	/*
	 * The timer wheel has to guarantee that a timer does not fire
	 * early. Early expiry can happen due to:
	 * - Timer is armed at the edge of a tick
	 * - Truncation of the expiry time in the outer wheel levels
	 *
	 * Round up with level granularity to prevent this.
	 */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}
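
/*
 * Worked example (illustrative, not from the original file): for
 * lvl = 1 (LVL_SHIFT = 3, LVL_GRAN = 8) and expires = 100:
 * (100 + 8) >> 3 = 13, so *bucket_expiry = 13 << 3 = 104 and the index
 * is LVL_OFFS(1) + (13 & 63) = 64 + 13 = 77. The timer fires no earlier
 * than 104, i.e. at most LVL_GRAN(1) jiffies after its nominal expiry.
 */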

static int calc_wheel_index(unsigned long expires, unsigned long clk,
			    unsigned long *bucket_expiry)
{
	unsigned long delta = expires - clk;
	unsigned int idx;

	if (delta < LVL_START(1)) {
		idx = calc_index(expires, 0, bucket_expiry);
	} else if (delta < LVL_START(2)) {
		idx = calc_index(expires, 1, bucket_expiry);
	} else if (delta < LVL_START(3)) {
		idx = calc_index(expires, 2, bucket_expiry);
	} else if (delta < LVL_START(4)) {
		idx = calc_index(expires, 3, bucket_expiry);
	} else if (delta < LVL_START(5)) {
		idx = calc_index(expires, 4, bucket_expiry);
	} else if (delta < LVL_START(6)) {
		idx = calc_index(expires, 5, bucket_expiry);
	} else if (delta < LVL_START(7)) {
		idx = calc_index(expires, 6, bucket_expiry);
	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
		idx = calc_index(expires, 7, bucket_expiry);
	} else if ((long) delta < 0) {
		idx = clk & LVL_MASK;
		*bucket_expiry = clk;
	} else {
		/*
		 * Force expire obscene large timeouts to expire at the
		 * capacity limit of the wheel.
		 */
		if (delta >= WHEEL_TIMEOUT_CUTOFF)
			expires = clk + WHEEL_TIMEOUT_MAX;

		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
	}
	return idx;
}
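
/*
 * Illustrative level selection (not from the original file): at
 * HZ=1000, a 5 ms timeout (delta = 5) lands in level 0 with 1-jiffy
 * granularity, while a 5 s timeout (delta = 5000) satisfies
 * LVL_START(3) = 4032 <= 5000 < LVL_START(4) = 32256 and lands in
 * level 3 with LVL_GRAN(3) = 512 jiffies (~0.5 s) granularity.
 */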

static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
	if (!is_timers_nohz_active())
		return;

	/*
	 * Deferrable timers do not force a wakeup out of idle; only a
	 * nohz_full CPU, which has no tick, needs a kick to notice them.
	 */
	if (timer->flags & TIMER_DEFERRABLE) {
		if (tick_nohz_full_cpu(base->cpu))
			wake_up_nohz_cpu(base->cpu);
		return;
	}

	/*
	 * We might have to IPI the remote CPU if the base is idle and the
	 * timer is not deferrable. If the other CPU is on the way to idle
	 * then it can't set base->is_idle as we hold the base lock:
	 */
	if (base->is_idle)
		wake_up_nohz_cpu(base->cpu);
}

/*
 * Enqueue the timer into the hash bucket, mark it pending in
 * the bitmap, store the index in the timer flags then wake up
 * the target CPU if needed.
 */
static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
			  unsigned int idx, unsigned long bucket_expiry)
{
	hlist_add_head(&timer->entry, base->vectors + idx);
	__set_bit(idx, base->pending_map);
	timer_set_idx(timer, idx);

	trace_timer_start(timer, timer->expires, timer->flags);

	/*
	 * Check whether this is the new first expiring timer. The
	 * effective expiry time of the timer is required here
	 * (bucket_expiry) instead of timer->expires.
	 */
	if (time_before(bucket_expiry, base->next_expiry)) {
		/*
		 * Set the next expiry time and kick the CPU so it
		 * can reevaluate the wheel:
		 */
		base->next_expiry = bucket_expiry;
		base->next_expiry_recalc = false;
		trigger_dyntick_cpu(base, timer);
	}
}

static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	unsigned long bucket_expiry;
	unsigned int idx;

	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
	enqueue_timer(base, timer, idx, bucket_expiry);
}

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static const struct debug_obj_descr timer_debug_descr;

static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

static bool timer_is_static_object(void *addr)
{
	struct timer_list *timer = addr;

	return (timer->entry.pprev == NULL &&
		timer->entry.next == TIMER_ENTRY_STATIC);
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/* Stub timer callback for improperly used timers. */
static void stub_timer(struct timer_list *unused)
{
	WARN_ON(1);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);
		fallthrough;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_assert_init is called when:
 * - an untracked/uninit-ed object is found
 */
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.is_static_object	= timer_is_static_object,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     void (*func)(struct timer_list *),
			     unsigned int flags,
			     const char *name, struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key)
{
	timer->entry.pprev = NULL;
	timer->function = func;
	if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
		flags &= TIMER_INIT_FLAGS;
	timer->flags = flags | raw_smp_processor_id();
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @func: timer callback function
 * @flags: timer flags
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    void (*func)(struct timer_list *), unsigned int flags,
		    const char *name, struct lock_class_key *key)
{
	debug_init(timer);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);
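
/*
 * Illustrative setup (not part of the original file; all names are
 * hypothetical). Most users go through the timer_setup() wrapper and
 * recover their containing object with from_timer():
 *
 *	struct my_dev {
 *		struct timer_list poll_timer;
 *	};
 *
 *	static void poll_fn(struct timer_list *t)
 *	{
 *		struct my_dev *dev = from_timer(dev, t, poll_timer);
 *
 *		// ... do work, then rearm if still needed:
 *		mod_timer(&dev->poll_timer, jiffies + HZ / 10);
 *	}
 *
 *	timer_setup(&dev->poll_timer, poll_fn, 0);
 */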

static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
	struct hlist_node *entry = &timer->entry;

	debug_deactivate(timer);

	__hlist_del(entry);
	if (clear_pending)
		entry->pprev = NULL;
	entry->next = LIST_POISON2;
}

static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
			     bool clear_pending)
{
	unsigned idx = timer_get_idx(timer);

	if (!timer_pending(timer))
		return 0;

	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
		__clear_bit(idx, base->pending_map);
		base->next_expiry_recalc = true;
	}

	detach_timer(timer, clear_pending);
	return 1;
}

static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
	return base;
}

static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
	return base;
}

static inline struct timer_base *get_timer_base(u32 tflags)
{
	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}

static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) &&
	    !(tflags & TIMER_PINNED))
		return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
	return get_timer_this_cpu_base(tflags);
}

static inline void forward_timer_base(struct timer_base *base)
{
	unsigned long jnow = READ_ONCE(jiffies);

	/*
	 * No need to forward if we are close enough below jiffies.
	 * Also while executing timers, base->clk is 1 offset ahead
	 * of jiffies to avoid endless requeuing to current jiffies.
	 */
	if ((long)(jnow - base->clk) < 1)
		return;

	/*
	 * If the next expiry value is > jiffies, then we fast forward to
	 * jiffies otherwise we forward to the next expiry value.
	 */
	if (time_after(base->next_expiry, jnow)) {
		base->clk = jnow;
	} else {
		if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
			return;
		base->clk = base->next_expiry;
	}
}

/*
 * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
 * that all timers which are tied to this base are locked, and the base
 * itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found in the base->vectors array.
 *
 * When a timer is migrating then the TIMER_MIGRATING flag is set and we
 * need to wait until the migration is done.
 */
static struct timer_base *lock_timer_base(struct timer_list *timer,
					  unsigned long *flags)
	__acquires(timer->base->lock)
{
	for (;;) {
		struct timer_base *base;
		u32 tf;

		/*
		 * We need to use READ_ONCE() here, otherwise the compiler
		 * might re-read @tf between the check for TIMER_MIGRATING
		 * and spin_lock().
		 */
		tf = READ_ONCE(timer->flags);

		if (!(tf & TIMER_MIGRATING)) {
			base = get_timer_base(tf);
			raw_spin_lock_irqsave(&base->lock, *flags);
			if (timer->flags == tf)
				return base;
			raw_spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

#define MOD_TIMER_PENDING_ONLY		0x01
#define MOD_TIMER_REDUCE		0x02
#define MOD_TIMER_NOTPENDING		0x04

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
	unsigned long clk = 0, flags, bucket_expiry;
	struct timer_base *base, *new_base;
	unsigned int idx = UINT_MAX;
	int ret = 0;

	BUG_ON(!timer->function);

	/*
	 * This is a common optimization triggered by the networking code - if
	 * the timer is re-modified having the same expiry time then just
	 * return:
	 */
	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
		/*
		 * The downside of this optimization is that it can result in
		 * larger granularity than you would get from adding a new
		 * timer with this expiry.
		 */
		long diff = timer->expires - expires;

		if (!diff)
			return 1;
		if (options & MOD_TIMER_REDUCE && diff <= 0)
			return 1;

		/*
		 * We lock timer base and calculate the bucket index right
		 * here. If the timer ends up in the same bucket, then we
		 * just update the expiry time and avoid the whole
		 * dequeue/enqueue dance.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
		    time_before_eq(timer->expires, expires)) {
			ret = 1;
			goto out_unlock;
		}

		clk = base->clk;
		idx = calc_wheel_index(expires, clk, &bucket_expiry);

		/*
		 * Retrieve and compare the array index of the pending
		 * timer. If it matches set the expiry to the new value so a
		 * subsequent call will exit in the expires check above.
		 */
		if (idx == timer_get_idx(timer)) {
			if (!(options & MOD_TIMER_REDUCE))
				timer->expires = expires;
			else if (time_after(timer->expires, expires))
				timer->expires = expires;
			ret = 1;
			goto out_unlock;
		}
	} else {
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);
	}

	ret = detach_if_pending(timer, base, false);
	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
		goto out_unlock;

	new_base = get_target_base(base, timer->flags);

	if (base != new_base) {
		/*
		 * We are trying to schedule the timer on the new base.
		 * However we can't change the base while the timer is
		 * running, otherwise del_timer_sync() can't detect that the
		 * timer's handler has not finished yet. This also guarantees
		 * that the timer is serialized wrt itself.
		 */
		if (likely(base->running_timer != timer)) {
			/* See the comment in lock_timer_base() */
			timer->flags |= TIMER_MIGRATING;

			raw_spin_unlock(&base->lock);
			base = new_base;
			raw_spin_lock(&base->lock);
			WRITE_ONCE(timer->flags,
				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
			forward_timer_base(base);
		}
	}

	debug_timer_activate(timer);

	timer->expires = expires;
	/*
	 * If 'idx' was calculated above and the base time did not advance
	 * between calculating 'idx' and possibly switching the base, only
	 * enqueue_timer() is required. Otherwise we need to (re)calculate
	 * the wheel index via internal_add_timer().
	 */
	if (idx != UINT_MAX && clk == base->clk)
		enqueue_timer(base, timer, idx, bucket_expiry);
	else
		internal_add_timer(base, timer);

out_unlock:
	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}

/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not activate inactive timers.
 *
 * Return:
 * * %0 - The timer was inactive and not modified
 * * %1 - The timer was active and requeued to expire at @expires
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
}
EXPORT_SYMBOL(mod_timer_pending);

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * except that mod_timer() is more efficient: if the timer is already
 * pending with the same effective expiry it does nothing. mod_timer()
 * is the only safe way to modify the timeout of an active timer; never
 * touch timer->expires directly.
 *
 * Return:
 * * %0 - The timer was inactive and started
 * * %1 - The timer was active and requeued to expire at @expires, or
 *	  was active and not modified because @expires did not change
 *	  the effective expiry time
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_timer);
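
/*
 * Illustrative one-shot pattern (not part of the original file;
 * "wd_timer" is hypothetical): arm a watchdog two seconds out and push
 * it back on every sign of activity. mod_timer() starts the timer if it
 * is not pending and requeues it if it is:
 *
 *	mod_timer(&wd_timer, jiffies + 2 * HZ);
 */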

/**
 * timer_reduce - Modify a timer's timeout if it would reduce the timeout
 * @timer: The timer to be modified
 * @expires: New timeout in jiffies
 *
 * timer_reduce() is very similar to mod_timer(), except that it will
 * only modify a pending timer if that would reduce the expiration time;
 * an inactive timer is simply activated.
 */
int timer_reduce(struct timer_list *timer, unsigned long expires)
{
	return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
}
EXPORT_SYMBOL(timer_reduce);

/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(@timer) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function fields must be set prior calling this
 * function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
	BUG_ON(timer_pending(timer));
	__mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
 * @timer: the timer to be added
 * @cpu: the CPU to start it on
 *
 * This is not very scalable on SMP. Double adds are not possible.
 */
void add_timer_on(struct timer_list *timer, int cpu)
{
	struct timer_base *new_base, *base;
	unsigned long flags;

	BUG_ON(timer_pending(timer) || !timer->function);

	new_base = get_timer_cpu_base(timer->flags, cpu);

	/*
	 * If @timer was on a different CPU, it must be migrated with the
	 * old base locked to prevent other operations proceeding with the
	 * wrong base locked.  See lock_timer_base().
	 */
	base = lock_timer_base(timer, &flags);
	if (base != new_base) {
		timer->flags |= TIMER_MIGRATING;

		raw_spin_unlock(&base->lock);
		base = new_base;
		raw_spin_lock(&base->lock);
		WRITE_ONCE(timer->flags,
			   (timer->flags & ~TIMER_BASEMASK) | cpu);
	}
	forward_timer_base(base);

	debug_timer_activate(timer);
	internal_add_timer(base, timer);
	raw_spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);

/**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
 *
 * del_timer() deactivates a timer - this works on both active and inactive
 * timers.
 *
 * The function returns whether it has deactivated a pending timer or not.
 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 * active timer returns 1.)
 */
int del_timer(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = 0;

	debug_assert_init(timer);

	if (timer_pending(timer)) {
		base = lock_timer_base(timer, &flags);
		ret = detach_if_pending(timer, base, true);
		raw_spin_unlock_irqrestore(&base->lock, flags);
	}

	return ret;
}
EXPORT_SYMBOL(del_timer);

/**
 * try_to_del_timer_sync - Try to deactivate a timer
 * @timer: timer to delete
 *
 * This function tries to deactivate a timer. Upon successful (ret >= 0)
 * exit the timer is not queued and the handler is not running on any CPU.
 * A return value of -1 means the timer callback is currently running, so
 * the timer could not be deactivated.
 */
int try_to_del_timer_sync(struct timer_list *timer)
{
	struct timer_base *base;
	unsigned long flags;
	int ret = -1;

	debug_assert_init(timer);

	base = lock_timer_base(timer, &flags);

	if (base->running_timer != timer)
		ret = detach_if_pending(timer, base, true);

	raw_spin_unlock_irqrestore(&base->lock, flags);

	return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);

/*
 * timer_curr_running - return true if @timer's callback is currently
 * executing on one of this CPU's timer bases.
 */
bool timer_curr_running(struct timer_list *timer)
{
	int i;

	for (i = 0; i < NR_BASES; i++) {
		struct timer_base *base = this_cpu_ptr(&timer_bases[i]);

		if (base->running_timer == timer)
			return true;
	}

	return false;
}

#ifdef CONFIG_PREEMPT_RT
static __init void timer_base_init_expiry_lock(struct timer_base *base)
{
	spin_lock_init(&base->expiry_lock);
}

static inline void timer_base_lock_expiry(struct timer_base *base)
{
	spin_lock(&base->expiry_lock);
}

static inline void timer_base_unlock_expiry(struct timer_base *base)
{
	spin_unlock(&base->expiry_lock);
}

/*
 * The counterpart to del_timer_wait_running().
 *
 * If there is a waiter for base->expiry_lock, then it was waiting for the
 * timer callback to finish. Drop expiry_lock and reacquire it. That allows
 * the waiter to acquire the lock and make progress.
 */
static void timer_sync_wait_running(struct timer_base *base)
{
	if (atomic_read(&base->timer_waiters)) {
		spin_unlock(&base->expiry_lock);
		spin_lock(&base->expiry_lock);
	}
}

/*
 * This function is called on PREEMPT_RT kernels when the fast path
 * deletion of a timer failed because the timer callback function was
 * running.
 *
 * This prevents priority inversion if the softirq thread on a remote
 * CPU got preempted, and it prevents a live lock when the task which
 * tries to delete a timer preempted the softirq thread running the
 * timer callback function.
 */
static void del_timer_wait_running(struct timer_list *timer)
{
	u32 tf;

	tf = READ_ONCE(timer->flags);
	if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) {
		struct timer_base *base = get_timer_base(tf);

		/*
		 * Mark the base as contended and grab the expiry lock,
		 * which is held by the softirq across the timer
		 * callback. Drop the lock immediately so the softirq can
		 * expire the next timer. In theory the timer could already
		 * be running again, but that's more than unlikely and just
		 * causes another wait loop.
		 */
		atomic_inc(&base->timer_waiters);
		spin_lock_bh(&base->expiry_lock);
		atomic_dec(&base->timer_waiters);
		spin_unlock_bh(&base->expiry_lock);
	}
}
#else
static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
static inline void timer_base_lock_expiry(struct timer_base *base) { }
static inline void timer_base_unlock_expiry(struct timer_base *base) { }
static inline void timer_sync_wait_running(struct timer_base *base) { }
static inline void del_timer_wait_running(struct timer_list *timer) { }
#endif

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
 *
 * This function only differs from del_timer() in that it waits for the
 * timer's handler to finish on other CPUs.
 *
 * Synchronization rules: Callers must prevent restarting of the timer,
 * otherwise this function is meaningless. It must not be called from
 * interrupt contexts unless the timer is an irqsafe one. The caller must
 * not hold locks which would prevent completion of the timer's handler.
 * The timer's handler must not call add_timer_on(). Upon exit the timer
 * is not queued and the handler is not running on any CPU.
 *
 * For !irqsafe timers, the caller must not hold locks that are also taken
 * in interrupt context: the handler could be blocked on such a lock while
 * del_timer_sync() spins waiting for the handler to complete, deadlocking
 * the system.
 *
 * The function returns whether it has deactivated a pending timer or not.
 */
int del_timer_sync(struct timer_list *timer)
{
	int ret;

#ifdef CONFIG_LOCKDEP
	unsigned long flags;

	/*
	 * If lockdep gives a backtrace here, please reference
	 * the synchronization rules above.
	 */
	local_irq_save(flags);
	lock_map_acquire(&timer->lockdep_map);
	lock_map_release(&timer->lockdep_map);
	local_irq_restore(flags);
#endif
	/*
	 * don't use it in hardirq context, because it
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));

	/*
	 * Must be able to sleep on PREEMPT_RT because of the slowpath in
	 * del_timer_wait_running().
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE))
		lockdep_assert_preemption_enabled();

	do {
		ret = try_to_del_timer_sync(timer);

		if (unlikely(ret < 0)) {
			del_timer_wait_running(timer);
			cpu_relax();
		}
	} while (ret < 0);

	return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
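
/*
 * Illustrative teardown pattern (not part of the original file; names
 * are hypothetical): a driver's remove path first makes sure the
 * callback cannot rearm itself, then synchronizes:
 *
 *	WRITE_ONCE(dev->stopping, true);	// checked by the callback
 *	del_timer_sync(&dev->poll_timer);	// handler cannot be running afterwards
 */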

static void call_timer_fn(struct timer_list *timer,
			  void (*fn)(struct timer_list *),
			  unsigned long baseclk)
{
	int count = preempt_count();

#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the timer from inside the
	 * function that is called from it, this we need to take into
	 * account for lockdep too. To avoid bogus "held lock freed"
	 * warnings as well as problems when looking into
	 * timer->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
#endif
	/*
	 * Couple the lock chain with the lock chain at
	 * del_timer_sync() by acquiring the lock_map around the fn()
	 * call here and in del_timer_sync().
	 */
	lock_map_acquire(&lockdep_map);

	trace_timer_expire_entry(timer, baseclk);
	fn(timer);
	trace_timer_expire_exit(timer);

	lock_map_release(&lockdep_map);

	if (count != preempt_count()) {
		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
			  fn, count, preempt_count());
		/*
		 * Restore the preempt count. That gives us a decent
		 * chance to survive and extract information. If the
		 * callback kept a lock held, bad luck, but not worse
		 * than the BUG() we had.
		 */
		preempt_count_set(count);
	}
}

static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
	/*
	 * This value is required only for tracing. base->clk was
	 * incremented directly before expire_timers was called. But expiry
	 * is related to the old base->clk value.
	 */
	unsigned long baseclk = base->clk - 1;

	while (!hlist_empty(head)) {
		struct timer_list *timer;
		void (*fn)(struct timer_list *);

		timer = hlist_entry(head->first, struct timer_list, entry);

		base->running_timer = timer;
		detach_timer(timer, true);

		fn = timer->function;

		if (timer->flags & TIMER_IRQSAFE) {
			raw_spin_unlock(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			raw_spin_lock(&base->lock);
		} else {
			raw_spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn, baseclk);
			base->running_timer = NULL;
			timer_sync_wait_running(base);
			raw_spin_lock_irq(&base->lock);
		}
	}
}

static int collect_expired_timers(struct timer_base *base,
				  struct hlist_head *heads)
{
	unsigned long clk = base->clk = base->next_expiry;
	struct hlist_head *vec;
	int i, levels = 0;
	unsigned int idx;

	for (i = 0; i < LVL_DEPTH; i++) {
		idx = (clk & LVL_MASK) + i * LVL_SIZE;

		if (__test_and_clear_bit(idx, base->pending_map)) {
			vec = base->vectors + idx;
			hlist_move_list(vec, heads++);
			levels++;
		}
		/* Is it time to look at the next level? */
		if (clk & LVL_CLK_MASK)
			break;
		/* Shift clock for the next level granularity */
		clk >>= LVL_CLK_SHIFT;
	}
	return levels;
}

/*
 * Find the next pending bucket of a level. Search from level start (@offset)
 * + @clk upwards and if nothing there, search from start of the level
 * (@offset) up to @offset + clk.
 */
static int next_pending_bucket(struct timer_base *base, unsigned offset,
			       unsigned clk)
{
	unsigned pos, start = offset + clk;
	unsigned end = offset + LVL_SIZE;

	pos = find_next_bit(base->pending_map, end, start);
	if (pos < end)
		return pos - start;

	pos = find_next_bit(base->pending_map, start, offset);
	return pos < start ? pos + LVL_SIZE - start : -1;
}

/*
 * Search the first expiring timer in the various clock levels. Caller must
 * hold base->lock.
 */
static unsigned long __next_timer_interrupt(struct timer_base *base)
{
	unsigned long clk, next, adj;
	unsigned lvl, offset = 0;

	next = base->clk + NEXT_TIMER_MAX_DELTA;
	clk = base->clk;
	for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
		int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
		unsigned long lvl_clk = clk & LVL_CLK_MASK;

		if (pos >= 0) {
			unsigned long tmp = clk + (unsigned long) pos;

			tmp <<= LVL_SHIFT(lvl);
			if (time_before(tmp, next))
				next = tmp;

			/*
			 * If the next expiration happens before we reach
			 * the next level, no need to check further.
			 */
			if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
				break;
		}

		/*
		 * Clock for the next level. If the current level clock lower
		 * bits are zero, we look at the next level as is. If not we
		 * need to advance it by one, because that's going to be the
		 * next expiring bucket in that level: a non-zero remainder
		 * means the next-level bucket holding base->clk has already
		 * been covered by the current level's search.
		 */
		adj = lvl_clk ? 1 : 0;
		clk >>= LVL_CLK_SHIFT;
		clk += adj;
	}

	base->next_expiry_recalc = false;

	return next;
}

#ifdef CONFIG_NO_HZ_COMMON
/*
 * Check, if the next hrtimer event is before the next timer wheel
 * event:
 */
static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
{
	u64 nextevt = hrtimer_get_next_event();

	/*
	 * If high resolution timers are enabled
	 * hrtimer_get_next_event() returns KTIME_MAX.
	 */
	if (expires <= nextevt)
		return expires;

	/*
	 * If the next timer is already expired, return the tick base
	 * time so the tick is fired immediately.
	 */
	if (nextevt <= basem)
		return basem;

	/*
	 * Round up to the next jiffie. High resolution timers are
	 * off, so the hrtimers are expired in the tick and we need to
	 * make sure that this tick really expires the timer to avoid
	 * a ping pong of the nohz stop code.
	 *
	 * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
	 */
	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}

/**
 * get_next_timer_interrupt - return the time (clock mono) of the next timer
 * @basej:	base time jiffies
 * @basem:	base time clock monotonic
 *
 * Returns the tick aligned clock monotonic time of the next pending
 * timer or KTIME_MAX if no timer is pending.
 */
u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
	u64 expires = KTIME_MAX;
	unsigned long nextevt;
	bool is_max_delta;

	/*
	 * Pretend that there is no timer pending if the cpu is offline.
	 * Possible hotplug issue, work is done in timers_dead_cpu().
	 */
	if (cpu_is_offline(smp_processor_id()))
		return expires;

	raw_spin_lock(&base->lock);
	if (base->next_expiry_recalc)
		base->next_expiry = __next_timer_interrupt(base);
	nextevt = base->next_expiry;
	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);

	/*
	 * We have a fresh next event. Check whether we can forward the
	 * base. We can only do that when @basej is past base->clk
	 * otherwise we might rewind base->clk.
	 */
	if (time_after(basej, base->clk)) {
		if (time_after(nextevt, basej))
			base->clk = basej;
		else if (time_after(nextevt, base->clk))
			base->clk = nextevt;
	}

	if (time_before_eq(nextevt, basej)) {
		expires = basem;
		base->is_idle = false;
	} else {
		if (!is_max_delta)
			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
		/*
		 * If we expect to sleep more than a tick, mark the base
		 * idle so that a remote enqueue of a timer knows it has to
		 * kick this CPU out of its deep sleep.
		 */
		if ((expires - basem) > TICK_NSEC)
			base->is_idle = true;
	}
	raw_spin_unlock(&base->lock);

	return cmp_next_hrtimer_event(basem, expires);
}

/**
 * timer_clear_idle - Clear the idle state of the timer base
 *
 * Called with interrupts disabled
 */
void timer_clear_idle(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * We do this unlocked. The worst outcome is a remote enqueue sending
	 * a pointless IPI, but taking the lock would just make the window
	 * for sending the IPI a few instructions smaller for the cost of
	 * taking the lock in the exit from idle path.
	 */
	base->is_idle = false;
}
#endif

/**
 * __run_timers - run all expired timers (if any) on this CPU.
 * @base: the timer vector to be processed.
 */
static inline void __run_timers(struct timer_base *base)
{
	struct hlist_head heads[LVL_DEPTH];
	int levels;

	if (time_before(jiffies, base->next_expiry))
		return;

	timer_base_lock_expiry(base);
	raw_spin_lock_irq(&base->lock);

	while (time_after_eq(jiffies, base->clk) &&
	       time_after_eq(jiffies, base->next_expiry)) {
		levels = collect_expired_timers(base, heads);
		/*
		 * The only possible reason for not finding any expired
		 * timer at this clk is that all matching timers have been
		 * dequeued.
		 */
		WARN_ON_ONCE(!levels && !base->next_expiry_recalc);
		base->clk++;
		base->next_expiry = __next_timer_interrupt(base);

		while (levels--)
			expire_timers(base, heads + levels);
	}
	raw_spin_unlock_irq(&base->lock);
	timer_base_unlock_expiry(base);
}

/*
 * This function runs timers and the timer-tq in bottom half context.
 */
static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	__run_timers(base);
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
}

/*
 * Called by the local, per-CPU timer interrupt on SMP.
 */
static void run_local_timers(void)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	hrtimer_run_queues();
	/* Raise the softirq only if required. */
	if (time_before(jiffies, base->next_expiry)) {
		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
			return;
		/* CPU is awake, so check the deferrable base. */
		base++;
		if (time_before(jiffies, base->next_expiry))
			return;
	}
	raise_softirq(TIMER_SOFTIRQ);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;

	PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);

	/* Note: this timer irq context must be accounted for as well. */
	account_process_tick(p, user_tick);
	run_local_timers();
	rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
	if (in_irq())
		irq_work_tick();
#endif
	scheduler_tick();
	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
		run_posix_cpu_timers();
}

/*
 * Since schedule_timeout()'s timer is defined on the stack, it must store
 * the target task on the stack as well.
 */
struct process_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void process_timeout(struct timer_list *t)
{
	struct process_timer *timeout = from_timer(timeout, t, timer);

	wake_up_process(timeout->task);
}

/**
 * schedule_timeout - sleep until timeout
 * @timeout: timeout value in jiffies
 *
 * Make the current task sleep until @timeout jiffies have elapsed.
 * The function behaviour depends on the current task state
 * (see also set_current_state()):
 *
 * %TASK_RUNNING - the scheduler is called, but the task does not sleep
 * at all. That happens because sched_submit_work() does nothing for
 * tasks in %TASK_RUNNING state.
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be %TASK_RUNNING when this
 * routine returns.
 *
 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
 * the CPU away without a bound on the timeout. In this case the return
 * value will be %MAX_SCHEDULE_TIMEOUT.
 *
 * Returns 0 when the timer has expired otherwise the remaining time in
 * jiffies will be returned. In all cases the return value is guaranteed
 * to be non-negative.
 */
signed long __sched schedule_timeout(signed long timeout)
{
	struct process_timer timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * This special case keeps the caller comfortable: instead of
		 * abusing a negative value for "no timeout", we schedule
		 * unbounded and still return a valid (>= 0) offset.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Paranoia: a negative timeout is a caller bug. Complain
		 * loudly, leave the task runnable and return 0, since
		 * callers never check for a negative return value anyway.
		 */
		if (timeout < 0) {
			printk(KERN_ERR "schedule_timeout: wrong timeout "
				"value %lx\n", timeout);
			dump_stack();
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	timer.task = current;
	timer_setup_on_stack(&timer.timer, process_timeout, 0);
	__mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
	schedule();
	del_singleshot_timer_sync(&timer.timer);

	/* Remove the timer from the object tracker */
	destroy_timer_on_stack(&timer.timer);

	timeout = expire - jiffies;

 out:
	return timeout < 0 ? 0 : timeout;
}
EXPORT_SYMBOL(schedule_timeout);
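
/*
 * Illustrative usage (not part of the original file): wait up to one
 * second for an event, resuming early on wake_up_process():
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(HZ);
 *	if (remaining)
 *		;	// woken early; 'remaining' jiffies were left
 */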

/*
 * We can use __set_current_state() here because schedule_timeout() calls
 * schedule() unconditionally.
 */
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
	__set_current_state(TASK_INTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);

signed long __sched schedule_timeout_killable(signed long timeout)
{
	__set_current_state(TASK_KILLABLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_killable);

signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
	__set_current_state(TASK_UNINTERRUPTIBLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);

/*
 * Like schedule_timeout_uninterruptible(), except this task will not
 * contribute to load average.
 */
signed long __sched schedule_timeout_idle(signed long timeout)
{
	__set_current_state(TASK_IDLE);
	return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_idle);

#ifdef CONFIG_HOTPLUG_CPU
static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
{
	struct timer_list *timer;
	int cpu = new_base->cpu;

	while (!hlist_empty(head)) {
		timer = hlist_entry(head->first, struct timer_list, entry);
		detach_timer(timer, false);
		timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
		internal_add_timer(new_base, timer);
	}
}

int timers_prepare_cpu(unsigned int cpu)
{
	struct timer_base *base;
	int b;

	for (b = 0; b < NR_BASES; b++) {
		base = per_cpu_ptr(&timer_bases[b], cpu);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		base->is_idle = false;
	}
	return 0;
}

int timers_dead_cpu(unsigned int cpu)
{
	struct timer_base *old_base;
	struct timer_base *new_base;
	int b, i;

	BUG_ON(cpu_online(cpu));

	for (b = 0; b < NR_BASES; b++) {
		old_base = per_cpu_ptr(&timer_bases[b], cpu);
		new_base = get_cpu_ptr(&timer_bases[b]);
		/*
		 * The caller is globally serialized and nobody else
		 * takes two locks at once, deadlock is not possible.
		 */
		raw_spin_lock_irq(&new_base->lock);
		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

		/*
		 * The current CPUs base clock might be stale. Update it
		 * before moving the timers over.
		 */
		forward_timer_base(new_base);

		BUG_ON(old_base->running_timer);

		for (i = 0; i < WHEEL_SIZE; i++)
			migrate_timer_list(new_base, old_base->vectors + i);

		raw_spin_unlock(&old_base->lock);
		raw_spin_unlock_irq(&new_base->lock);
		put_cpu_ptr(&timer_bases);
	}
	return 0;
}

#endif /* CONFIG_HOTPLUG_CPU */

static void __init init_timer_cpu(int cpu)
{
	struct timer_base *base;
	int i;

	for (i = 0; i < NR_BASES; i++) {
		base = per_cpu_ptr(&timer_bases[i], cpu);
		base->cpu = cpu;
		raw_spin_lock_init(&base->lock);
		base->clk = jiffies;
		base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
		timer_base_init_expiry_lock(base);
	}
}

static void __init init_timer_cpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		init_timer_cpu(cpu);
}

void __init init_timers(void)
{
	init_timer_cpus();
	posix_cputimers_init_work();
	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}

/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs: Time in milliseconds to sleep for
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}

EXPORT_SYMBOL(msleep);

/**
 * msleep_interruptible - sleep waiting for signals
 * @msecs: Time in milliseconds to sleep for
 *
 * Return: the remaining time in milliseconds when interrupted by a
 * signal, 0 otherwise.
 */
unsigned long msleep_interruptible(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout && !signal_pending(current))
		timeout = schedule_timeout_interruptible(timeout);
	return jiffies_to_msecs(timeout);
}

EXPORT_SYMBOL(msleep_interruptible);

/**
 * usleep_range - Sleep for an approximate time
 * @min: Minimum time in usecs to sleep
 * @max: Maximum time in usecs to sleep
 *
 * In non-atomic context where the exact wakeup time is flexible, use
 * usleep_range() instead of udelay(). The sleep improves responsiveness
 * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
 * power usage by allowing hrtimers to take advantage of an already-
 * scheduled interrupt instead of scheduling a new one just for this sleep.
 */
void __sched usleep_range(unsigned long min, unsigned long max)
{
	ktime_t exp = ktime_add_us(ktime_get(), min);
	u64 delta = (u64)(max - min) * NSEC_PER_USEC;

	for (;;) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		/* Do not return before the requested sleep time has elapsed */
		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
			break;
	}
}
EXPORT_SYMBOL(usleep_range);
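
/*
 * Choosing a sleep primitive (summary, not part of the original file):
 * for short delays in atomic context use udelay(); for roughly 10 usecs
 * up to ~20 msecs in sleepable context prefer usleep_range(), since
 * msleep() rounds up to jiffies and can oversleep badly at that scale;
 * for longer sleepable delays msleep() is fine.
 */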