1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28#include <linux/export.h>
29#include <linux/kernel.h>
30#include <linux/sched.h>
31#include <linux/init.h>
32#include <linux/signal.h>
33#include <linux/completion.h>
34#include <linux/workqueue.h>
35#include <linux/slab.h>
36#include <linux/cpu.h>
37#include <linux/notifier.h>
38#include <linux/kthread.h>
39#include <linux/hardirq.h>
40#include <linux/mempolicy.h>
41#include <linux/freezer.h>
42#include <linux/debug_locks.h>
43#include <linux/lockdep.h>
44#include <linux/idr.h>
45#include <linux/jhash.h>
46#include <linux/hashtable.h>
47#include <linux/rculist.h>
48#include <linux/nodemask.h>
49#include <linux/moduleparam.h>
50#include <linux/uaccess.h>
51#include <linux/sched/isolation.h>
52#include <linux/nmi.h>
53#include <linux/kvm_para.h>
54
55#include "workqueue_internal.h"
56
enum {
	/*
	 * worker_pool flags
	 *
	 * A bound pool is either associated or disassociated with its CPU.
	 * While associated (!DISASSOCIATED), all workers are bound to the
	 * CPU and none has %WORKER_UNBOUND set and concurrency management
	 * is in effect.
	 *
	 * While DISASSOCIATED, the cpu may be offline and all workers have
	 * %WORKER_UNBOUND set and concurrency management disabled, and may
	 * be executing on any CPU.  The pool behaves as an unbound one.
	 */
	POOL_MANAGER_ACTIVE = 1 << 0,	/* being managed */
	POOL_DISASSOCIATED = 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE = 1 << 1,		/* die die die */
	WORKER_IDLE = 1 << 2,		/* is idle */
	WORKER_PREP = 1 << 3,		/* preparing to run works */
	WORKER_CPU_INTENSIVE = 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND = 1 << 7,	/* worker is unbound */
	WORKER_REBOUND = 1 << 8,	/* worker was rebound */

	/* any of these excludes the worker from concurrency management */
	WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
	WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS = 2,	/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER = 6,	/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER = 6,	/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO = 4,	/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT = 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
					/* call for help after 10ms
					   (min two ticks) */

	MAYDAY_INTERVAL = HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN = HZ,		/* time to breath after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give MIN_NICE.
	 */
	RESCUER_NICE_LEVEL = MIN_NICE,
	HIGHPRI_NICE_LEVEL = MIN_NICE,

	WQ_NAME_LEN = 24,
};
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
 * One worker pool.  A pool serves either a per-cpu (bound) or an unbound
 * workqueue; workers and pending work items are shared within a pool.
 */
struct worker_pool {
	raw_spinlock_t lock;		/* the pool lock */
	int cpu;			/* the associated cpu, -1 if unbound */
	int node;			/* the associated NUMA node ID */
	int id;				/* pool ID, from worker_pool_idr */
	unsigned int flags;		/* POOL_* flags above */

	unsigned long watchdog_ts;	/* timestamp for wq watchdog */

	struct list_head worklist;	/* pending work items */

	int nr_workers;			/* total number of workers */
	int nr_idle;			/* currently idle workers */

	struct list_head idle_list;	/* list of idle workers */
	struct timer_list idle_timer;	/* reaps excess idle workers */
	struct timer_list mayday_timer;	/* SOS timer for workers */

	/* a workers is either on busy_hash or idle_list, or the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
					/* hash of busy workers */

	struct worker *manager;		/* purely informational */
	struct list_head workers;	/* attached workers */
	struct completion *detach_completion; /* all workers detached */

	struct ida worker_ida;		/* worker IDs for task name */

	struct workqueue_attrs *attrs;	/* worker attributes */
	struct hlist_node hash_node;	/* unbound_pool_hash node */
	int refcnt;			/* refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
193
194
195
196
197
198
199
/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work->data are used for flags and the rest points to the pwq; thus
 * the alignment requirement below.
 */
struct pool_workqueue {
	struct worker_pool *pool;	/* the associated pool */
	struct workqueue_struct *wq;	/* the owning workqueue */
	int work_color;			/* current color */
	int flush_color;		/* flushing color */
	int refcnt;			/* reference count */
	int nr_in_flight[WORK_NR_COLORS];
					/* nr of in_flight works */

	int nr_active;			/* nr of active works */
	int max_active;			/* max active works */
	struct list_head delayed_works;	/* delayed works */
	struct list_head pwqs_node;	/* node on wq->pwqs */
	struct list_head mayday_node;	/* node on wq->maydays */

	/*
	 * Release of unbound pwq is punted to system_wq.  See put_pwq()
	 * and pwq_unbound_release_workfn() for details.  pool_workqueue
	 * itself is also RCU protected so that the first pwq can be
	 * determined without grabbing wq->mutex.
	 */
	struct work_struct unbound_release_work;
	struct rcu_head rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);
223
224
225
226
/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head list;		/* node on wq->flusher_queue */
	int flush_color;		/* flush color waiting for */
	struct completion done;		/* flush completion */
};
232
233struct wq_device;
234
235
236
237
238
/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head pwqs;		/* all pwqs of this wq */
	struct list_head list;		/* node on the global workqueues list */

	struct mutex mutex;		/* protects this wq */
	int work_color;			/* current work color */
	int flush_color;		/* current flush color */
	atomic_t nr_pwqs_to_flush;	/* flush in progress */
	struct wq_flusher *first_flusher; /* first flusher */
	struct list_head flusher_queue;	/* flush waiters */
	struct list_head flusher_overflow; /* flush overflow list */

	struct list_head maydays;	/* pwqs requesting rescue */
	struct worker *rescuer;		/* rescue worker */

	int nr_drainers;		/* drain in progress */
	int saved_max_active;		/* saved pwq max_active */

	struct workqueue_attrs *unbound_attrs;	/* only for unbound wqs */
	struct pool_workqueue *dfl_pwq;		/* only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device *wq_dev;	/* sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	char *lock_name;
	struct lock_class_key key;
	struct lockdep_map lockdep_map;
#endif
	char name[WQ_NAME_LEN];		/* name of the wq */

	/*
	 * Destruction of workqueue_struct is RCU protected to allow
	 * walking the workqueues list without grabbing wq_pool_mutex.
	 * This is used to dump all workqueues from sysrq.
	 */
	struct rcu_head rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int flags ____cacheline_aligned; /* WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};
282
static struct kmem_cache *pwq_cache;

/* possible CPUs of each node, set up during wq_numa_init */
static cpumask_var_t *wq_numa_possible_cpumask;

/* disable NUMA affinity of unbound workqueues */
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;			/* can kworkers be created yet? */

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* buf for wq_update_unbound_numa_attrs(), protected by wq_pool_mutex */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
/* wait for manager to go away */
static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

static LIST_HEAD(workqueues);		/* list of all workqueues */
static bool workqueue_freezing;		/* the workqueues are freezing */

/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* PL: idr of all pools */

/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
static void show_pwq(struct pool_workqueue *pwq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
364
#define assert_rcu_or_pool_mutex() \
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
	 !lockdep_is_held(&wq_pool_mutex), \
	 "RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
	 !lockdep_is_held(&wq->mutex) && \
	 !lockdep_is_held(&wq_pool_mutex), \
	 "RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu) \
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
	 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	 (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or RCU read
 * locked.  If the pool needs to be used beyond the locking in effect, the
 * caller is responsible for guaranteeing that the pool stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool(pool, pi) \
	idr_for_each_entry(&worker_pool_idr, pool, pi) \
	 if (({ assert_rcu_or_pool_mutex(); false; })) { } \
	 else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with wq_pool_attach_mutex.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool_worker(worker, pool) \
	list_for_each_entry((worker), &(pool)->workers, node) \
	 if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
	 else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or RCU read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 */
#define for_each_pwq(pwq, wq) \
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
	 lockdep_is_held(&(wq->mutex)))
428
#ifdef CONFIG_DEBUG_OBJECTS_WORK

static const struct debug_obj_descr work_debug_descr;

/* debugobjects hint: identify a work item by its callback function */
static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

/* is this work item statically initialized (WORK_STRUCT_STATIC set)? */
static bool work_is_static_object(void *addr)
{
	struct work_struct *work = addr;

	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr work_debug_descr = {
	.name = "work_struct",
	.debug_hint = work_debug_hint,
	.is_static_object = work_is_static_object,
	.fixup_init = work_fixup_init,
	.fixup_free = work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	destroy_timer_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
525
526
527
528
529
530
531
532
533static int worker_pool_assign_id(struct worker_pool *pool)
534{
535 int ret;
536
537 lockdep_assert_held(&wq_pool_mutex);
538
539 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
540 GFP_KERNEL);
541 if (ret >= 0) {
542 pool->id = ret;
543 return 0;
544 }
545 return ret;
546}
547
548
549
550
551
552
553
554
555
556
557
558
559
/**
 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
 * @wq: the target workqueue
 * @node: the node ID
 *
 * This must be called with any of wq_pool_mutex, wq->mutex or RCU
 * read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 *
 * Return: The unbound pool_workqueue for @node.
 */
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex_or_pool_mutex(wq);

	/*
	 * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
	 * delayed item is pending.  The plan is to keep CPU -> NODE
	 * mapping valid and stable across CPU on/offlines.  Once that
	 * happens, this workaround can be removed.
	 */
	if (unlikely(node == NUMA_NO_NODE))
		return wq->dfl_pwq;

	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}
576
/* encode @color into the color bits of work->data flags */
static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}
581
/* extract the color bits from @work's data word */
static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
	 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
}
587
/* advance to the next flush color, wrapping at WORK_NR_COLORS */
static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
/*
 * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
 * contain the pointer to the queued pwq.  Once execution starts, the flag
 * is cleared and the high bits contain OFFQ flags and pool ID.
 *
 * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
 * and clear_work_data() can be used to set the pwq, pool or clear
 * work->data.  These functions should only be called while the work is
 * owned - ie. while the PENDING bit is set.
 *
 * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
 * corresponding to a work.  Pool is available once the work has been
 * queued anywhere after initialization until it is sync canceled; pwq is
 * available only while the work item is queued.
 *
 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
 * canceled.  While being canceled, a work item may have its PENDING set
 * but stay off timer and worklist for arbitrarily long and nobody should
 * try to steal the PENDING bit.
 */
static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	WARN_ON_ONCE(!work_pending(work));
	atomic_long_set(&work->data, data | flags | work_static(work));
}
619
/* point @work's data at @pwq and mark it pending; @work must be owned */
static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long extra_flags)
{
	set_work_data(work, (unsigned long)pwq,
		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
}
626
/* record @pool_id in @work's data while leaving PENDING set */
static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id)
{
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
		      WORK_STRUCT_PENDING);
}
633
/* record @pool_id and release ownership by clearing PENDING */
static void set_work_pool_and_clear_pending(struct work_struct *work,
					    int pool_id)
{
	/*
	 * The following wmb is paired with the implied mb in
	 * test_and_set_bit(PENDING) and ensures all updates to @work made
	 * here are visible to and precede any updates by the next PENDING
	 * owner.
	 */
	smp_wmb();
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
	/*
	 * The following mb guarantees that previous clear of a PENDING bit
	 * will not be reordered with any speculative LOADS or STORES from
	 * work->current_func, which is executed afterwards.  This possible
	 * reordering can lead to a missed execution on attempt to queue
	 * the same @work.  E.g. consider this case:
	 *
	 *   CPU#0                         CPU#1
	 *   ----------------------------  --------------------------------
	 *
	 * 1  STORE event_indicated
	 * 2  queue_work_on() {
	 * 3    test_and_set_bit(PENDING)
	 * 4 }                             set_..._and_clear_pending() {
	 * 5                                 set_work_data() # clear bit
	 * 6                                 smp_mb()
	 * 7                               work->current_func() {
	 * 8				      LOAD event_indicated
	 *				   }
	 *
	 * Without an explicit full barrier speculative LOAD on line 8 can
	 * be executed before CPU#0 does STORE on line 1.  If that happens,
	 * CPU#0 observes the PENDING bit is still set and new execution of
	 * a @work is not queued in a hope, that CPU#1 will eventually
	 * finish the queued @work.  Meanwhile CPU#1 does not see
	 * event_indicated is set, because speculative LOAD was executed
	 * before actual STORE.
	 */
	smp_mb();
}
675
/* reset @work's data to the no-pool state; @work must be owned */
static void clear_work_data(struct work_struct *work)
{
	smp_wmb();	/* see set_work_pool_and_clear_pending() */
	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}
681
682static struct pool_workqueue *get_work_pwq(struct work_struct *work)
683{
684 unsigned long data = atomic_long_read(&work->data);
685
686 if (data & WORK_STRUCT_PWQ)
687 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
688 else
689 return NULL;
690}
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and allow read
 * access under RCU read lock.  As such, this function should be
 * called under wq_pool_mutex or inside of a rcu_read_lock() region.
 *
 * All fields of the returned pool are accessible as long as the above
 * mentioned locking is in effect.  If the returned pool needs to be used
 * beyond the critical section, the caller is responsible for ensuring the
 * returned pool is and stays online.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
 */
static struct worker_pool *get_work_pool(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);
	int pool_id;

	assert_rcu_or_pool_mutex();

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;

	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
	if (pool_id == WORK_OFFQ_POOL_NONE)
		return NULL;

	return idr_find(&worker_pool_idr, pool_id);
}
724
725
726
727
728
729
730
731
/**
 * get_work_pool_id - return the worker pool ID a given work is associated with
 * @work: the work item of interest
 *
 * Return: The worker_pool ID @work was last associated with.
 * %WORK_OFFQ_POOL_NONE if none.
 */
static int get_work_pool_id(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;

	return data >> WORK_OFFQ_POOL_SHIFT;
}
742
/* flag @work as being canceled while preserving its pool ID and PENDING */
static void mark_work_canceling(struct work_struct *work)
{
	unsigned long pool_id = get_work_pool_id(work);

	pool_id <<= WORK_OFFQ_POOL_SHIFT;
	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}
750
/* is @work off-queue and marked %WORK_OFFQ_CANCELING? */
static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}
757
758
759
760
761
762
763
/*
 * Policy functions.  These define the policies on how the global worker
 * pools are managed.  Unless noted otherwise, these functions assume that
 * they're being called with pool->lock held.
 *
 * Need to wake up a worker?  Called from anything but currently
 * running workers.
 *
 * Note that, because unbound workers never contribute to nr_running, this
 * function will always return %true for unbound pools as long as the
 * worklist isn't empty.
 */
static bool __need_more_worker(struct worker_pool *pool)
{
	return !atomic_read(&pool->nr_running);
}
768
769
770
771
772
773
774
775
776
/* any pending work and no one currently running to pick it up? */
static bool need_more_worker(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) && __need_more_worker(pool);
}
781
782
/* Can I start working?  Called from busy but !running workers. */
static bool may_start_working(struct worker_pool *pool)
{
	return pool->nr_idle;
}
787
788
/* Do I need to keep working?  Called from currently running workers. */
static bool keep_working(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) &&
	 atomic_read(&pool->nr_running) <= 1;
}
794
795
/* Do we need a new worker?  Called from manager. */
static bool need_to_create_worker(struct worker_pool *pool)
{
	return need_more_worker(pool) && !may_start_working(pool);
}
800
801
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
	int nr_busy = pool->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}
810
811
812
813
814
815
816static struct worker *first_idle_worker(struct worker_pool *pool)
817{
818 if (unlikely(list_empty(&pool->idle_list)))
819 return NULL;
820
821 return list_first_entry(&pool->idle_list, struct worker, entry);
822}
823
824
825
826
827
828
829
830
831
832
/**
 * wake_up_worker - wake up an idle worker
 * @pool: worker pool to wake worker from
 *
 * Wake up the first idle worker of @pool.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *worker = first_idle_worker(pool);

	if (likely(worker))
		wake_up_process(worker->task);
}
840
841
842
843
844
845
846
/**
 * wq_worker_running - a worker is running again
 * @task: task waking up
 *
 * This function is called when a worker returns from schedule().
 */
void wq_worker_running(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);

	if (!worker->sleeping)
		return;
	/* re-add this worker's concurrency contribution (see wq_worker_sleeping()) */
	if (!(worker->flags & WORKER_NOT_RUNNING))
		atomic_inc(&worker->pool->nr_running);
	worker->sleeping = 0;
}
857
858
859
860
861
862
863
864
865
/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
 * going to sleep.  Preemption needs to be disabled to protect ->sleeping
 * assignment.
 */
void wq_worker_sleeping(struct task_struct *task)
{
	struct worker *next, *worker = kthread_data(task);
	struct worker_pool *pool;

	/*
	 * Rescuers, which may not have all the fields set up like normal
	 * workers, also reach here, let's not access anything before
	 * checking NOT_RUNNING.
	 */
	if (worker->flags & WORKER_NOT_RUNNING)
		return;

	pool = worker->pool;

	/* Return if preempted before wq_worker_running() was reached */
	if (worker->sleeping)
		return;

	worker->sleeping = 1;
	raw_spin_lock_irq(&pool->lock);

	/*
	 * The counterpart of the following dec_and_test, implied mb,
	 * worklist not empty test sequence is in insert_work().
	 * Please read comment there.
	 *
	 * NOT_RUNNING is clear.  This means that we're bound to and
	 * running on the local cpu w/ rq lock held and preemption
	 * disabled, which in turn means that none else could be
	 * manipulating idle_list, so dereferencing idle_list without pool
	 * lock is safe.
	 */
	if (atomic_dec_and_test(&pool->nr_running) &&
	    !list_empty(&pool->worklist)) {
		next = first_idle_worker(pool);
		if (next)
			wake_up_process(next->task);
	}
	raw_spin_unlock_irq(&pool->lock);
}
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
/**
 * wq_worker_last_func - retrieve worker's last work function
 * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed.  This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
 * raw_spin_lock_irq(rq->lock)
 *
 * This function is called during schedule() when a kworker is going
 * to sleep.  It's used by psi to identify aggregation workers during
 * dequeuing, to allow periodic aggregation to shut-off when that
 * worker is the last task in the system or cgroup to go to sleep.
 *
 * As this function doesn't involve any workqueue-related locking, it
 * only returns stable values when called from inside the scheduler's
 * queuing and dequeuing paths, when @task, which must be a kworker,
 * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
 * hasn't executed any work yet.
 */
work_func_t wq_worker_last_func(struct task_struct *task)
{
	struct worker *worker = kthread_data(task);

	return worker->last_func;
}
938
939
940
941
942
943
944
945
946
947
948
/**
 * worker_set_flags - set worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to set
 *
 * Set @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock)
 */
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;

	WARN_ON_ONCE(worker->task != current);

	/* If transitioning into NOT_RUNNING, adjust nr_running. */
	if ((flags & WORKER_NOT_RUNNING) &&
	    !(worker->flags & WORKER_NOT_RUNNING)) {
		atomic_dec(&pool->nr_running);
	}

	worker->flags |= flags;
}
963
964
965
966
967
968
969
970
971
972
973
/**
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to clear
 *
 * Clear @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock)
 */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;
	unsigned int oflags = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	/*
	 * If transitioning out of NOT_RUNNING, increment nr_running.  Note
	 * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is mask
	 * of multiple flags, not a single flag.
	 */
	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
		if (!(worker->flags & WORKER_NOT_RUNNING))
			atomic_inc(&pool->nr_running);
}
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
/**
 * find_worker_executing_work - find worker which is executing a work
 * @pool: pool of interest
 * @work: work to find worker for
 *
 * Find a worker which is executing @work on @pool by searching
 * @pool->busy_hash which is keyed by the address of @work.  For a worker
 * to match, its current_work must match @work and its current_func must
 * match @work's function; the latter distinguishes a recycled work item
 * from a genuinely still-executing one (a freed work struct may be reused
 * for a different function at the same address).
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 *
 * Return:
 * Pointer to worker which is executing @work if found, %NULL
 * otherwise.
 */
static struct worker *find_worker_executing_work(struct worker_pool *pool,
						 struct work_struct *work)
{
	struct worker *worker;

	hash_for_each_possible(pool->busy_hash, worker, hentry,
			       (unsigned long)work)
		if (worker->current_work == work &&
		    worker->current_func == work->func)
			return worker;

	return NULL;
}
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out parameter for nested worklist walking
 *
 * Schedule linked works starting from @work to @head.  Work series to
 * be scheduled starts at @work and includes any consecutive work with
 * WORK_STRUCT_LINKED set in its predecessor.
 *
 * If @nextp is not NULL, it's updated to point to the next work of
 * the last scheduled work.  This allows move_linked_works() to be
 * nested inside outer list_for_each_entry_safe().
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void move_linked_works(struct work_struct *work, struct list_head *head,
			      struct work_struct **nextp)
{
	struct work_struct *n;

	/*
	 * Linked worklist will always end before the end of the list,
	 * use NULL for list head.
	 */
	list_for_each_entry_safe_from(work, n, NULL, entry) {
		list_move_tail(&work->entry, head);
		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
			break;
	}

	/*
	 * If we're already inside safe list traversal and have moved
	 * multiple works to the scheduled queue, the next position
	 * needs to be updated.
	 */
	if (nextp)
		*nextp = n;
}
1080
1081
1082
1083
1084
1085
1086
1087
/**
 * get_pwq - get an extra reference on the specified pool_workqueue
 * @pwq: pool_workqueue to get
 *
 * Obtain an extra reference on @pwq.  The caller should guarantee that
 * @pwq has positive refcnt and be holding the matching pool->lock.
 */
static void get_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	WARN_ON_ONCE(pwq->refcnt <= 0);
	pwq->refcnt++;
}
1094
1095
1096
1097
1098
1099
1100
1101
/**
 * put_pwq - put a pool_workqueue reference
 * @pwq: pool_workqueue to put
 *
 * Drop a reference of @pwq.  If its refcnt reaches zero, schedule its
 * destruction.  The caller should be holding the matching pool->lock.
 */
static void put_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	if (likely(--pwq->refcnt))
		return;
	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
		return;
	/*
	 * @pwq can't be released under pool->lock, bounce to
	 * pwq_unbound_release_workfn().  This function may be called
	 * from IRQ-safe context and the following lockdep subclass
	 * dance is to avoid a false positive between pool->lock and
	 * system_wq's pool->lock in the release path.
	 */
	schedule_work(&pwq->unbound_release_work);
}
1119
1120
1121
1122
1123
1124
1125
/**
 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
 * @pwq: pool_workqueue to put (can be %NULL)
 *
 * put_pwq() with locking.  This function also allows %NULL @pwq.
 */
static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
	if (pwq) {
		/*
		 * As both pwqs and pools are RCU protected, the
		 * following lock operations are safe.
		 */
		raw_spin_lock_irq(&pwq->pool->lock);
		put_pwq(pwq);
		raw_spin_unlock_irq(&pwq->pool->lock);
	}
}
1138
/* move a max_active-throttled (delayed) work item onto its pool's worklist */
static void pwq_activate_delayed_work(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);

	trace_workqueue_activate_work(work);
	/* record queueing time for the workqueue watchdog */
	if (list_empty(&pwq->pool->worklist))
		pwq->pool->watchdog_ts = jiffies;
	move_linked_works(work, &pwq->pool->worklist, NULL);
	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
	pwq->nr_active++;
}
1150
/* activate the oldest delayed work item of @pwq */
static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
{
	struct work_struct *work = list_first_entry(&pwq->delayed_works,
						    struct work_struct, entry);

	pwq_activate_delayed_work(work);
}
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
/**
 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
 * @pwq: pwq of interest
 * @color: color of work which left the queue
 *
 * A work either has completed or is removed from pending queue,
 * decrement nr_in_flight of its pwq and handle workqueue flushing.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
	/* uncolored work items don't participate in flushing or nr_active */
	if (color == WORK_NO_COLOR)
		goto out_put;

	pwq->nr_in_flight[color]--;

	pwq->nr_active--;
	if (!list_empty(&pwq->delayed_works)) {
		/* one down, submit a delayed one */
		if (pwq->nr_active < pwq->max_active)
			pwq_activate_first_delayed(pwq);
	}

	/* is flush in progress and are we at the flushing tip? */
	if (likely(pwq->flush_color != color))
		goto out_put;

	/* are there still in-flight works? */
	if (pwq->nr_in_flight[color])
		goto out_put;

	/* this pwq is done, clear flush_color */
	pwq->flush_color = -1;

	/*
	 * If this was the last pwq, wake up the first flusher.  It
	 * will handle the rest.
	 */
	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
		complete(&pwq->wq->first_flusher->done);
out_put:
	put_pwq(pwq);
}
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
/**
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store irq state
 *
 * Try to grab PENDING bit of @work.  This function can handle @work in any
 * stable state - idle, on timer or on worklist.
 *
 * Return:
 *  1		if @work was pending and we successfully stole PENDING
 *  0		if @work was idle and we claimed PENDING
 *  -EAGAIN	if PENDING couldn't be grabbed at the moment, safe to busy-retry
 *  -ENOENT	if someone else is canceling @work, this state may persist
 *		for arbitrarily long
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
 * interrupted while holding PENDING and @work off queue, irq must be
 * disabled on entry.  This, combined with delayed_work->timer being
 * irqsafe, ensures that we return -EAGAIN for finite short period of time.
 *
 * On successful return, >= 0, irq is disabled and the caller is
 * responsible for releasing it using local_irq_restore(*@flags).
 *
 * This function is safe to call from any context including IRQ handler.
 */
static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
			       unsigned long *flags)
{
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	local_irq_save(*flags);

	/* try to steal the timer if it exists */
	if (is_dwork) {
		struct delayed_work *dwork = to_delayed_work(work);

		/*
		 * dwork->timer is irqsafe.  If del_timer() fails, it's
		 * guaranteed that the timer is not queued anywhere and not
		 * running on the local CPU.
		 */
		if (likely(del_timer(&dwork->timer)))
			return 1;
	}

	/* try to claim PENDING the normal way */
	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	rcu_read_lock();
	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */
	pool = get_work_pool(work);
	if (!pool)
		goto fail;

	raw_spin_lock(&pool->lock);
	/*
	 * work->data is guaranteed to point to pwq only while the work
	 * item is queued on pwq->wq, and both updating work->data to point
	 * to pwq on queueing and to pool on dequeueing are done under
	 * pwq->pool->lock.  This in turn guarantees that, if work->data
	 * points to pwq which is associated with a locked pool, the work
	 * item is currently queued on that pool.
	 */
	pwq = get_work_pwq(work);
	if (pwq && pwq->pool == pool) {
		debug_work_deactivate(work);

		/*
		 * A delayed work item cannot be grabbed directly because
		 * it might have linked NO_COLOR work items which, if left
		 * on the delayed_list, will confuse pwq->nr_active
		 * management later on and cause stall.  Make sure the work
		 * item is activated before grabbing.
		 */
		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
			pwq_activate_delayed_work(work);

		list_del_init(&work->entry);
		pwq_dec_nr_in_flight(pwq, get_work_color(work));

		/* work->data points to pwq iff queued, point to pool */
		set_work_pool_and_keep_pending(work, pool->id);

		raw_spin_unlock(&pool->lock);
		rcu_read_unlock();
		return 1;
	}
	raw_spin_unlock(&pool->lock);
fail:
	rcu_read_unlock();
	local_irq_restore(*flags);
	if (work_is_canceling(work))
		return -ENOENT;
	cpu_relax();
	return -EAGAIN;
}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
/**
 * insert_work - insert a work into a pool
 * @pwq: pwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work which belongs to @pwq after @head.  @extra_flags is or'd to
 * work_struct flags.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
			struct list_head *head, unsigned int extra_flags)
{
	struct worker_pool *pool = pwq->pool;

	/* record the work call stack in order to print it in KASAN reports */
	kasan_record_aux_stack(work);

	/* we own @work, set data and link */
	set_work_pwq(work, pwq, extra_flags);
	list_add_tail(&work->entry, head);
	get_pwq(pwq);

	/*
	 * Ensure either wq_worker_sleeping() sees the above
	 * list_add_tail() or we see zero nr_running to avoid workers lying
	 * around lazily while there are works to be processed.
	 */
	smp_mb();

	if (__need_more_worker(pool))
		wake_up_worker(pool);
}
1349
1350
1351
1352
1353
1354static bool is_chained_work(struct workqueue_struct *wq)
1355{
1356 struct worker *worker;
1357
1358 worker = current_wq_worker();
1359
1360
1361
1362
1363 return worker && worker->current_pwq->wq == wq;
1364}
1365
1366
1367
1368
1369
1370
/*
 * When queueing an unbound work item to a wq, prefer local CPU if allowed
 * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
 * avoid perturbing sensitive tasks.
 */
static int wq_select_unbound_cpu(int cpu)
{
	static bool printed_dbg_warning;
	int new_cpu;

	if (likely(!wq_debug_force_rr_cpu)) {
		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
			return cpu;
	} else if (!printed_dbg_warning) {
		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
		printed_dbg_warning = true;
	}

	if (cpumask_empty(wq_unbound_cpumask))
		return cpu;

	/* round-robin: continue from where this CPU left off last time */
	new_cpu = __this_cpu_read(wq_rr_cpu_last);
	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
	if (unlikely(new_cpu >= nr_cpu_ids)) {
		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
		if (unlikely(new_cpu >= nr_cpu_ids))
			return cpu;
	}
	__this_cpu_write(wq_rr_cpu_last, new_cpu);

	return new_cpu;
}
1398
/* queue @work on @wq; the caller owns PENDING and has irqs disabled */
static void __queue_work(int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct pool_workqueue *pwq;
	struct worker_pool *last_pool;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned int req_cpu = cpu;

	/*
	 * While a work item is PENDING && off queue, a task trying to
	 * steal the PENDING will busy-loop waiting for it to either get
	 * queued or lose PENDING.  Grabbing PENDING and queueing should
	 * happen with IRQ disabled.
	 */
	lockdep_assert_irqs_disabled();

	/* if draining, only works from the same workqueue are allowed */
	if (unlikely(wq->flags & __WQ_DRAINING) &&
	    WARN_ON_ONCE(!is_chained_work(wq)))
		return;
	rcu_read_lock();
retry:
	/* pwq which will be used unless @work is executing elsewhere */
	if (wq->flags & WQ_UNBOUND) {
		if (req_cpu == WORK_CPU_UNBOUND)
			cpu = wq_select_unbound_cpu(raw_smp_processor_id());
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
	} else {
		if (req_cpu == WORK_CPU_UNBOUND)
			cpu = raw_smp_processor_id();
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	}

	/*
	 * If @work was previously on a different pool, it might still be
	 * running there, in which case the work needs to be queued on that
	 * pool to guarantee non-reentrancy.
	 */
	last_pool = get_work_pool(work);
	if (last_pool && last_pool != pwq->pool) {
		struct worker *worker;

		raw_spin_lock(&last_pool->lock);

		worker = find_worker_executing_work(last_pool, work);

		if (worker && worker->current_pwq->wq == wq) {
			pwq = worker->current_pwq;
		} else {
			/* meh... not running there, queue here */
			raw_spin_unlock(&last_pool->lock);
			raw_spin_lock(&pwq->pool->lock);
		}
	} else {
		raw_spin_lock(&pwq->pool->lock);
	}

	/*
	 * pwq is determined and locked.  For unbound pools, we could have
	 * raced with pwq release and it could already be dead.  If its
	 * refcnt is zero, repeat pwq selection.  Note that pwqs never die
	 * without another pwq replacing it in the numa_pwq_tbl or while
	 * work items are executing on it, so the retrying is guaranteed to
	 * make forward-progress.
	 */
	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			raw_spin_unlock(&pwq->pool->lock);
			cpu_relax();
			goto retry;
		}
		/* oops */
		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	/* pwq determined, queue */
	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry)))
		goto out;

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	if (likely(pwq->nr_active < pwq->max_active)) {
		trace_workqueue_activate_work(work);
		pwq->nr_active++;
		worklist = &pwq->pool->worklist;
		if (list_empty(worklist))
			pwq->pool->watchdog_ts = jiffies;
	} else {
		/* max_active reached, throttle onto the delayed list */
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
	}

	debug_work_activate(work);
	insert_work(pwq, work, worklist, work_flags);

out:
	raw_spin_unlock(&pwq->pool->lock);
	rcu_read_unlock();
}
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
		   struct work_struct *work)
{
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_work_on);
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
/**
 * workqueue_select_cpu_near - Select a CPU based on NUMA node
 * @node: NUMA node ID that we want to select a CPU from
 *
 * This function will attempt to find a "random" cpu available on a given
 * node.  If there are no CPUs available on the given node it will return
 * WORK_CPU_UNBOUND indicating that we should just schedule to any
 * available CPU if we need to schedule this work.
 */
static int workqueue_select_cpu_near(int node)
{
	int cpu;

	/* No point in doing this if NUMA isn't enabled for workqueues */
	if (!wq_numa_enabled)
		return WORK_CPU_UNBOUND;

	/* Delay binding to CPU if node is not valid or online */
	if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
		return WORK_CPU_UNBOUND;

	/* Use local node/cpu if we are already there */
	cpu = raw_smp_processor_id();
	if (node == cpu_to_node(cpu))
		return cpu;

	/* Use "random" otherwise know as "first" online CPU of node */
	cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);

	/* If CPU is valid return that, otherwise just defer */
	return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
}
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
/**
 * queue_work_node - queue work on a "random" cpu for a given NUMA node
 * @node: NUMA node that we are targeting the work for
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a "random" CPU within a given NUMA node.  The basic
 * idea here is to provide a way to somehow associate work with a given
 * NUMA node.
 *
 * This function will only make a best effort attempt at getting this onto
 * the right NUMA node.  If no node is requested or the requested node is
 * offline then we just fall back to standard queue_work behavior.
 *
 * Currently the "random" CPU ends up being the first available CPU in the
 * intersection of cpu_online_mask and the cpumask of the node, unless we
 * are running on the node.  In that case we just use the current CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_node(int node, struct workqueue_struct *wq,
		     struct work_struct *work)
{
	unsigned long flags;
	bool ret = false;

	/*
	 * This current implementation is specific to unbound workqueues.
	 * Specifically we only return the first available CPU for a given
	 * node instead of cycling through individual CPUs within the node.
	 *
	 * If this is used with a per-cpu workqueue then the logic in
	 * workqueue_select_cpu_near would need to be updated to allow for
	 * some round robin type logic.
	 */
	WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		int cpu = workqueue_select_cpu_near(node);

		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_node);
1617
/* timer callback for delayed works: push the work onto its workqueue */
void delayed_work_timer_fn(struct timer_list *t)
{
	struct delayed_work *dwork = from_timer(dwork, t, timer);

	/* should have been called from irqsafe timer with irq already off */
	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
}
EXPORT_SYMBOL(delayed_work_timer_fn);
1626
/* arm @dwork's timer (or queue immediately if !@delay); caller owns PENDING */
static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
				 struct delayed_work *dwork, unsigned long delay)
{
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	WARN_ON_ONCE(!wq);
	WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn);
	WARN_ON_ONCE(timer_pending(timer));
	WARN_ON_ONCE(!list_empty(&work->entry));

	/*
	 * If @delay is 0, queue @dwork->work immediately.  This is for
	 * both optimization and correctness.  The earliest @timer can
	 * expire is on the closest next tick and delayed_work users depend
	 * on that there's no such delay when @delay is 0.
	 */
	if (!delay) {
		__queue_work(cpu, wq, &dwork->work);
		return;
	}

	dwork->wq = wq;
	dwork->cpu = cpu;
	timer->expires = jiffies + delay;

	if (unlikely(cpu != WORK_CPU_UNBOUND))
		add_timer_on(timer, cpu);
	else
		add_timer(timer);
}
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
 */
bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			   struct delayed_work *dwork, unsigned long delay)
{
	struct work_struct *work = &dwork->work;
	bool ret = false;
	unsigned long flags;

	/* read the comment in __queue_work() */
	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
 * modify @dwork's timer so that it expires after @delay.  If @delay is
 * zero, @work is guaranteed to be scheduled immediately regardless of its
 * current state.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
 */
bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
			 struct delayed_work *dwork, unsigned long delay)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(&dwork->work, true, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (likely(ret >= 0)) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		local_irq_restore(flags);
	}

	/* -ENOENT from try_to_grab_pending() becomes %true */
	return ret;
}
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1727
/* RCU callback: the grace period has elapsed, queue the actual work item */
static void rcu_work_rcufn(struct rcu_head *rcu)
{
	struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);

	/* read the comment in __queue_work() */
	local_irq_disable();
	__queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
	local_irq_enable();
}
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
/**
 * queue_rcu_work - queue work after a RCU grace period
 * @wq: workqueue to use
 * @rwork: work to queue
 *
 * Return: %false if @rwork was already pending, %true otherwise.  Note
 * that a full RCU grace period is guaranteed only after a %true return.
 * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
{
	struct work_struct *work = &rwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		rwork->wq = wq;
		call_rcu(&rwork->rcu, rcu_work_rcufn);
		return true;
	}

	return false;
}
EXPORT_SYMBOL(queue_rcu_work);
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_enter_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
	    WARN_ON_ONCE(!list_empty(&worker->entry) &&
			 (worker->hentry.next || worker->hentry.pprev)))
		return;

	/* can't use worker_set_flags(), also called from create_worker() */
	worker->flags |= WORKER_IDLE;
	pool->nr_idle++;
	worker->last_active = jiffies;

	/* idle_list is LIFO */
	list_add(&worker->entry, &pool->idle_list);

	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);

	/*
	 * Sanity check nr_running.  Because unbind_workers() releases
	 * pool->lock between setting %WORKER_UNBOUND and zapping
	 * nr_running, the warning may trigger spuriously.  Check iff
	 * unbind is not in progress.
	 */
	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     pool->nr_workers == pool->nr_idle &&
		     atomic_read(&pool->nr_running));
}
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
/**
 * worker_leave_idle - leave idle state
 * @worker: worker which is leaving idle state
 *
 * @worker is leaving idle state.  Update stats.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_leave_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
		return;
	worker_clr_flags(worker, WORKER_IDLE);
	pool->nr_idle--;
	list_del_init(&worker->entry);
}
1822
1823static struct worker *alloc_worker(int node)
1824{
1825 struct worker *worker;
1826
1827 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1828 if (worker) {
1829 INIT_LIST_HEAD(&worker->entry);
1830 INIT_LIST_HEAD(&worker->scheduled);
1831 INIT_LIST_HEAD(&worker->node);
1832
1833 worker->flags = WORKER_PREP;
1834 }
1835 return worker;
1836}
1837
/**
 * worker_attach_to_pool() - attach a worker to a pool
 * @worker: worker to be attached
 * @pool: the target pool
 *
 * Attach @worker to @pool.  Once attached, the %WORKER_UNBOUND flag and
 * cpu-binding of @worker are kept coordinated with the pool across
 * cpu-[un]hotplugs.
 */
static void worker_attach_to_pool(struct worker *worker,
				   struct worker_pool *pool)
{
	mutex_lock(&wq_pool_attach_mutex);

	/*
	 * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
	 * stable across this function.  See the comments above the flag
	 * definition for details.
	 */
	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;
	else
		kthread_set_per_cpu(worker->task, pool->cpu);

	/* rescuers roam between pools; bind to this pool's cpumask */
	if (worker->rescue_wq)
		set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);

	list_add_tail(&worker->node, &pool->workers);
	worker->pool = pool;

	mutex_unlock(&wq_pool_attach_mutex);
}
1870
/**
 * worker_detach_from_pool() - detach a worker from its pool
 * @worker: worker which is attached to its pool
 *
 * Undo the attaching which had been done in worker_attach_to_pool().  The
 * caller worker shouldn't access to the pool after detached except it has
 * other reference to the pool.
 */
static void worker_detach_from_pool(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;
	struct completion *detach_completion = NULL;

	mutex_lock(&wq_pool_attach_mutex);

	kthread_set_per_cpu(worker->task, -1);
	list_del(&worker->node);
	worker->pool = NULL;

	/* last worker gone: let put_unbound_pool() proceed */
	if (list_empty(&pool->workers))
		detach_completion = pool->detach_completion;
	mutex_unlock(&wq_pool_attach_mutex);

	/* clear leftover flags without pool->lock after it is detached */
	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);

	if (detach_completion)
		complete(detach_completion);
}
1900
/**
 * create_worker - create a new workqueue worker
 * @pool: pool the new worker will belong to
 *
 * Create and start a new worker which is attached to @pool.
 *
 * CONTEXT:
 * Might sleep.  Does GFP_KERNEL allocations.
 *
 * Return:
 * Pointer to the newly created worker, or %NULL on failure.
 */
static struct worker *create_worker(struct worker_pool *pool)
{
	struct worker *worker = NULL;
	int id = -1;
	char id_buf[16];

	/* ID is needed to determine kthread name */
	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto fail;

	worker = alloc_worker(pool->node);
	if (!worker)
		goto fail;

	worker->id = id;

	/* per-cpu pools: "cpu:id[H]"; unbound pools: "upoolid:id" */
	if (pool->cpu >= 0)
		snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
			 pool->attrs->nice < 0  ? "H" : "");
	else
		snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);

	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
					      "kworker/%s", id_buf);
	if (IS_ERR(worker->task))
		goto fail;

	set_user_nice(worker->task, pool->attrs->nice);
	kthread_bind_mask(worker->task, pool->attrs->cpumask);

	/* successful, attach the worker to the pool */
	worker_attach_to_pool(worker, pool);

	/* start the newly created worker */
	raw_spin_lock_irq(&pool->lock);
	worker->pool->nr_workers++;
	worker_enter_idle(worker);
	wake_up_process(worker->task);
	raw_spin_unlock_irq(&pool->lock);

	return worker;

fail:
	/* release whatever was acquired before the failing step */
	if (id >= 0)
		ida_simple_remove(&pool->worker_ida, id);
	kfree(worker);
	return NULL;
}
1962
/**
 * destroy_worker - destroy a workqueue worker
 * @worker: worker to be destroyed
 *
 * Destroy @worker and adjust @pool stats accordingly.  The worker should
 * be idle.  The actual freeing happens in the worker's own context once
 * it observes %WORKER_DIE (see worker_thread()).
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void destroy_worker(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	lockdep_assert_held(&pool->lock);

	/* sanity check frenzy */
	if (WARN_ON(worker->current_work) ||
	    WARN_ON(!list_empty(&worker->scheduled)) ||
	    WARN_ON(!(worker->flags & WORKER_IDLE)))
		return;

	pool->nr_workers--;
	pool->nr_idle--;

	list_del_init(&worker->entry);
	worker->flags |= WORKER_DIE;
	wake_up_process(worker->task);
}
1992
/* timer callback: reap workers that have been idle for IDLE_WORKER_TIMEOUT */
static void idle_worker_timeout(struct timer_list *t)
{
	struct worker_pool *pool = from_timer(pool, t, idle_timer);

	raw_spin_lock_irq(&pool->lock);

	while (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		/* idle_list is kept in LIFO order, check the last one */
		worker = list_entry(pool->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		/* oldest idle worker not yet expired: re-arm and stop */
		if (time_before(jiffies, expires)) {
			mod_timer(&pool->idle_timer, expires);
			break;
		}

		destroy_worker(worker);
	}

	raw_spin_unlock_irq(&pool->lock);
}
2017
/* ask the rescuer of @work's workqueue, if any, to come help its pwq */
static void send_mayday(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq_mayday_lock);

	if (!wq->rescuer)
		return;

	/* mayday mayday mayday */
	if (list_empty(&pwq->mayday_node)) {
		/*
		 * If @pwq is for an unbound wq, its base ref may be put at
		 * any time due to an attribute change.  Pin @pwq until the
		 * rescuer is done with it.
		 */
		get_pwq(pwq);
		list_add_tail(&pwq->mayday_node, &wq->maydays);
		wake_up_process(wq->rescuer->task);
	}
}
2040
/* timer callback: periodically send maydays while a pool can't create workers */
static void pool_mayday_timeout(struct timer_list *t)
{
	struct worker_pool *pool = from_timer(pool, t, mayday_timer);
	struct work_struct *work;

	raw_spin_lock_irq(&pool->lock);
	raw_spin_lock(&wq_mayday_lock);	/* for wq->maydays; nests inside pool->lock */

	if (need_to_create_worker(pool)) {
		/*
		 * We've been trying to create a new worker but
		 * haven't been successful.  We might be hitting an
		 * allocation deadlock.  Send distress signals to
		 * rescuers.
		 */
		list_for_each_entry(work, &pool->worklist, entry)
			send_mayday(work);
	}

	raw_spin_unlock(&wq_mayday_lock);
	raw_spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}
2065
/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create a new worker for @pool if necessary.  @pool is guaranteed to
 * have at least one idle worker on return from this function.  If
 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
 * sent to all rescuers with works scheduled on @pool to resolve
 * possible allocation deadlock.
 *
 * On return, need_to_create_worker() is guaranteed to be %false and
 * may_start_working() %true.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.  Called only from
 * manager.
 */
static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
restart:
	raw_spin_unlock_irq(&pool->lock);

	/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

	while (true) {
		if (create_worker(pool) || !need_to_create_worker(pool))
			break;

		schedule_timeout_interruptible(CREATE_COOLDOWN);

		if (!need_to_create_worker(pool))
			break;
	}

	del_timer_sync(&pool->mayday_timer);
	raw_spin_lock_irq(&pool->lock);
	/*
	 * This is necessary even after a new worker was just successfully
	 * created as @pool->lock was dropped and the new worker might have
	 * already become busy.
	 */
	if (need_to_create_worker(pool))
		goto restart;
}
2114
/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and manage the worker pool @worker belongs
 * to.  At any given time, there can be only zero or one manager per
 * pool.  The exclusion is handled automatically by this function.
 *
 * The caller can safely start processing works on false return.  On
 * true return, it's guaranteed that need_to_create_worker() is false
 * and may_start_working() is true.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.
 *
 * Return:
 * %false if the pool doesn't need management and the caller can safely
 * start processing works, %true if management function was performed and
 * the conditions that the caller verified before calling the function may
 * no longer be true.
 */
static bool manage_workers(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	/* only one manager at a time; contenders just proceed to work */
	if (pool->flags & POOL_MANAGER_ACTIVE)
		return false;

	pool->flags |= POOL_MANAGER_ACTIVE;
	pool->manager = worker;

	maybe_create_worker(pool);

	pool->manager = NULL;
	pool->flags &= ~POOL_MANAGER_ACTIVE;
	/* put_unbound_pool() may be waiting for the manager slot */
	rcuwait_wake_up(&manager_wait);
	return true;
}
2154
/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work.  This function contains all the logics necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
 */
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
__acquires(&pool->lock)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct worker_pool *pool = worker->pool;
	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
	int work_color;
	struct worker *collision;
#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the struct work_struct from
	 * inside the function that is called from it, this we need to
	 * take into account for lockdep too.  To avoid bogus "held
	 * lock freed" warnings as well as problems when looking into
	 * work->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif
	/* ensure we're on the correct CPU unless the pool is disassociated */
	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     raw_smp_processor_id() != pool->cpu);

	/*
	 * A single work shouldn't be executed concurrently by
	 * multiple workers on a single cpu.  Check whether anyone is
	 * already processing the work.  If so, defer the work to the
	 * currently executing one.
	 */
	collision = find_worker_executing_work(pool, work);
	if (unlikely(collision)) {
		move_linked_works(work, &collision->scheduled, NULL);
		return;
	}

	/* claim and dequeue */
	debug_work_deactivate(work);
	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
	worker->current_work = work;
	worker->current_func = work->func;
	worker->current_pwq = pwq;
	work_color = get_work_color(work);

	/*
	 * Record wq name for cmdline and debug reporting, may get
	 * overridden through set_worker_desc().
	 */
	strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);

	list_del_init(&work->entry);

	/*
	 * CPU intensive works don't participate in concurrency management.
	 * They're the scheduler's responsibility.  This takes @worker out
	 * of concurrency management and the next code block will chain
	 * execution of the pending work items.
	 */
	if (unlikely(cpu_intensive))
		worker_set_flags(worker, WORKER_CPU_INTENSIVE);

	/*
	 * Wake up another worker if necessary.  The condition is always
	 * false for normal per-cpu workers since nr_running would always
	 * be >= 1 at this point.  This is used to chain execution of the
	 * pending work items for WORKER_NOT_RUNNING workers such as the
	 * UNBOUND and CPU_INTENSIVE ones.
	 */
	if (need_more_worker(pool))
		wake_up_worker(pool);

	/*
	 * Record the last pool and clear PENDING which should be the last
	 * update to @work.  Also, do this inside @pool->lock so that
	 * PENDING and queued state changes happen together while IRQ is
	 * disabled.
	 */
	set_work_pool_and_clear_pending(work, pool->id);

	raw_spin_unlock_irq(&pool->lock);

	lock_map_acquire(&pwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	/*
	 * Strictly speaking we should mark the invariant state without
	 * holding any locks, that is, before these two lock_map_acquire()'s.
	 *
	 * However, that would result in:
	 *
	 *   A(W1)
	 *   WFC(C)
	 *		A(W1)
	 *		C(C)
	 *
	 * Which would create W1->C->W1 dependencies, even though there is no
	 * actual deadlock possible.  There are two solutions, using a
	 * read-recursive acquire on the work(queue) 'locks', but this will then
	 * hit the lockdep limitation on recursive locks, or simply discard
	 * these locks.
	 *
	 * AFAICT there is no possible deadlock scenario between the
	 * flush_work() and complete() primitives (except for single-threaded
	 * workqueues), so hiding them isn't a problem.
	 */
	lockdep_invariant_state(true);
	trace_workqueue_execute_start(work);
	worker->current_func(work);
	/*
	 * While we must be careful to not use "work" after this, the trace
	 * point will only record its address.
	 */
	trace_workqueue_execute_end(work, worker->current_func);
	lock_map_release(&lockdep_map);
	lock_map_release(&pwq->wq->lockdep_map);

	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
		       "     last function: %ps\n",
		       current->comm, preempt_count(), task_pid_nr(current),
		       worker->current_func);
		debug_show_held_locks(current);
		dump_stack();
	}

	/*
	 * The following prevents a kworker from hogging CPU on !PREEMPT
	 * kernels, where a requeueing work item waiting for something to
	 * happen could deadlock with stop_machine as such work item could
	 * indefinitely requeue itself while all other CPUs are trapped in
	 * stop_machine.  At the same time, report a quiescent RCU state so
	 * the same condition doesn't freeze RCU.
	 */
	cond_resched();

	raw_spin_lock_irq(&pool->lock);

	/* clear cpu intensive status */
	if (unlikely(cpu_intensive))
		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

	/* tag the worker for identification in schedule() */
	worker->last_func = worker->current_func;

	/* we're done with it, release */
	hash_del(&worker->hentry);
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
	pwq_dec_nr_in_flight(pwq, work_color);
}
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333static void process_scheduled_works(struct worker *worker)
2334{
2335 while (!list_empty(&worker->scheduled)) {
2336 struct work_struct *work = list_first_entry(&worker->scheduled,
2337 struct work_struct, entry);
2338 process_one_work(worker, work);
2339 }
2340}
2341
2342static void set_pf_worker(bool val)
2343{
2344 mutex_lock(&wq_pool_attach_mutex);
2345 if (val)
2346 current->flags |= PF_WQ_WORKER;
2347 else
2348 current->flags &= ~PF_WQ_WORKER;
2349 mutex_unlock(&wq_pool_attach_mutex);
2350}
2351
/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The worker thread function.  All workers belong to a worker_pool -
 * either a per-cpu one or dynamic unbound one.  These workers process all
 * work items regardless of their specific target workqueue.  The only
 * exception is work items which belong to workqueues with a rescuer which
 * will be explained in rescuer_thread().
 *
 * Return: 0
 */
static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct worker_pool *pool = worker->pool;

	/* tell the scheduler that this is a workqueue worker */
	set_pf_worker(true);
woke_up:
	raw_spin_lock_irq(&pool->lock);

	/* am I supposed to die? */
	if (unlikely(worker->flags & WORKER_DIE)) {
		raw_spin_unlock_irq(&pool->lock);
		WARN_ON_ONCE(!list_empty(&worker->entry));
		set_pf_worker(false);

		set_task_comm(worker->task, "kworker/dying");
		ida_simple_remove(&pool->worker_ida, worker->id);
		worker_detach_from_pool(worker);
		kfree(worker);
		return 0;
	}

	worker_leave_idle(worker);
recheck:
	/* no more worker necessary? */
	if (!need_more_worker(pool))
		goto sleep;

	/* do we need to manage? */
	if (unlikely(!may_start_working(pool)) && manage_workers(worker))
		goto recheck;

	/*
	 * ->scheduled list can only be filled while a worker is
	 * preparing to process a work or actually processing it.
	 * Make sure nobody diddled with it while I was sleeping.
	 */
	WARN_ON_ONCE(!list_empty(&worker->scheduled));

	/*
	 * Finish PREP stage.  We're guaranteed to have at least one idle
	 * worker or that someone else has already assumed the manager
	 * role.  This is where @worker starts participating in concurrency
	 * management if applicable and concurrency management is restored
	 * after being rebound.  See rebind_workers() for details.
	 */
	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

	do {
		struct work_struct *work =
			list_first_entry(&pool->worklist,
					 struct work_struct, entry);

		pool->watchdog_ts = jiffies;

		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
			/* optimization path, not strictly necessary */
			process_one_work(worker, work);
			if (unlikely(!list_empty(&worker->scheduled)))
				process_scheduled_works(worker);
		} else {
			move_linked_works(work, &worker->scheduled, NULL);
			process_scheduled_works(worker);
		}
	} while (keep_working(pool));

	worker_set_flags(worker, WORKER_PREP);
sleep:
	/*
	 * pool->lock is held and there's no work to process and no need to
	 * manage, sleep.  Workers are woken up only while holding
	 * pool->lock or from local cpu, so setting the current state
	 * before releasing pool->lock is enough to prevent losing any
	 * event.
	 */
	worker_enter_idle(worker);
	__set_current_state(TASK_IDLE);
	raw_spin_unlock_irq(&pool->lock);
	schedule();
	goto woke_up;
}
2446
/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueue rescuer thread function.  There's one rescuer for each
 * workqueue which has WQ_MEM_RECLAIM set.
 *
 * Regular work processing on a pool may block trying to create a new
 * worker which uses GFP_KERNEL allocation which has slight chance of
 * developing into deadlock if some works currently on the same queue
 * need to be processed to satisfy the GFP_KERNEL allocation.  This is
 * the problem rescuer solves.
 *
 * When such condition is possible, the pool summons rescuers of all
 * workqueues which have works queued on the pool and let them process
 * those works so that forward progress can be guaranteed.
 *
 * This should happen rarely.
 *
 * Return: 0
 */
static int rescuer_thread(void *__rescuer)
{
	struct worker *rescuer = __rescuer;
	struct workqueue_struct *wq = rescuer->rescue_wq;
	struct list_head *scheduled = &rescuer->scheduled;
	bool should_stop;

	set_user_nice(current, RESCUER_NICE_LEVEL);

	/*
	 * Mark rescuer as worker too.  As WORKER_PREP is never cleared, it
	 * doesn't participate in concurrency management.
	 */
	set_pf_worker(true);
repeat:
	set_current_state(TASK_IDLE);

	/*
	 * By the time the rescuer is requested to stop, the workqueue
	 * shouldn't have any work pending, but @wq->maydays may still have
	 * pwq(s) queued.  This can happen by non-rescuer workers consuming
	 * all the work items before the rescuer got to them.  Go through
	 * @wq->maydays processing before acting on should_stop so that the
	 * list is always empty on exit.
	 */
	should_stop = kthread_should_stop();

	/* see whether any pwq is asking for help */
	raw_spin_lock_irq(&wq_mayday_lock);

	while (!list_empty(&wq->maydays)) {
		struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
					struct pool_workqueue, mayday_node);
		struct worker_pool *pool = pwq->pool;
		struct work_struct *work, *n;
		bool first = true;

		__set_current_state(TASK_RUNNING);
		list_del_init(&pwq->mayday_node);

		raw_spin_unlock_irq(&wq_mayday_lock);

		worker_attach_to_pool(rescuer, pool);

		raw_spin_lock_irq(&pool->lock);

		/*
		 * Slurp in all works issued via this workqueue and
		 * process'em.
		 */
		WARN_ON_ONCE(!list_empty(scheduled));
		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
			if (get_work_pwq(work) == pwq) {
				if (first)
					pool->watchdog_ts = jiffies;
				move_linked_works(work, scheduled, &n);
			}
			first = false;
		}

		if (!list_empty(scheduled)) {
			process_scheduled_works(rescuer);

			/*
			 * The above execution of rescued work items could
			 * have created more to rescue through
			 * pwq_activate_first_delayed() or chained
			 * queueing.  Let's put @pwq back on mayday list so
			 * that such back-to-back work items, which may be
			 * being used to relieve memory pressure, don't
			 * incur MAYDAY_INTERVAL delay inbetween.
			 */
			if (pwq->nr_active && need_to_create_worker(pool)) {
				raw_spin_lock(&wq_mayday_lock);
				/*
				 * Queue iff we aren't racing destruction
				 * and somebody else hasn't queued it already.
				 */
				if (wq->rescuer && list_empty(&pwq->mayday_node)) {
					get_pwq(pwq);
					list_add_tail(&pwq->mayday_node, &wq->maydays);
				}
				raw_spin_unlock(&wq_mayday_lock);
			}
		}

		/*
		 * Put the reference grabbed by send_mayday().  @pool won't
		 * go away while we're still attached to it.
		 */
		put_pwq(pwq);

		/*
		 * Leave this pool.  If need_more_worker() is %true, notify a
		 * regular worker; otherwise, we end up with 0 concurrency
		 * and stalling the execution.
		 */
		if (need_more_worker(pool))
			wake_up_worker(pool);

		raw_spin_unlock_irq(&pool->lock);

		worker_detach_from_pool(rescuer);

		raw_spin_lock_irq(&wq_mayday_lock);
	}

	raw_spin_unlock_irq(&wq_mayday_lock);

	if (should_stop) {
		__set_current_state(TASK_RUNNING);
		set_pf_worker(false);
		return 0;
	}

	/* rescuers should never participate in concurrency management */
	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
	schedule();
	goto repeat;
}
2588
/**
 * check_flush_dependency - check for flush dependency sanity
 * @target_wq: workqueue being flushed
 * @target_work: work item being flushed (NULL for workqueue flushes)
 *
 * %WQ_MEM_RECLAIM workqueues can't be flushed from a context which isn't
 * guaranteed forward progress under memory pressure.  Warn if the current
 * context (a PF_MEMALLOC task or a WQ_MEM_RECLAIM worker) is flushing a
 * !WQ_MEM_RECLAIM workqueue.
 */
static void check_flush_dependency(struct workqueue_struct *target_wq,
				   struct work_struct *target_work)
{
	work_func_t target_func = target_work ? target_work->func : NULL;
	struct worker *worker;

	/* flushing a reclaim-safe wq is always fine */
	if (target_wq->flags & WQ_MEM_RECLAIM)
		return;

	worker = current_wq_worker();

	WARN_ONCE(current->flags & PF_MEMALLOC,
		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
		  current->pid, current->comm, target_wq->name, target_func);
	/* args below are only evaluated when the condition (worker != NULL) holds */
	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
		  "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
		  worker->current_pwq->wq->name, worker->current_func,
		  target_wq->name, target_func);
}
2620
/* barrier work item used by flush_work() and friends */
struct wq_barrier {
	struct work_struct	work;	/* the no-op work queued behind the target */
	struct completion	done;	/* completed when the barrier executes */
	struct task_struct	*task;	/* purely informational: the flusher */
};

/* executed once everything queued before the barrier has finished */
static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}
2632
/**
 * insert_wq_barrier - insert a barrier work
 * @pwq: pwq to insert barrier into
 * @barr: wq_barrier to insert
 * @target: target work to attach @barr to
 * @worker: worker currently executing @target, NULL if @target is not executing
 *
 * @barr is linked to @target such that @barr is completed only after
 * @target finishes execution.  Please note that the ordering guarantee is
 * observed only with respect to @target and on the local cpu.
 *
 * Currently, a queued barrier can't be canceled.  This is because
 * try_to_grab_pending() can't determine whether the work to be grabbed is
 * at the head of the queue and thus can't clear LINKED flag of the next
 * work while there must be a valid next work after a work with LINKED
 * flag set.
 *
 * Note that when @worker is non-NULL, @target may be modified underneath
 * us, so we can't reliably determine pwq from @target.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void insert_wq_barrier(struct pool_workqueue *pwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	struct list_head *head;
	unsigned int linked = 0;

	/*
	 * debugobject calls are safe here even with pool->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * might deadlock.
	 */
	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));

	init_completion_map(&barr->done, &target->lockdep_map);

	barr->task = current;

	/*
	 * If @target is currently being executed, schedule the
	 * barrier to the worker; otherwise, put it after @target.
	 */
	if (worker)
		head = worker->scheduled.next;
	else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;
		/* there can already be other linked works, inherit and set */
		linked = *bits & WORK_STRUCT_LINKED;
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	debug_work_activate(&barr->work);
	insert_work(pwq, &barr->work, head,
		    work_color_to_flags(WORK_NO_COLOR) | linked);
}
2696
/**
 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
 * @wq: workqueue being flushed
 * @flush_color: new flush color, < 0 for no-op
 * @work_color: new work color, < 0 for no-op
 *
 * Prepare pwqs for workqueue flushing.
 *
 * If @flush_color is non-negative, flush_color on all pwqs should be
 * -1.  If no pwq has in-flight commands at the specified color, all
 * pwq->flush_color's stay at -1 and %false is returned.  If any pwq
 * has in flight commands, its pwq->flush_color is set to
 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
 * wakeup logic is armed and %true is returned.
 *
 * The caller should have initialized @wq->first_flusher prior to
 * calling this function with non-negative @flush_color.  If
 * @flush_color is negative, no flush color update is done and %false
 * is returned.
 *
 * If @work_color is non-negative, all pwqs should have the same
 * work_color which is previous to @work_color and all will be
 * advanced to @work_color.
 *
 * CONTEXT:
 * mutex_lock(wq->mutex).
 *
 * Return:
 * %true if @flush_color >= 0 and there's something to flush.  %false
 * otherwise.
 */
static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
				      int flush_color, int work_color)
{
	bool wait = false;
	struct pool_workqueue *pwq;

	if (flush_color >= 0) {
		WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
		/* count starts at 1 so the final dec below detects completion */
		atomic_set(&wq->nr_pwqs_to_flush, 1);
	}

	for_each_pwq(pwq, wq) {
		struct worker_pool *pool = pwq->pool;

		raw_spin_lock_irq(&pool->lock);

		if (flush_color >= 0) {
			WARN_ON_ONCE(pwq->flush_color != -1);

			if (pwq->nr_in_flight[flush_color]) {
				pwq->flush_color = flush_color;
				atomic_inc(&wq->nr_pwqs_to_flush);
				wait = true;
			}
		}

		if (work_color >= 0) {
			WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
			pwq->work_color = work_color;
		}

		raw_spin_unlock_irq(&pool->lock);
	}

	/* drop the initial count; complete if no pwq had in-flight work */
	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
		complete(&wq->first_flusher->done);

	return wait;
}
2767
/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	struct wq_flusher this_flusher = {
		.list = LIST_HEAD_INIT(this_flusher.list),
		.flush_color = -1,
		.done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
	};
	int next_color;

	if (WARN_ON(!wq_online))
		return;

	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);

	mutex_lock(&wq->mutex);

	/*
	 * Start-to-wait phase
	 */
	next_color = work_next_color(wq->work_color);

	if (next_color != wq->flush_color) {
		/*
		 * Color space is not full.  The current work_color
		 * becomes our flush_color and work_color is advanced
		 * by one.
		 */
		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
		this_flusher.flush_color = wq->work_color;
		wq->work_color = next_color;

		if (!wq->first_flusher) {
			/* no flush in progress, become the first flusher */
			WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

			wq->first_flusher = &this_flusher;

			if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
						       wq->work_color)) {
				/* nothing to flush, done */
				wq->flush_color = next_color;
				wq->first_flusher = NULL;
				goto out_unlock;
			}
		} else {
			/* wait in queue */
			WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
			list_add_tail(&this_flusher.list, &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}
	} else {
		/*
		 * Oops, color space is full, wait on overflow queue.
		 * The next flush completion will assign us
		 * flush_color and transfer to flusher_queue.
		 */
		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
	}

	check_flush_dependency(wq, NULL);

	mutex_unlock(&wq->mutex);

	wait_for_completion(&this_flusher.done);

	/*
	 * Wake-up-and-cascade phase
	 *
	 * First flushers are responsible for cascading flushes and
	 * handling overflow.  Non-first flushers can simply return.
	 */
	if (READ_ONCE(wq->first_flusher) != &this_flusher)
		return;

	mutex_lock(&wq->mutex);

	/* we might have raced, check again with mutex held */
	if (wq->first_flusher != &this_flusher)
		goto out_unlock;

	WRITE_ONCE(wq->first_flusher, NULL);

	WARN_ON_ONCE(!list_empty(&this_flusher.list));
	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

	while (true) {
		struct wq_flusher *next, *tmp;

		/* complete all the flushers sharing the current flush color */
		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
			if (next->flush_color != wq->flush_color)
				break;
			list_del_init(&next->list);
			complete(&next->done);
		}

		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
			     wq->flush_color != work_next_color(wq->work_color));

		/* this flush_color is finished, advance by one */
		wq->flush_color = work_next_color(wq->flush_color);

		/* one color has been freed, handle overflow queue */
		if (!list_empty(&wq->flusher_overflow)) {
			/*
			 * Assign the same color to all overflowed
			 * flushers, advance work_color and append to
			 * flusher_queue.  This is the start-to-wait
			 * phase for these overflowed flushers.
			 */
			list_for_each_entry(tmp, &wq->flusher_overflow, list)
				tmp->flush_color = wq->work_color;

			wq->work_color = work_next_color(wq->work_color);

			list_splice_tail_init(&wq->flusher_overflow,
					      &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}

		if (list_empty(&wq->flusher_queue)) {
			WARN_ON_ONCE(wq->flush_color != wq->work_color);
			break;
		}

		/*
		 * Need to flush more colors.  Make the next flusher
		 * the new first flusher and arm pwqs.
		 */
		WARN_ON_ONCE(wq->flush_color == wq->work_color);
		WARN_ON_ONCE(wq->flush_color != next->flush_color);

		list_del_init(&next->list);
		wq->first_flusher = next;

		if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
			break;

		/*
		 * Meh... this color is already done, clear first
		 * flusher and repeat cascading.
		 */
		wq->first_flusher = NULL;
	}

out_unlock:
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL(flush_workqueue);
2925
/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty.  While draining is in progress,
 * only chain queueing is allowed.  IOW, only currently pending or running
 * work items on @wq can queue further work items on it.  @wq is flushed
 * repeatedly until it becomes empty.  The number of flushing is determined
 * by the depth of chaining and should be relatively short.  Whine if it
 * takes too long.
 */
void drain_workqueue(struct workqueue_struct *wq)
{
	unsigned int flush_cnt = 0;
	struct pool_workqueue *pwq;

	/*
	 * __queue_work() needs to test whether there are drainers, is much
	 * hotter than drain_workqueue() and already looks at @wq->flags.
	 * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers.
	 */
	mutex_lock(&wq->mutex);
	if (!wq->nr_drainers++)
		wq->flags |= __WQ_DRAINING;
	mutex_unlock(&wq->mutex);
reflush:
	flush_workqueue(wq);

	mutex_lock(&wq->mutex);

	for_each_pwq(pwq, wq) {
		bool drained;

		raw_spin_lock_irq(&pwq->pool->lock);
		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
		raw_spin_unlock_irq(&pwq->pool->lock);

		if (drained)
			continue;

		if (++flush_cnt == 10 ||
		    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
			pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
				wq->name, __func__, flush_cnt);

		mutex_unlock(&wq->mutex);
		goto reflush;
	}

	if (!--wq->nr_drainers)
		wq->flags &= ~__WQ_DRAINING;
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(drain_workqueue);
2980
/*
 * Try to insert a flush barrier behind @work.  Returns %true and fills
 * in @barr (caller must wait on barr->done) when @work is pending or
 * executing; %false when there is nothing to wait for.
 */
static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
			     bool from_cancel)
{
	struct worker *worker = NULL;
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	might_sleep();

	/* RCU protects the pool lookup until we take pool->lock */
	rcu_read_lock();
	pool = get_work_pool(work);
	if (!pool) {
		rcu_read_unlock();
		return false;
	}

	raw_spin_lock_irq(&pool->lock);
	/* see the comment in try_to_grab_pending() with the same code */
	pwq = get_work_pwq(work);
	if (pwq) {
		if (unlikely(pwq->pool != pool))
			goto already_gone;
	} else {
		worker = find_worker_executing_work(pool, work);
		if (!worker)
			goto already_gone;
		pwq = worker->current_pwq;
	}

	check_flush_dependency(pwq->wq, work);

	insert_wq_barrier(pwq, barr, work, worker);
	raw_spin_unlock_irq(&pool->lock);

	/*
	 * Force a lock recursion deadlock when using flush_work() inside a
	 * single-threaded or rescuer equipped workqueue.
	 *
	 * For single threaded workqueues the deadlock happens when the work
	 * is after the work issuing the flush_work().  For rescuer equipped
	 * workqueues the deadlock happens when the rescuer stalls, blocking
	 * forward progress.
	 */
	if (!from_cancel &&
	    (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
		lock_map_acquire(&pwq->wq->lockdep_map);
		lock_map_release(&pwq->wq->lockdep_map);
	}
	rcu_read_unlock();
	return true;
already_gone:
	raw_spin_unlock_irq(&pool->lock);
	rcu_read_unlock();
	return false;
}
3036
3037static bool __flush_work(struct work_struct *work, bool from_cancel)
3038{
3039 struct wq_barrier barr;
3040
3041 if (WARN_ON(!wq_online))
3042 return false;
3043
3044 if (WARN_ON(!work->func))
3045 return false;
3046
3047 if (!from_cancel) {
3048 lock_map_acquire(&work->lockdep_map);
3049 lock_map_release(&work->lockdep_map);
3050 }
3051
3052 if (start_flush_work(work, &barr, from_cancel)) {
3053 wait_for_completion(&barr.done);
3054 destroy_work_on_stack(&barr.work);
3055 return true;
3056 } else {
3057 return false;
3058 }
3059}
3060
/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_work(struct work_struct *work)
{
	return __flush_work(work, false);
}
EXPORT_SYMBOL_GPL(flush_work);
3077
/* waiter entry used by __cancel_work_timer() to wait for a concurrent canceler */
struct cwt_wait {
	wait_queue_entry_t	wait;	/* embedded wait queue entry */
	struct work_struct	*work;	/* the work item being canceled */
};
3082
3083static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
3084{
3085 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3086
3087 if (cwait->work != key)
3088 return 0;
3089 return autoremove_wake_function(wait, mode, sync, key);
3090}
3091
/*
 * Cancel @work and wait for any in-flight execution to finish.
 * @is_dwork selects whether the delayed_work timer is also shot down.
 * Returns %true if @work was pending, %false otherwise.
 */
static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
		/*
		 * If someone else is already canceling, wait for it to
		 * finish.  flush_work() doesn't work for PREEMPT_NONE
		 * because we may get scheduled between @work's completion
		 * and the other canceling task resuming and clearing
		 * CANCELING - flush_work() will return false immediately
		 * as @work is no longer busy, try_to_grab_pending() will
		 * return -ENOENT as @work is still being canceled and the
		 * other canceling task won't be able to clear CANCELING as
		 * we're hogging the CPU.
		 *
		 * Let's wait for completion using a waitqueue.  As this
		 * may lead to the thundering herd problem, use a custom
		 * wake function which matches @work along with exclusive
		 * wait and wakeup.
		 */
		if (unlikely(ret == -ENOENT)) {
			struct cwt_wait cwait;

			init_wait(&cwait.wait);
			cwait.wait.func = cwt_wakefn;
			cwait.work = work;

			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
						  TASK_UNINTERRUPTIBLE);
			if (work_is_canceling(work))
				schedule();
			finish_wait(&cancel_waitq, &cwait.wait);
		}
	} while (unlikely(ret < 0));

	/* tell other tasks trying to grab @work to back off */
	mark_work_canceling(work);
	local_irq_restore(flags);

	/*
	 * This allows canceling during early boot.  We know that @work
	 * isn't executing.
	 */
	if (wq_online)
		__flush_work(work, true);

	clear_work_data(work);

	/*
	 * Paired with prepare_to_wait() above so that either
	 * waitqueue_active() is visible here or !work_is_canceling() is
	 * visible there.
	 */
	smp_mb();
	if (waitqueue_active(&cancel_waitq))
		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work);

	return ret;
}
3155
/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function
 * can be used even if the work re-queues itself or migrates to
 * another workqueue.  On return from this function, @work is
 * guaranteed to be not pending or executing on any CPU.
 *
 * cancel_work_sync(&delayed_work->work) must not be used for
 * delayed_work's.  Use cancel_delayed_work_sync() instead.
 *
 * The caller must ensure that the workqueue on which @work was last
 * queued can't be destroyed before this function returns.
 *
 * Return:
 * %true if @work was pending, %false otherwise.
 */
bool cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, false);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);
3179
/**
 * flush_delayed_work - wait for a dwork to finish executing the last queueing
 * @dwork: the delayed work to flush
 *
 * Delayed timer is cancelled and the pending work is queued for
 * immediate execution.  Like flush_work(), this function only
 * considers the last queueing instance of @dwork.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_delayed_work(struct delayed_work *dwork)
{
	/* irqs off so the timer-cancel + queue appears atomic to __queue_work() */
	local_irq_disable();
	if (del_timer_sync(&dwork->timer))
		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
	local_irq_enable();
	return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210bool flush_rcu_work(struct rcu_work *rwork)
3211{
3212 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3213 rcu_barrier();
3214 flush_work(&rwork->work);
3215 return true;
3216 } else {
3217 return flush_work(&rwork->work);
3218 }
3219}
3220EXPORT_SYMBOL(flush_rcu_work);
3221
/*
 * Cancel @work without waiting for in-flight execution.  Returns %true
 * if @work was pending and got cancelled, %false otherwise.
 */
static bool __cancel_work(struct work_struct *work, bool is_dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
	} while (unlikely(ret == -EAGAIN));

	/* -ENOENT: someone else is canceling; nothing for us to do */
	if (unlikely(ret < 0))
		return false;

	set_work_pool_and_clear_pending(work, get_work_pool_id(work));
	local_irq_restore(flags);
	return ret;
}
3238
/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.
 *
 * Return: %true if @dwork was pending and canceled; %false if it wasn't
 * pending.
 *
 * Note:
 * The work callback function may still be running on return, unless
 * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
 * use cancel_delayed_work_sync() to wait on it.
 *
 * This function is safe to call from any context including IRQ handler.
 */
bool cancel_delayed_work(struct delayed_work *dwork)
{
	return __cancel_work(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work);
3260
/**
 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
 * @dwork: the delayed work cancel
 *
 * This is cancel_work_sync() for delayed works.
 *
 * Return:
 * %true if @dwork was pending, %false otherwise.
 */
bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
3275
/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * schedule_on_each_cpu() executes @func on each online CPU using the
 * system workqueue and blocks until all CPUs have completed.
 * schedule_on_each_cpu() is very slow.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct __percpu *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	/* hold hotplug off so the online set is stable across both loops */
	get_online_cpus();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3326{
3327 if (!in_interrupt()) {
3328 fn(&ew->work);
3329 return 0;
3330 }
3331
3332 INIT_WORK(&ew->work, fn);
3333 schedule_work(&ew->work);
3334
3335 return 1;
3336}
3337EXPORT_SYMBOL_GPL(execute_in_process_context);
3338
3339
3340
3341
3342
3343
3344
3345void free_workqueue_attrs(struct workqueue_attrs *attrs)
3346{
3347 if (attrs) {
3348 free_cpumask_var(attrs->cpumask);
3349 kfree(attrs);
3350 }
3351}
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361struct workqueue_attrs *alloc_workqueue_attrs(void)
3362{
3363 struct workqueue_attrs *attrs;
3364
3365 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
3366 if (!attrs)
3367 goto fail;
3368 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
3369 goto fail;
3370
3371 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3372 return attrs;
3373fail:
3374 free_workqueue_attrs(attrs);
3375 return NULL;
3376}
3377
/* copy @from's attributes into @to */
static void copy_workqueue_attrs(struct workqueue_attrs *to,
				 const struct workqueue_attrs *from)
{
	to->nice = from->nice;
	cpumask_copy(to->cpumask, from->cpumask);
	/*
	 * Unlike hash and equality test, this function doesn't ignore
	 * ->no_numa as it is used for both pool and wq attrs.  Instead,
	 * get_unbound_pool() explicitly clears ->no_numa after copying.
	 */
	to->no_numa = from->no_numa;
}
3390
3391
3392static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3393{
3394 u32 hash = 0;
3395
3396 hash = jhash_1word(attrs->nice, hash);
3397 hash = jhash(cpumask_bits(attrs->cpumask),
3398 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3399 return hash;
3400}
3401
3402
3403static bool wqattrs_equal(const struct workqueue_attrs *a,
3404 const struct workqueue_attrs *b)
3405{
3406 if (a->nice != b->nice)
3407 return false;
3408 if (!cpumask_equal(a->cpumask, b->cpumask))
3409 return false;
3410 return true;
3411}
3412
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
static int init_worker_pool(struct worker_pool *pool)
{
	raw_spin_lock_init(&pool->lock);
	pool->id = -1;
	pool->cpu = -1;
	pool->node = NUMA_NO_NODE;
	pool->flags |= POOL_DISASSOCIATED;
	pool->watchdog_ts = jiffies;
	INIT_LIST_HEAD(&pool->worklist);
	INIT_LIST_HEAD(&pool->idle_list);
	hash_init(pool->busy_hash);

	timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);

	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);

	INIT_LIST_HEAD(&pool->workers);

	ida_init(&pool->worker_ida);
	INIT_HLIST_NODE(&pool->hash_node);
	pool->refcnt = 1;

	/* shouldn't fail above this point */
	pool->attrs = alloc_workqueue_attrs();
	if (!pool->attrs)
		return -ENOMEM;
	return 0;
}
3451
#ifdef CONFIG_LOCKDEP
/* register a dedicated lockdep key/map for @wq, named after the workqueue */
static void wq_init_lockdep(struct workqueue_struct *wq)
{
	char *lock_name;

	lockdep_register_key(&wq->key);
	lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
	/* on allocation failure fall back to the wq name itself */
	if (!lock_name)
		lock_name = wq->name;

	wq->lock_name = lock_name;
	lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
}

static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
	lockdep_unregister_key(&wq->key);
}

static void wq_free_lockdep(struct workqueue_struct *wq)
{
	/* only free if kasprintf() succeeded; otherwise it aliases wq->name */
	if (wq->lock_name != wq->name)
		kfree(wq->lock_name);
}
#else
/* !CONFIG_LOCKDEP stubs */
static void wq_init_lockdep(struct workqueue_struct *wq)
{
}

static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
}

static void wq_free_lockdep(struct workqueue_struct *wq)
{
}
#endif
3489
/* RCU callback: final release of a workqueue_struct */
static void rcu_free_wq(struct rcu_head *rcu)
{
	struct workqueue_struct *wq =
		container_of(rcu, struct workqueue_struct, rcu);

	wq_free_lockdep(wq);

	if (!(wq->flags & WQ_UNBOUND))
		free_percpu(wq->cpu_pwqs);
	else
		free_workqueue_attrs(wq->unbound_attrs);

	kfree(wq);
}
3504
/* RCU callback: final release of a worker_pool */
static void rcu_free_pool(struct rcu_head *rcu)
{
	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);

	ida_destroy(&pool->worker_ida);
	free_workqueue_attrs(pool->attrs);
	kfree(pool);
}
3513
/*
 * This returns with the lock held on success (it never fails on its own;
 * the caller loops via rcuwait_wait_event() until it returns %true).
 * NOTE: asymmetric locking — pool->lock stays held on a %true return.
 */
static bool wq_manager_inactive(struct worker_pool *pool)
{
	raw_spin_lock_irq(&pool->lock);

	if (pool->flags & POOL_MANAGER_ACTIVE) {
		raw_spin_unlock_irq(&pool->lock);
		return false;
	}
	return true;
}
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in an RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * successfully or not, init_worker_pool().
 *
 * Should be called with wq_pool_mutex held.
 */
static void put_unbound_pool(struct worker_pool *pool)
{
	DECLARE_COMPLETION_ONSTACK(detach_completion);
	struct worker *worker;

	lockdep_assert_held(&wq_pool_mutex);

	if (--pool->refcnt)
		return;

	/* sanity checks: only unbound (cpu < 0), empty pools may be freed */
	if (WARN_ON(!(pool->cpu < 0)) ||
	    WARN_ON(!list_empty(&pool->worklist)))
		return;

	/* release id and unhash so no new user can find the pool */
	if (pool->id >= 0)
		idr_remove(&worker_pool_idr, pool->id);
	hash_del(&pool->hash_node);

	/*
	 * Become the manager and destroy all workers.  This prevents
	 * @pool's workers from blocking on attach_mutex.  We're the last
	 * manager and @pool gets freed with the flag set.
	 *
	 * Because of how wq_manager_inactive() works, we hold pool->lock
	 * after the wait succeeds; the unlock below pairs with it.
	 */
	rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
			   TASK_UNINTERRUPTIBLE);
	pool->flags |= POOL_MANAGER_ACTIVE;

	while ((worker = first_idle_worker(pool)))
		destroy_worker(worker);
	WARN_ON(pool->nr_workers || pool->nr_idle);
	raw_spin_unlock_irq(&pool->lock);

	/* wait for any still-attached workers to finish detaching */
	mutex_lock(&wq_pool_attach_mutex);
	if (!list_empty(&pool->workers))
		pool->detach_completion = &detach_completion;
	mutex_unlock(&wq_pool_attach_mutex);

	if (pool->detach_completion)
		wait_for_completion(pool->detach_completion);

	/* shut down the timers - timer fns can't touch a dead pool */
	del_timer_sync(&pool->idle_timer);
	del_timer_sync(&pool->mayday_timer);

	/* RCU protected to allow dereferences from get_work_pool() */
	call_rcu(&pool->rcu, rcu_free_pool);
}
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
	u32 hash = wqattrs_hash(attrs);
	struct worker_pool *pool;
	int node;
	int target_node = NUMA_NO_NODE;

	lockdep_assert_held(&wq_pool_mutex);

	/* do we already have a matching pool? */
	hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
		if (wqattrs_equal(pool->attrs, attrs)) {
			pool->refcnt++;
			return pool;
		}
	}

	/* if cpumask is contained inside a NUMA node, we belong to that node */
	if (wq_numa_enabled) {
		for_each_node(node) {
			if (cpumask_subset(attrs->cpumask,
					   wq_numa_possible_cpumask[node])) {
				target_node = node;
				break;
			}
		}
	}

	/* nope, create a new one */
	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
	if (!pool || init_worker_pool(pool) < 0)
		goto fail;

	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
	copy_workqueue_attrs(pool->attrs, attrs);
	pool->node = target_node;

	/*
	 * no_numa isn't a worker_pool attribute, always clear it.  See
	 * 'struct workqueue_attrs' comments for detail.
	 */
	pool->attrs->no_numa = false;

	if (worker_pool_assign_id(pool) < 0)
		goto fail;

	/* create and start the initial worker */
	if (wq_online && !create_worker(pool))
		goto fail;

	/* install */
	hash_add(unbound_pool_hash, &pool->hash_node, hash);

	return pool;
fail:
	if (pool)
		put_unbound_pool(pool);
	return NULL;
}
3662
3663static void rcu_free_pwq(struct rcu_head *rcu)
3664{
3665 kmem_cache_free(pwq_cache,
3666 container_of(rcu, struct pool_workqueue, rcu));
3667}
3668
3669
3670
3671
3672
/*
 * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
 * and needs to be destroyed.
 */
static void pwq_unbound_release_workfn(struct work_struct *work)
{
	struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
						  unbound_release_work);
	struct workqueue_struct *wq = pwq->wq;
	struct worker_pool *pool = pwq->pool;
	bool is_last = false;

	/*
	 * When @pwq is not linked, it doesn't hold any reference to the
	 * @wq, and @wq is invalid to access.
	 */
	if (!list_empty(&pwq->pwqs_node)) {
		if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
			return;

		mutex_lock(&wq->mutex);
		list_del_rcu(&pwq->pwqs_node);
		is_last = list_empty(&wq->pwqs);
		mutex_unlock(&wq->mutex);
	}

	mutex_lock(&wq_pool_mutex);
	put_unbound_pool(pool);
	mutex_unlock(&wq_pool_mutex);

	call_rcu(&pwq->rcu, rcu_free_pwq);

	/*
	 * If we're the last pwq going away, @wq is already dead and no one
	 * is gonna access it anymore.  Schedule RCU free.
	 */
	if (is_last) {
		wq_unregister_lockdep(wq);
		call_rcu(&wq->rcu, rcu_free_wq);
	}
}
3710
3711
3712
3713
3714
3715
3716
3717
3718
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
{
	struct workqueue_struct *wq = pwq->wq;
	bool freezable = wq->flags & WQ_FREEZABLE;
	unsigned long flags;

	/* for @wq->saved_max_active */
	lockdep_assert_held(&wq->mutex);

	/* fast exit for non-freezable wqs whose max_active is unchanged */
	if (!freezable && pwq->max_active == wq->saved_max_active)
		return;

	/* this function can be called during early boot w/ irq disabled */
	raw_spin_lock_irqsave(&pwq->pool->lock, flags);

	/*
	 * During [un]freezing, the caller is responsible for ensuring that
	 * this function is called at least once after @workqueue_freezing
	 * is updated and visible.
	 */
	if (!freezable || !workqueue_freezing) {
		bool kick = false;

		pwq->max_active = wq->saved_max_active;

		while (!list_empty(&pwq->delayed_works) &&
		       pwq->nr_active < pwq->max_active) {
			pwq_activate_first_delayed(pwq);
			kick = true;
		}

		/*
		 * Need to kick a worker after thawed or an unbound wq's
		 * max_active is bumped; otherwise the activated items may
		 * sit in the pool's worklist with no one to process them.
		 */
		if (kick)
			wake_up_worker(pwq->pool);
	} else {
		pwq->max_active = 0;
	}

	raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
3765
3766
/* initialize newly alloced @pwq which is associated with @wq and @pool */
static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
		     struct worker_pool *pool)
{
	/* pwq pointers are stashed in work->data; low bits must be free */
	BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);

	memset(pwq, 0, sizeof(*pwq));

	pwq->pool = pool;
	pwq->wq = wq;
	pwq->flush_color = -1;		/* not participating in a flush */
	pwq->refcnt = 1;
	INIT_LIST_HEAD(&pwq->delayed_works);
	INIT_LIST_HEAD(&pwq->pwqs_node);
	INIT_LIST_HEAD(&pwq->mayday_node);
	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
}
3783
3784
/* sync @pwq with the current state of its associated wq and link it */
static void link_pwq(struct pool_workqueue *pwq)
{
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq->mutex);

	/* may be called multiple times, ignore if already linked */
	if (!list_empty(&pwq->pwqs_node))
		return;

	/* set the matching work_color */
	pwq->work_color = wq->work_color;

	/* sync max_active to the current setting before publishing */
	pwq_adjust_max_active(pwq);

	/* link in @pwq */
	list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
}
3804
3805
3806static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3807 const struct workqueue_attrs *attrs)
3808{
3809 struct worker_pool *pool;
3810 struct pool_workqueue *pwq;
3811
3812 lockdep_assert_held(&wq_pool_mutex);
3813
3814 pool = get_unbound_pool(attrs);
3815 if (!pool)
3816 return NULL;
3817
3818 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3819 if (!pwq) {
3820 put_unbound_pool(pool);
3821 return NULL;
3822 }
3823
3824 init_pwq(pwq, wq, pool);
3825 return pwq;
3826}
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: a CPU which is going down, -1 if none
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal (i.e. the default pwq can be used for @node).
 */
static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
				 int cpu_going_down, cpumask_t *cpumask)
{
	if (!wq_numa_enabled || attrs->no_numa)
		goto use_dfl;

	/* does @node have any online CPUs @attrs wants? */
	cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
	if (cpu_going_down >= 0)
		cpumask_clear_cpu(cpu_going_down, cpumask);

	if (cpumask_empty(cpumask))
		goto use_dfl;

	/* yeah, return possible CPUs in @node that @attrs wants */
	cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);

	if (cpumask_empty(cpumask)) {
		pr_warn_once("WARNING: workqueue cpumask: online intersect > "
				"possible intersect\n");
		return false;
	}

	return !cpumask_equal(cpumask, attrs->cpumask);

use_dfl:
	cpumask_copy(cpumask, attrs->cpumask);
	return false;
}
3879
3880
/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
						   int node,
						   struct pool_workqueue *pwq)
{
	struct pool_workqueue *old_pwq;

	lockdep_assert_held(&wq_pool_mutex);
	lockdep_assert_held(&wq->mutex);

	/* link_pwq() can handle duplicate calls */
	link_pwq(pwq);

	old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
	/* rcu_assign_pointer() publishes @pwq only after it's fully linked */
	rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
	return old_pwq;
}
3897
3898
/* context to store the prepared attrs & pwqs before applying */
struct apply_wqattrs_ctx {
	struct workqueue_struct	*wq;		/* target workqueue */
	struct workqueue_attrs	*attrs;		/* attrs to apply */
	struct list_head	list;		/* queued for batching commits */
	struct pool_workqueue	*dfl_pwq;	/* default (fallback) pwq */
	struct pool_workqueue	*pwq_tbl[];	/* per-node pwqs */
};
3906
3907
3908static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3909{
3910 if (ctx) {
3911 int node;
3912
3913 for_each_node(node)
3914 put_pwq_unlocked(ctx->pwq_tbl[node]);
3915 put_pwq_unlocked(ctx->dfl_pwq);
3916
3917 free_workqueue_attrs(ctx->attrs);
3918
3919 kfree(ctx);
3920 }
3921}
3922
3923
/* allocate the attrs and pwqs for later installation */
static struct apply_wqattrs_ctx *
apply_wqattrs_prepare(struct workqueue_struct *wq,
		      const struct workqueue_attrs *attrs)
{
	struct apply_wqattrs_ctx *ctx;
	struct workqueue_attrs *new_attrs, *tmp_attrs;
	int node;

	lockdep_assert_held(&wq_pool_mutex);

	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);

	new_attrs = alloc_workqueue_attrs();
	tmp_attrs = alloc_workqueue_attrs();
	if (!ctx || !new_attrs || !tmp_attrs)
		goto out_free;

	/*
	 * Calculate the attrs of the default pwq.
	 * If the user configured cpumask doesn't overlap with the
	 * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
	 */
	copy_workqueue_attrs(new_attrs, attrs);
	cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
	if (unlikely(cpumask_empty(new_attrs->cpumask)))
		cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);

	/*
	 * We may create multiple pwqs with differing cpumasks.  Make a
	 * copy of @new_attrs which will be modified and used to obtain
	 * pools.
	 */
	copy_workqueue_attrs(tmp_attrs, new_attrs);

	/*
	 * If something goes wrong during CPU up/down, we'll fall back to
	 * the default pwq covering whole @attrs->cpumask.  Always create
	 * it even if we don't use it immediately.
	 */
	ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
	if (!ctx->dfl_pwq)
		goto out_free;

	for_each_node(node) {
		if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
			/* node needs its own cpumask - dedicated pwq */
			ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
			if (!ctx->pwq_tbl[node])
				goto out_free;
		} else {
			/* node can share the default pwq */
			ctx->dfl_pwq->refcnt++;
			ctx->pwq_tbl[node] = ctx->dfl_pwq;
		}
	}

	/* save the user configured attrs and sanitize it. */
	copy_workqueue_attrs(new_attrs, attrs);
	cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
	ctx->attrs = new_attrs;

	ctx->wq = wq;
	free_workqueue_attrs(tmp_attrs);
	return ctx;

out_free:
	free_workqueue_attrs(tmp_attrs);
	free_workqueue_attrs(new_attrs);
	apply_wqattrs_cleanup(ctx);
	return NULL;
}
3993
3994
/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
{
	int node;

	/* all pwqs have been created successfully, let's install'em */
	mutex_lock(&ctx->wq->mutex);

	copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);

	/* save the previous pwqs in ctx for apply_wqattrs_cleanup() to put */
	for_each_node(node)
		ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
							  ctx->pwq_tbl[node]);

	/* @dfl_pwq might not have been used, ensure it's linked */
	link_pwq(ctx->dfl_pwq);
	swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);

	mutex_unlock(&ctx->wq->mutex);
}
4015
static void apply_wqattrs_lock(void)
{
	/* CPUs should stay stable across pwq creations and installations */
	get_online_cpus();
	mutex_lock(&wq_pool_mutex);
}
4022
/* pair of apply_wqattrs_lock(); release in reverse acquisition order */
static void apply_wqattrs_unlock(void)
{
	mutex_unlock(&wq_pool_mutex);
	put_online_cpus();
}
4028
/* apply @attrs to @wq; caller holds wq_pool_mutex (and CPU read lock) */
static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
					const struct workqueue_attrs *attrs)
{
	struct apply_wqattrs_ctx *ctx;

	/* only unbound workqueues can change attributes */
	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
		return -EINVAL;

	/* creating multiple pwqs breaks ordering guarantee */
	if (!list_empty(&wq->pwqs)) {
		if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
			return -EINVAL;

		/* implicitly-ordered wq loses its ordering guarantee here */
		wq->flags &= ~__WQ_ORDERED;
	}

	ctx = apply_wqattrs_prepare(wq, attrs);
	if (!ctx)
		return -ENOMEM;

	/* the ctx has been prepared successfully, let's commit it */
	apply_wqattrs_commit(ctx);
	apply_wqattrs_cleanup(ctx);

	return 0;
}
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possibles CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node it was issued on.  Older pwqs are released as in-flight work
 * items finish.  Note that a work item which repeatedly requeues itself
 * back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
 *
 * Return: 0 on success and -errno on failure.
 */
int apply_workqueue_attrs(struct workqueue_struct *wq,
			  const struct workqueue_attrs *attrs)
{
	int ret;

	lockdep_assert_cpus_held();

	mutex_lock(&wq_pool_mutex);
	ret = apply_workqueue_attrs_locked(wq, attrs);
	mutex_unlock(&wq_pool_mutex);

	return ret;
}
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged, update NUMA affinity of
 * @wq accordingly.
 *
 * If NUMA affinity can't be adjusted due to memory allocation failure, it
 * falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing the work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  This is similar to how per-cpu
 * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * CPU_DOWN_PREPARE.
 */
static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
				   bool online)
{
	int node = cpu_to_node(cpu);
	int cpu_off = online ? -1 : cpu;
	struct pool_workqueue *old_pwq = NULL, *pwq;
	struct workqueue_attrs *target_attrs;
	cpumask_t *cpumask;

	lockdep_assert_held(&wq_pool_mutex);

	if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
	    wq->unbound_attrs->no_numa)
		return;

	/*
	 * We don't wanna alloc/free wq_attrs for each wq for each CPU.
	 * Let's use a preallocated one.  The following buf is protected by
	 * CPU hotplug exclusion.
	 */
	target_attrs = wq_update_unbound_numa_attrs_buf;
	cpumask = target_attrs->cpumask;

	copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
	pwq = unbound_pwq_by_node(wq, node);

	/*
	 * Let's determine what needs to be done.  If the target cpumask is
	 * different from the default pwq's, we need to compare it to @pwq's
	 * and create a new one if they don't match.  If the target cpumask
	 * equals the default pwq's, the default pwq should be used.
	 */
	if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
		if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
			return;
	} else {
		goto use_dfl_pwq;
	}

	/* create a new pwq */
	pwq = alloc_unbound_pwq(wq, target_attrs);
	if (!pwq) {
		pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
			wq->name);
		goto use_dfl_pwq;
	}

	/* Install the new pwq. */
	mutex_lock(&wq->mutex);
	old_pwq = numa_pwq_tbl_install(wq, node, pwq);
	goto out_unlock;

use_dfl_pwq:
	mutex_lock(&wq->mutex);
	/* bump dfl_pwq's ref for the slot in numa_pwq_tbl[] */
	raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
	get_pwq(wq->dfl_pwq);
	raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
	old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
out_unlock:
	mutex_unlock(&wq->mutex);
	put_pwq_unlocked(old_pwq);
}
4173
/* allocate @wq's pwqs (per-cpu or unbound) and link them into @wq->pwqs */
static int alloc_and_link_pwqs(struct workqueue_struct *wq)
{
	bool highpri = wq->flags & WQ_HIGHPRI;
	int cpu, ret;

	if (!(wq->flags & WQ_UNBOUND)) {
		/* per-cpu: one pwq per CPU, bound to the std worker pools */
		wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
		if (!wq->cpu_pwqs)
			return -ENOMEM;

		for_each_possible_cpu(cpu) {
			struct pool_workqueue *pwq =
				per_cpu_ptr(wq->cpu_pwqs, cpu);
			struct worker_pool *cpu_pools =
				per_cpu(cpu_worker_pools, cpu);

			init_pwq(pwq, wq, &cpu_pools[highpri]);

			mutex_lock(&wq->mutex);
			link_pwq(pwq);
			mutex_unlock(&wq->mutex);
		}
		return 0;
	}

	get_online_cpus();
	if (wq->flags & __WQ_ORDERED) {
		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
		/* there should only be single pwq for ordering guarantee */
		WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
			      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
		     "ordering guarantee broken for workqueue %s\n", wq->name);
	} else {
		ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
	}
	put_online_cpus();

	return ret;
}
4213
4214static int wq_clamp_max_active(int max_active, unsigned int flags,
4215 const char *name)
4216{
4217 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4218
4219 if (max_active < 1 || max_active > lim)
4220 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4221 max_active, name, 1, lim);
4222
4223 return clamp_val(max_active, 1, lim);
4224}
4225
4226
4227
4228
4229
/*
 * Workqueues which may be used during memory reclaim should have a rescuer
 * to guarantee forward progress.
 */
static int init_rescuer(struct workqueue_struct *wq)
{
	struct worker *rescuer;
	int ret;

	if (!(wq->flags & WQ_MEM_RECLAIM))
		return 0;

	rescuer = alloc_worker(NUMA_NO_NODE);
	if (!rescuer)
		return -ENOMEM;

	rescuer->rescue_wq = wq;
	rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
	if (IS_ERR(rescuer->task)) {
		ret = PTR_ERR(rescuer->task);
		kfree(rescuer);
		return ret;
	}

	/* publish before waking so rescuer_thread() sees its wq */
	wq->rescuer = rescuer;
	kthread_bind_mask(rescuer->task, cpu_possible_mask);
	wake_up_process(rescuer->task);

	return 0;
}
4256
/* allocate and initialize a workqueue; see workqueue.h for flag semantics */
__printf(1, 4)
struct workqueue_struct *alloc_workqueue(const char *fmt,
					 unsigned int flags,
					 int max_active, ...)
{
	size_t tbl_size = 0;
	va_list args;
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	/*
	 * Unbound && max_active == 1 used to imply ordered, which is no
	 * longer the case on NUMA machines due to per-node pools.  While
	 * alloc_ordered_workqueue() is the right way to create an ordered
	 * workqueue, keep the previous behavior to avoid subtle breakages
	 * on NUMA.
	 */
	if ((flags & WQ_UNBOUND) && max_active == 1)
		flags |= __WQ_ORDERED;

	/* see the comment above the definition of WQ_POWER_EFFICIENT */
	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
		flags |= WQ_UNBOUND;

	/* allocate wq and format name */
	if (flags & WQ_UNBOUND)
		tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);

	wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
	if (!wq)
		return NULL;

	if (flags & WQ_UNBOUND) {
		wq->unbound_attrs = alloc_workqueue_attrs();
		if (!wq->unbound_attrs)
			goto err_free_wq;
	}

	va_start(args, max_active);
	vsnprintf(wq->name, sizeof(wq->name), fmt, args);
	va_end(args);

	max_active = max_active ?: WQ_DFL_ACTIVE;
	max_active = wq_clamp_max_active(max_active, flags, wq->name);

	/* init wq */
	wq->flags = flags;
	wq->saved_max_active = max_active;
	mutex_init(&wq->mutex);
	atomic_set(&wq->nr_pwqs_to_flush, 0);
	INIT_LIST_HEAD(&wq->pwqs);
	INIT_LIST_HEAD(&wq->flusher_queue);
	INIT_LIST_HEAD(&wq->flusher_overflow);
	INIT_LIST_HEAD(&wq->maydays);

	wq_init_lockdep(wq);
	INIT_LIST_HEAD(&wq->list);

	if (alloc_and_link_pwqs(wq) < 0)
		goto err_unreg_lockdep;

	if (wq_online && init_rescuer(wq) < 0)
		goto err_destroy;

	if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
		goto err_destroy;

	/*
	 * wq_pool_mutex protects global freeze state and workqueues list.
	 * Grab it, adjust max_active and add the new @wq to workqueues
	 * list.
	 */
	mutex_lock(&wq_pool_mutex);

	mutex_lock(&wq->mutex);
	for_each_pwq(pwq, wq)
		pwq_adjust_max_active(pwq);
	mutex_unlock(&wq->mutex);

	list_add_tail_rcu(&wq->list, &workqueues);

	mutex_unlock(&wq_pool_mutex);

	return wq;

err_unreg_lockdep:
	wq_unregister_lockdep(wq);
	wq_free_lockdep(wq);
err_free_wq:
	free_workqueue_attrs(wq->unbound_attrs);
	kfree(wq);
	return NULL;
err_destroy:
	/* pwqs already exist; full teardown required */
	destroy_workqueue(wq);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_workqueue);
4354
4355static bool pwq_busy(struct pool_workqueue *pwq)
4356{
4357 int i;
4358
4359 for (i = 0; i < WORK_NR_COLORS; i++)
4360 if (pwq->nr_in_flight[i])
4361 return true;
4362
4363 if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4364 return true;
4365 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4366 return true;
4367
4368 return false;
4369}
4370
4371
4372
4373
4374
4375
4376
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;
	int node;

	/*
	 * Remove it from sysfs first so that sanity check failure doesn't
	 * lead to sysfs name conflicts.
	 */
	workqueue_sysfs_unregister(wq);

	/* drain it before proceeding with destruction */
	drain_workqueue(wq);

	/* kill rescuer, if sanity checks fail, leave it w/o rescuer */
	if (wq->rescuer) {
		struct worker *rescuer = wq->rescuer;

		/* this prevents new queueing */
		raw_spin_lock_irq(&wq_mayday_lock);
		wq->rescuer = NULL;
		raw_spin_unlock_irq(&wq_mayday_lock);

		/* rescuer will empty maydays list before exiting */
		kthread_stop(rescuer->task);
		kfree(rescuer);
	}

	/*
	 * Sanity checks - grab all the locks so that we wait for all
	 * in-flight operations which may do put_pwq().
	 */
	mutex_lock(&wq_pool_mutex);
	mutex_lock(&wq->mutex);
	for_each_pwq(pwq, wq) {
		raw_spin_lock_irq(&pwq->pool->lock);
		if (WARN_ON(pwq_busy(pwq))) {
			pr_warn("%s: %s has the following busy pwq\n",
				__func__, wq->name);
			show_pwq(pwq);
			raw_spin_unlock_irq(&pwq->pool->lock);
			mutex_unlock(&wq->mutex);
			mutex_unlock(&wq_pool_mutex);
			show_workqueue_state();
			/* leak @wq rather than free a busy one */
			return;
		}
		raw_spin_unlock_irq(&pwq->pool->lock);
	}
	mutex_unlock(&wq->mutex);

	/*
	 * wq list is used to freeze wq, remove from list after
	 * flushing is complete in case freeze races us.
	 */
	list_del_rcu(&wq->list);
	mutex_unlock(&wq_pool_mutex);

	if (!(wq->flags & WQ_UNBOUND)) {
		wq_unregister_lockdep(wq);
		/*
		 * The base ref is never dropped on per-cpu pwqs.  Directly
		 * schedule RCU free.
		 */
		call_rcu(&wq->rcu, rcu_free_wq);
	} else {
		/*
		 * We're the sole accessor of @wq at this point.  Directly
		 * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
		 * @wq will be freed when the last pwq is released.
		 */
		for_each_node(node) {
			pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
			RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
			put_pwq_unlocked(pwq);
		}

		/*
		 * Put dfl_pwq.  @wq may be freed any time after dfl_pwq is
		 * put.  Don't access it afterwards.
		 */
		pwq = wq->dfl_pwq;
		wq->dfl_pwq = NULL;
		put_pwq_unlocked(pwq);
	}
}
EXPORT_SYMBOL_GPL(destroy_workqueue);
4462EXPORT_SYMBOL_GPL(destroy_workqueue);
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value.
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
{
	struct pool_workqueue *pwq;

	/* disallow meddling with max_active for ordered workqueues */
	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
		return;

	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);

	mutex_lock(&wq->mutex);

	/* implicitly-ordered wq loses its ordering guarantee */
	wq->flags &= ~__WQ_ORDERED;
	wq->saved_max_active = max_active;

	for_each_pwq(pwq, wq)
		pwq_adjust_max_active(pwq);

	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4494EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504struct work_struct *current_work(void)
4505{
4506 struct worker *worker = current_wq_worker();
4507
4508 return worker ? worker->current_work : NULL;
4509}
4510EXPORT_SYMBOL(current_work);
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520bool current_is_workqueue_rescuer(void)
4521{
4522 struct worker *worker = current_wq_worker();
4523
4524 return worker && worker->rescue_wq;
4525}
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean that the workqueue
 * is contested on any other CPUs.
 *
 * Return: %true if congested, %false otherwise.
 */
bool workqueue_congested(int cpu, struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;
	bool ret;

	rcu_read_lock();
	preempt_disable();	/* keep smp_processor_id() stable */

	if (cpu == WORK_CPU_UNBOUND)
		cpu = smp_processor_id();

	if (!(wq->flags & WQ_UNBOUND))
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	ret = !list_empty(&pwq->delayed_works);
	preempt_enable();
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(workqueue_congested);
4567EXPORT_SYMBOL_GPL(workqueue_congested);
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
unsigned int work_busy(struct work_struct *work)
{
	struct worker_pool *pool;
	unsigned long flags;
	unsigned int ret = 0;

	if (work_pending(work))
		ret |= WORK_BUSY_PENDING;

	rcu_read_lock();
	pool = get_work_pool(work);
	if (pool) {
		raw_spin_lock_irqsave(&pool->lock, flags);
		if (find_worker_executing_work(pool, work))
			ret |= WORK_BUSY_RUNNING;
		raw_spin_unlock_irqrestore(&pool->lock, flags);
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(work_busy);
4601EXPORT_SYMBOL_GPL(work_busy);
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613void set_worker_desc(const char *fmt, ...)
4614{
4615 struct worker *worker = current_wq_worker();
4616 va_list args;
4617
4618 if (worker) {
4619 va_start(args, fmt);
4620 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4621 va_end(args);
4622 }
4623}
4624EXPORT_SYMBOL_GPL(set_worker_desc);
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * name of the workqueue being serviced and worker description set with
 * set_worker_desc() by the currently executing work item.
 *
 * This function can be safely called on any task as long as the
 * task_struct itself is accessible.  While safe, this function isn't
 * synchronized and may print out mixups or garbages of limited length.
 */
void print_worker_info(const char *log_lvl, struct task_struct *task)
{
	work_func_t *fn = NULL;
	char name[WQ_NAME_LEN] = { };
	char desc[WORKER_DESC_LEN] = { };
	struct pool_workqueue *pwq = NULL;
	struct workqueue_struct *wq = NULL;
	struct worker *worker;

	if (!(task->flags & PF_WQ_WORKER))
		return;

	/*
	 * This function is called without any synchronization and @task
	 * could be in any state.  Be careful with dereferences.
	 */
	worker = kthread_probe_data(task);

	/*
	 * Carefully copy the associated workqueue's workfn, name and desc.
	 * Keep the original last '\0' in case the original is garbage.
	 */
	copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
	copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
	copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
	copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
	copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);

	if (fn || name[0] || desc[0]) {
		printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
		if (strcmp(name, desc))
			pr_cont(" (%s)", desc);
		pr_cont("\n");
	}
}
4674
/* continue the current printk() line with @pool's identifying attributes */
static void pr_cont_pool_info(struct worker_pool *pool)
{
	pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
	if (pool->node != NUMA_NO_NODE)
		pr_cont(" node=%d", pool->node);
	pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
}
4682
4683static void pr_cont_work(bool comma, struct work_struct *work)
4684{
4685 if (work->func == wq_barrier_func) {
4686 struct wq_barrier *barr;
4687
4688 barr = container_of(work, struct wq_barrier, work);
4689
4690 pr_cont("%s BAR(%d)", comma ? "," : "",
4691 task_pid_nr(barr->task));
4692 } else {
4693 pr_cont("%s %ps", comma ? "," : "", work->func);
4694 }
4695}
4696
/* dump @pwq's state - in-flight, pending and delayed work items */
static void show_pwq(struct pool_workqueue *pwq)
{
	struct worker_pool *pool = pwq->pool;
	struct work_struct *work;
	struct worker *worker;
	bool has_in_flight = false, has_pending = false;
	int bkt;

	pr_info("  pwq %d:", pool->id);
	pr_cont_pool_info(pool);

	pr_cont(" active=%d/%d refcnt=%d%s\n",
		pwq->nr_active, pwq->max_active, pwq->refcnt,
		!list_empty(&pwq->mayday_node) ? " MAYDAY" : "");

	/* any busy worker currently executing an item from this pwq? */
	hash_for_each(pool->busy_hash, bkt, worker, hentry) {
		if (worker->current_pwq == pwq) {
			has_in_flight = true;
			break;
		}
	}
	if (has_in_flight) {
		bool comma = false;

		pr_info("    in-flight:");
		hash_for_each(pool->busy_hash, bkt, worker, hentry) {
			if (worker->current_pwq != pwq)
				continue;

			pr_cont("%s %d%s:%ps", comma ? "," : "",
				task_pid_nr(worker->task),
				worker->rescue_wq ? "(RESCUER)" : "",
				worker->current_func);
			list_for_each_entry(work, &worker->scheduled, entry)
				pr_cont_work(false, work);
			comma = true;
		}
		pr_cont("\n");
	}

	/* pool->worklist is shared; filter for items belonging to this pwq */
	list_for_each_entry(work, &pool->worklist, entry) {
		if (get_work_pwq(work) == pwq) {
			has_pending = true;
			break;
		}
	}
	if (has_pending) {
		bool comma = false;

		pr_info("    pending:");
		list_for_each_entry(work, &pool->worklist, entry) {
			if (get_work_pwq(work) != pwq)
				continue;

			pr_cont_work(comma, work);
			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
		}
		pr_cont("\n");
	}

	/* items held back by max_active */
	if (!list_empty(&pwq->delayed_works)) {
		bool comma = false;

		pr_info("    delayed:");
		list_for_each_entry(work, &pwq->delayed_works, entry) {
			pr_cont_work(comma, work);
			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
		}
		pr_cont("\n");
	}
}
4768
4769
4770
4771
4772
4773
4774
/**
 * show_workqueue_state - dump workqueue state
 *
 * Called from a sysrq handler or try_to_freeze_tasks() and prints out
 * all busy workqueues and pools.
 */
void show_workqueue_state(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	unsigned long flags;
	int pi;

	rcu_read_lock();

	pr_info("Showing busy workqueues and worker pools:\n");

	list_for_each_entry_rcu(wq, &workqueues, list) {
		struct pool_workqueue *pwq;
		bool idle = true;

		/* skip workqueues with nothing active or queued */
		for_each_pwq(pwq, wq) {
			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
				idle = false;
				break;
			}
		}
		if (idle)
			continue;

		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);

		for_each_pwq(pwq, wq) {
			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
				show_pwq(pwq);
			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
			/*
			 * We could be printing a lot from atomic context,
			 * e.g. sysrq-t -> show_workqueue_state().  Avoid
			 * triggering a hard lockup.
			 */
			touch_nmi_watchdog();
		}
	}

	for_each_pool(pool, pi) {
		struct worker *worker;
		bool first = true;

		raw_spin_lock_irqsave(&pool->lock, flags);
		if (pool->nr_workers == pool->nr_idle)
			goto next_pool;

		pr_info("pool %d:", pool->id);
		pr_cont_pool_info(pool);
		pr_cont(" hung=%us workers=%d",
			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
			pool->nr_workers);
		if (pool->manager)
			pr_cont(" manager: %d",
				task_pid_nr(pool->manager->task));
		list_for_each_entry(worker, &pool->idle_list, entry) {
			pr_cont(" %s%d", first ? "idle: " : "",
				task_pid_nr(worker->task));
			first = false;
		}
		pr_cont("\n");
	next_pool:
		raw_spin_unlock_irqrestore(&pool->lock, flags);
		/*
		 * We could be printing a lot from atomic context, e.g.
		 * sysrq-t -> show_workqueue_state().  Avoid triggering a
		 * hard lockup.
		 */
		touch_nmi_watchdog();
	}

	rcu_read_unlock();
}
4849
4850
/* used to show worker information through /proc/PID/{comm,stat,status} */
void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
{
	int off;

	/* always show the actual comm */
	off = strscpy(buf, task->comm, size);
	if (off < 0)
		return;

	/* stabilize PF_WQ_WORKER and worker pool association */
	mutex_lock(&wq_pool_attach_mutex);

	if (task->flags & PF_WQ_WORKER) {
		struct worker *worker = kthread_data(task);
		struct worker_pool *pool = worker->pool;

		if (pool) {
			raw_spin_lock_irq(&pool->lock);
			/*
			 * ->desc tracks information (wq name or
			 * set_worker_desc()) for the latest execution.  If
			 * current, prepend '+', otherwise '-'.
			 */
			if (worker->desc[0] != '\0') {
				if (worker->current_work)
					scnprintf(buf + off, size - off, "+%s",
						  worker->desc);
				else
					scnprintf(buf + off, size - off, "-%s",
						  worker->desc);
			}
			raw_spin_unlock_irq(&pool->lock);
		}
	}

	mutex_unlock(&wq_pool_attach_mutex);
}
4888
4889#ifdef CONFIG_SMP
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
/**
 * unbind_workers - unbind workers from their associated CPU
 * @cpu: CPU the workers belong to
 *
 * Called during CPU offline.  Workers of the per-cpu pools for @cpu are
 * marked %WORKER_UNBOUND, the pools become %POOL_DISASSOCIATED, and the
 * workers' affinity is widened to all possible CPUs so they can keep
 * running while the CPU is down.
 */
static void unbind_workers(int cpu)
{
	struct worker_pool *pool;
	struct worker *worker;

	for_each_cpu_worker_pool(pool, cpu) {
		mutex_lock(&wq_pool_attach_mutex);
		raw_spin_lock_irq(&pool->lock);

		/*
		 * We've blocked all attach/detach operations.  Make all
		 * workers unbound and set DISASSOCIATED.  Before this,
		 * all workers except for the ones which are still
		 * executing works from before the last CPU down must be
		 * on the cpu.  After this, they may become diasporas.
		 */
		for_each_pool_worker(worker, pool)
			worker->flags |= WORKER_UNBOUND;

		pool->flags |= POOL_DISASSOCIATED;

		raw_spin_unlock_irq(&pool->lock);

		for_each_pool_worker(worker, pool) {
			kthread_set_per_cpu(worker->task, -1);
			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
		}

		mutex_unlock(&wq_pool_attach_mutex);

		/*
		 * Call schedule() so that we cross rq->lock and thus can
		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
		 * This is necessary as scheduler callbacks may be invoked
		 * from other cpus.
		 */
		schedule();

		/*
		 * Sched callbacks are disabled now.  Zap nr_running.
		 * After this, nr_running stays zero and need_more_worker()
		 * and keep_working() are always true as long as the
		 * worklist is not empty.  This pool now behaves as an
		 * unbound (in terms of concurrency management) pool which
		 * are served by workers tied to the pool.
		 */
		atomic_set(&pool->nr_running, 0);

		/*
		 * With concurrency management just turned off, a busy
		 * worker blocking could lead to lengthy stalls.  Kick off
		 * unbound chain execution of currently pending work items.
		 */
		raw_spin_lock_irq(&pool->lock);
		wake_up_worker(pool);
		raw_spin_unlock_irq(&pool->lock);
	}
}
4964
4965
4966
4967
4968
4969
4970
/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Rebind all workers to the CPU.
 */
static void rebind_workers(struct worker_pool *pool)
{
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);

	/*
	 * Restore CPU affinity of all workers.  As all idle workers should
	 * be on the run-queue of the associated CPU before any local
	 * wake-ups for concurrency management happen, restore CPU affinity
	 * of all workers first and then clear UNBOUND.  As we're called
	 * from CPU_ONLINE, the following shouldn't fail.
	 */
	for_each_pool_worker(worker, pool) {
		kthread_set_per_cpu(worker->task, pool->cpu);
		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
						  pool->attrs->cpumask) < 0);
	}

	raw_spin_lock_irq(&pool->lock);

	pool->flags &= ~POOL_DISASSOCIATED;

	for_each_pool_worker(worker, pool) {
		unsigned int worker_flags = worker->flags;

		/*
		 * A bound idle worker should actually be on the runqueue
		 * of the associated CPU for local wake-ups targeting it to
		 * work.  Kick all idle workers so that they migrate to the
		 * associated CPU.  Doing this in the same loop as
		 * replacing UNBOUND with REBOUND is safe as no worker will
		 * be bound before @pool->lock is released.
		 */
		if (worker_flags & WORKER_IDLE)
			wake_up_process(worker->task);

		/*
		 * We want to clear UNBOUND but can't directly call
		 * worker_clr_flags() or adjust nr_running.  Atomically
		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
		 * @worker will clear REBOUND using worker_clr_flags() when
		 * it initiates the next execution cycle thus restoring
		 * concurrency management.  Note that when or whether
		 * @worker clears REBOUND doesn't affect correctness.
		 *
		 * WRITE_ONCE() is necessary because @worker->flags may be
		 * tested without holding any lock in
		 * wq_worker_running().  Without it, NOT_RUNNING test may
		 * fail incorrectly leading to premature concurrency
		 * management operations.
		 */
		WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
		worker_flags |= WORKER_REBOUND;
		worker_flags &= ~WORKER_UNBOUND;
		WRITE_ONCE(worker->flags, worker_flags);
	}

	raw_spin_unlock_irq(&pool->lock);
}
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs.  When a worker of such pool get scheduled, the scheduler resets
 * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
 * online CPU before, cpus_allowed of all its workers should be restored.
 */
static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
	static cpumask_t cpumask;	/* protected by wq_pool_attach_mutex */
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);

	/* is @cpu allowed for @pool? */
	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
		return;

	cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);

	/* as we're called from CPU_ONLINE, the following shouldn't fail */
	for_each_pool_worker(worker, pool)
		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
}
5059
5060int workqueue_prepare_cpu(unsigned int cpu)
5061{
5062 struct worker_pool *pool;
5063
5064 for_each_cpu_worker_pool(pool, cpu) {
5065 if (pool->nr_workers)
5066 continue;
5067 if (!create_worker(pool))
5068 return -ENOMEM;
5069 }
5070 return 0;
5071}
5072
/* CPU hotplug online callback: rebind per-cpu workers, fix up affinities */
int workqueue_online_cpu(unsigned int cpu)
{
	struct worker_pool *pool;
	struct workqueue_struct *wq;
	int pi;

	mutex_lock(&wq_pool_mutex);

	for_each_pool(pool, pi) {
		mutex_lock(&wq_pool_attach_mutex);

		if (pool->cpu == cpu)
			rebind_workers(pool);
		else if (pool->cpu < 0)
			restore_unbound_workers_cpumask(pool, cpu);

		mutex_unlock(&wq_pool_attach_mutex);
	}

	/* update NUMA affinity of unbound workqueues */
	list_for_each_entry(wq, &workqueues, list)
		wq_update_unbound_numa(wq, cpu, true);

	mutex_unlock(&wq_pool_mutex);
	return 0;
}
5099
/* CPU hotplug offline callback: unbind workers and update NUMA affinity */
int workqueue_offline_cpu(unsigned int cpu)
{
	struct workqueue_struct *wq;

	/* unbinding per-cpu workers should happen on the local CPU */
	if (WARN_ON(cpu != smp_processor_id()))
		return -1;

	unbind_workers(cpu);

	/* update NUMA affinity of unbound workqueues */
	mutex_lock(&wq_pool_mutex);
	list_for_each_entry(wq, &workqueues, list)
		wq_update_unbound_numa(wq, cpu, false);
	mutex_unlock(&wq_pool_mutex);

	return 0;
}
5118
/* argument bundle for running @fn(@arg) on a specific CPU via a work item */
struct work_for_cpu {
	struct work_struct work;	/* work item queued on the target CPU */
	long (*fn)(void *);		/* function to invoke */
	void *arg;			/* argument passed to @fn */
	long ret;			/* @fn's return value, set by the work fn */
};
5125
5126static void work_for_cpu_fn(struct work_struct *work)
5127{
5128 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5129
5130 wfc->ret = wfc->fn(wfc->arg);
5131}
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
/*
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function argument
 *
 * Queues @fn on @cpu via an on-stack work item and waits for it to finish.
 * It is up to the caller to ensure that @cpu stays online for the duration.
 *
 * Return: the value @fn returns.
 */
long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
	struct work_for_cpu wfc = { .fn = fn, .arg = arg };

	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
	schedule_work_on(cpu, &wfc.work);
	flush_work(&wfc.work);
	destroy_work_on_stack(&wfc.work);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
/*
 * work_on_cpu_safe - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function argument
 *
 * Like work_on_cpu() but pins CPU hotplug so @cpu cannot go away while
 * @fn runs.
 *
 * Return: the value @fn returns, or -ENODEV if @cpu is offline.
 */
long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
	long ret = -ENODEV;

	get_online_cpus();
	if (cpu_online(cpu))
		ret = work_on_cpu(cpu, fn, arg);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
5178#endif
5179
5180#ifdef CONFIG_FREEZER
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
/*
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Set the global freezing flag and clamp max_active of every pwq of every
 * workqueue via pwq_adjust_max_active() so freezable workqueues stop
 * starting new work items.
 *
 * CONTEXT: grabs and releases wq_pool_mutex and each wq->mutex.
 */
void freeze_workqueues_begin(void)
{
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	/* freezing must not already be in progress */
	WARN_ON_ONCE(workqueue_freezing);
	workqueue_freezing = true;

	list_for_each_entry(wq, &workqueues, list) {
		mutex_lock(&wq->mutex);
		for_each_pwq(pwq, wq)
			pwq_adjust_max_active(pwq);
		mutex_unlock(&wq->mutex);
	}

	mutex_unlock(&wq_pool_mutex);
}
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
/*
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  Must be called between
 * freeze_workqueues_begin() and thaw_workqueues().
 *
 * Return: %true if some freezable workqueue still has active work items,
 * %false if freezing is complete.
 */
bool freeze_workqueues_busy(void)
{
	bool busy = false;
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	WARN_ON_ONCE(!workqueue_freezing);

	list_for_each_entry(wq, &workqueues, list) {
		/* only freezable workqueues matter */
		if (!(wq->flags & WQ_FREEZABLE))
			continue;

		/*
		 * nr_active is monotonically decreasing while frozen; pwq
		 * traversal is protected by RCU rather than wq->mutex.
		 */
		rcu_read_lock();
		for_each_pwq(pwq, wq) {
			WARN_ON_ONCE(pwq->nr_active < 0);
			if (pwq->nr_active) {
				busy = true;
				rcu_read_unlock();
				goto out_unlock;
			}
		}
		rcu_read_unlock();
	}
out_unlock:
	mutex_unlock(&wq_pool_mutex);
	return busy;
}
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
/*
 * thaw_workqueues - thaw workqueues
 *
 * Clear the freezing flag and restore max_active of every pwq so frozen
 * work items resume execution.  No-op if freezing is not in progress.
 *
 * CONTEXT: grabs and releases wq_pool_mutex and each wq->mutex.
 */
void thaw_workqueues(void)
{
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	if (!workqueue_freezing)
		goto out_unlock;

	workqueue_freezing = false;

	/* restore max_active and repopulate worklist */
	list_for_each_entry(wq, &workqueues, list) {
		mutex_lock(&wq->mutex);
		for_each_pwq(pwq, wq)
			pwq_adjust_max_active(pwq);
		mutex_unlock(&wq->mutex);
	}

out_unlock:
	mutex_unlock(&wq_pool_mutex);
}
5290#endif
5291
/*
 * workqueue_apply_unbound_cpumask - re-apply attrs of all unbound workqueues
 *
 * Prepare a new attrs context for every unbound, non-ordered workqueue and
 * commit them all-or-nothing: if any preparation fails, nothing is
 * committed and the contexts are only cleaned up.
 *
 * Return: 0 on success, -ENOMEM if context preparation failed.
 */
static int workqueue_apply_unbound_cpumask(void)
{
	LIST_HEAD(ctxs);
	int ret = 0;
	struct workqueue_struct *wq;
	struct apply_wqattrs_ctx *ctx, *n;

	lockdep_assert_held(&wq_pool_mutex);

	list_for_each_entry(wq, &workqueues, list) {
		if (!(wq->flags & WQ_UNBOUND))
			continue;

		/* ordered workqueues keep their single pwq; skip */
		if (wq->flags & __WQ_ORDERED)
			continue;

		ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
		if (!ctx) {
			ret = -ENOMEM;
			break;
		}

		list_add_tail(&ctx->list, &ctxs);
	}

	/* commit only if every prepare succeeded; always clean up */
	list_for_each_entry_safe(ctx, n, &ctxs, list) {
		if (!ret)
			apply_wqattrs_commit(ctx);
		apply_wqattrs_cleanup(ctx);
	}

	return ret;
}
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
/*
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * Restrict the CPUs which unbound workqueues may run on.  @cpumask is
 * intersected with cpu_possible_mask first; the old mask is restored if
 * applying the new one fails.
 *
 * Return: 0 on success, -EINVAL if the resulting mask is empty,
 * -ENOMEM on allocation failure.
 */
int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
{
	int ret = -EINVAL;
	cpumask_var_t saved_cpumask;

	if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Not excluding isolated CPUs on purpose: the caller may want to
	 * use those.  Only possible CPUs are meaningful.
	 */
	cpumask_and(cpumask, cpumask, cpu_possible_mask);
	if (!cpumask_empty(cpumask)) {
		apply_wqattrs_lock();

		/* save the old mask for rollback */
		cpumask_copy(saved_cpumask, wq_unbound_cpumask);

		/* install the new mask and re-apply wq attrs */
		cpumask_copy(wq_unbound_cpumask, cpumask);
		ret = workqueue_apply_unbound_cpumask();

		/* restore the old mask if applying failed */
		if (ret < 0)
			cpumask_copy(wq_unbound_cpumask, saved_cpumask);

		apply_wqattrs_unlock();
	}

	free_cpumask_var(saved_cpumask);
	return ret;
}
5371
5372#ifdef CONFIG_SYSFS
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
/* sysfs device wrapper associating a struct device with its workqueue */
struct wq_device {
	struct workqueue_struct *wq;	/* the workqueue this device exposes */
	struct device dev;		/* embedded device on wq_subsys */
};
5392
5393static struct workqueue_struct *dev_to_wq(struct device *dev)
5394{
5395 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5396
5397 return wq_dev->wq;
5398}
5399
/* sysfs "per_cpu": 1 if the workqueue is per-cpu, 0 if unbound */
static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
}
static DEVICE_ATTR_RO(per_cpu);
5408
/* sysfs "max_active" read: report the workqueue's saved max_active */
static ssize_t max_active_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
}
5416
/* sysfs "max_active" write: parse a positive integer and apply it */
static ssize_t max_active_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	int val;

	/* only strictly positive values are meaningful */
	if (sscanf(buf, "%d", &val) != 1 || val <= 0)
		return -EINVAL;

	workqueue_set_max_active(wq, val);
	return count;
}
static DEVICE_ATTR_RW(max_active);
5431
/* attributes common to all workqueue devices */
static struct attribute *wq_sysfs_attrs[] = {
	&dev_attr_per_cpu.attr,
	&dev_attr_max_active.attr,
	NULL,
};
ATTRIBUTE_GROUPS(wq_sysfs);
5438
/*
 * sysfs "pool_ids" (unbound only): print "node:pool_id" pairs for every
 * NUMA node.  CPU hotplug is pinned and RCU held so the per-node pwq
 * lookup stays stable.
 */
static ssize_t wq_pool_ids_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	const char *delim = "";
	int node, written = 0;

	get_online_cpus();
	rcu_read_lock();
	for_each_node(node) {
		written += scnprintf(buf + written, PAGE_SIZE - written,
				     "%s%d:%d", delim, node,
				     unbound_pwq_by_node(wq, node)->pool->id);
		delim = " ";
	}
	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
	rcu_read_unlock();
	put_online_cpus();

	return written;
}
5460
/* sysfs "nice" read: report the unbound attrs' nice level under wq->mutex */
static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	int written;

	mutex_lock(&wq->mutex);
	written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
	mutex_unlock(&wq->mutex);

	return written;
}
5473
5474
/*
 * Allocate a workqueue_attrs and initialize it as a copy of @wq's current
 * unbound attrs.  Caller must hold wq_pool_mutex (via apply_wqattrs_lock())
 * and frees the result with free_workqueue_attrs().
 *
 * Return: the new attrs, or NULL on allocation failure.
 */
static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
{
	struct workqueue_attrs *attrs;

	lockdep_assert_held(&wq_pool_mutex);

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return NULL;

	copy_workqueue_attrs(attrs, wq->unbound_attrs);
	return attrs;
}
5488
/* sysfs "nice" write: parse a nice value in [MIN_NICE, MAX_NICE] and apply */
static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	struct workqueue_attrs *attrs;
	int ret = -ENOMEM;

	apply_wqattrs_lock();

	attrs = wq_sysfs_prep_attrs(wq);
	if (!attrs)
		goto out_unlock;

	if (sscanf(buf, "%d", &attrs->nice) == 1 &&
	    attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
		ret = apply_workqueue_attrs_locked(wq, attrs);
	else
		ret = -EINVAL;

out_unlock:
	apply_wqattrs_unlock();
	free_workqueue_attrs(attrs);	/* free(NULL) is a no-op */
	return ret ?: count;
}
5513
/* sysfs per-wq "cpumask" read: print the unbound attrs' cpumask */
static ssize_t wq_cpumask_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	int written;

	mutex_lock(&wq->mutex);
	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
			    cpumask_pr_args(wq->unbound_attrs->cpumask));
	mutex_unlock(&wq->mutex);
	return written;
}
5526
/* sysfs per-wq "cpumask" write: parse a cpumask and apply it to the wq */
static ssize_t wq_cpumask_store(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t count)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	struct workqueue_attrs *attrs;
	int ret = -ENOMEM;

	apply_wqattrs_lock();

	attrs = wq_sysfs_prep_attrs(wq);
	if (!attrs)
		goto out_unlock;

	ret = cpumask_parse(buf, attrs->cpumask);
	if (!ret)
		ret = apply_workqueue_attrs_locked(wq, attrs);

out_unlock:
	apply_wqattrs_unlock();
	free_workqueue_attrs(attrs);	/* free(NULL) is a no-op */
	return ret ?: count;
}
5550
/* sysfs "numa" read: 1 if NUMA affinity is enabled (i.e. !no_numa) */
static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	int written;

	mutex_lock(&wq->mutex);
	written = scnprintf(buf, PAGE_SIZE, "%d\n",
			    !wq->unbound_attrs->no_numa);
	mutex_unlock(&wq->mutex);

	return written;
}
5564
/* sysfs "numa" write: enable/disable NUMA affinity (stored inverted as no_numa) */
static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct workqueue_struct *wq = dev_to_wq(dev);
	struct workqueue_attrs *attrs;
	int v, ret = -ENOMEM;

	apply_wqattrs_lock();

	attrs = wq_sysfs_prep_attrs(wq);
	if (!attrs)
		goto out_unlock;

	ret = -EINVAL;
	if (sscanf(buf, "%d", &v) == 1) {
		attrs->no_numa = !v;
		ret = apply_workqueue_attrs_locked(wq, attrs);
	}

out_unlock:
	apply_wqattrs_unlock();
	free_workqueue_attrs(attrs);	/* free(NULL) is a no-op */
	return ret ?: count;
}
5589
/* extra attributes created only for unbound workqueues */
static struct device_attribute wq_sysfs_unbound_attrs[] = {
	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
	__ATTR_NULL,
};
5597
/* the "workqueue" sysfs bus all wq devices hang off */
static struct bus_type wq_subsys = {
	.name				= "workqueue",
	.dev_groups			= wq_sysfs_groups,
};
5602
/* sysfs bus-level "cpumask" read: print the global unbound cpumask */
static ssize_t wq_unbound_cpumask_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int written;

	mutex_lock(&wq_pool_mutex);
	written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
			    cpumask_pr_args(wq_unbound_cpumask));
	mutex_unlock(&wq_pool_mutex);

	return written;
}
5615
/* sysfs bus-level "cpumask" write: parse and install a new unbound cpumask */
static ssize_t wq_unbound_cpumask_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	cpumask_var_t cpumask;
	int ret;

	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parse(buf, cpumask);
	if (!ret)
		ret = workqueue_set_unbound_cpumask(cpumask);

	free_cpumask_var(cpumask);
	return ret ? ret : count;
}
5632
/* bus-level attribute controlling the global unbound cpumask */
static struct device_attribute wq_sysfs_cpumask_attr =
	__ATTR(cpumask, 0644, wq_unbound_cpumask_show,
	       wq_unbound_cpumask_store);
5636
/* register the workqueue bus and its global cpumask attribute at boot */
static int __init wq_sysfs_init(void)
{
	int err;

	err = subsys_virtual_register(&wq_subsys, NULL);
	if (err)
		return err;

	return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
}
core_initcall(wq_sysfs_init);
5648
5649static void wq_device_release(struct device *dev)
5650{
5651 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5652
5653 kfree(wq_dev);
5654}
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
5667
5668
5669
5670
/*
 * workqueue_sysfs_register - make a workqueue visible in sysfs
 * @wq: the workqueue to register
 *
 * Create a wq_device for @wq on the workqueue bus; unbound workqueues
 * additionally get the nice/cpumask/numa/pool_ids attribute files.  The
 * uevent is suppressed until all attributes exist so userspace sees a
 * fully initialized device.
 *
 * Return: 0 on success, -errno on failure.
 */
int workqueue_sysfs_register(struct workqueue_struct *wq)
{
	struct wq_device *wq_dev;
	int ret;

	/*
	 * Explicitly ordered workqueues must not be exposed: changing
	 * their attributes would break ordering guarantees.
	 */
	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
		return -EINVAL;

	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
	if (!wq_dev)
		return -ENOMEM;

	wq_dev->wq = wq;
	wq_dev->dev.bus = &wq_subsys;
	wq_dev->dev.release = wq_device_release;
	dev_set_name(&wq_dev->dev, "%s", wq->name);

	/*
	 * Suppress uevent until all attribute files are created; the
	 * KOBJ_ADD event is emitted manually at the end.
	 */
	dev_set_uevent_suppress(&wq_dev->dev, true);

	ret = device_register(&wq_dev->dev);
	if (ret) {
		/* device_register failure requires put_device, not kfree */
		put_device(&wq_dev->dev);
		wq->wq_dev = NULL;
		return ret;
	}

	if (wq->flags & WQ_UNBOUND) {
		struct device_attribute *attr;

		for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
			ret = device_create_file(&wq_dev->dev, attr);
			if (ret) {
				device_unregister(&wq_dev->dev);
				wq->wq_dev = NULL;
				return ret;
			}
		}
	}

	dev_set_uevent_suppress(&wq_dev->dev, false);
	kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
	return 0;
}
5723
5724
5725
5726
5727
5728
5729
/*
 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
 * @wq: the workqueue to unregister
 *
 * No-op if @wq was never registered.
 */
static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
{
	struct wq_device *wq_dev = wq->wq_dev;

	if (!wq->wq_dev)
		return;

	/* clear the link before unregistering; release frees wq_dev */
	wq->wq_dev = NULL;
	device_unregister(&wq_dev->dev);
}
#else
/* !CONFIG_SYSFS stub */
static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
5742#endif
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761#ifdef CONFIG_WQ_WATCHDOG
5762
/* lockup detection threshold in seconds; 0 disables the watchdog */
static unsigned long wq_watchdog_thresh = 30;
static struct timer_list wq_watchdog_timer;

/* last-touched timestamps: one global and one per CPU */
static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5768
/* mark the watchdog as touched globally and on every possible CPU */
static void wq_watchdog_reset_touched(void)
{
	int cpu;

	wq_watchdog_touched = jiffies;
	for_each_possible_cpu(cpu)
		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
}
5777
/*
 * Periodic watchdog tick: report pools whose pending work hasn't made
 * progress within the configured threshold.
 */
static void wq_watchdog_timer_fn(struct timer_list *unused)
{
	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
	bool lockup_detected = false;
	unsigned long now = jiffies;
	struct worker_pool *pool;
	int pi;

	/* thresh == 0 means the watchdog is disabled */
	if (!thresh)
		return;

	rcu_read_lock();

	for_each_pool(pool, pi) {
		unsigned long pool_ts, touched, ts;

		/* idle pools can't lock up */
		if (list_empty(&pool->worklist))
			continue;

		/*
		 * If a virtual machine is stopped by the host it can look
		 * to the watchdog like a lockup; check and clear the
		 * guest-paused flag to avoid false positives.
		 */
		kvm_check_and_clear_guest_paused();

		/* per-cpu pools use the per-cpu touch; unbound the global one */
		if (pool->cpu >= 0)
			touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
		else
			touched = READ_ONCE(wq_watchdog_touched);
		pool_ts = READ_ONCE(pool->watchdog_ts);

		/* take the more recent of pool progress and watchdog touch */
		if (time_after(pool_ts, touched))
			ts = pool_ts;
		else
			ts = touched;

		/* did we stall? */
		if (time_after(now, ts + thresh)) {
			lockup_detected = true;
			pr_emerg("BUG: workqueue lockup - pool");
			pr_cont_pool_info(pool);
			pr_cont(" stuck for %us!\n",
				jiffies_to_msecs(now - pool_ts) / 1000);
		}
	}

	rcu_read_unlock();

	if (lockup_detected)
		show_workqueue_state();

	wq_watchdog_reset_touched();
	mod_timer(&wq_watchdog_timer, jiffies + thresh);
}
5833
/*
 * wq_watchdog_touch - pet the workqueue watchdog
 * @cpu: CPU being touched, or a negative value for a global-only touch
 *
 * Updates both the per-cpu (when @cpu >= 0) and the global timestamp so
 * unbound pools also see the touch.
 */
notrace void wq_watchdog_touch(int cpu)
{
	if (cpu >= 0)
		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;

	wq_watchdog_touched = jiffies;
}
5841
/*
 * Install a new watchdog threshold (seconds).  The timer is stopped first
 * with thresh temporarily zeroed so a concurrent timer fn bails out; a
 * non-zero @thresh restarts it.
 */
static void wq_watchdog_set_thresh(unsigned long thresh)
{
	wq_watchdog_thresh = 0;
	del_timer_sync(&wq_watchdog_timer);

	if (thresh) {
		wq_watchdog_thresh = thresh;
		wq_watchdog_reset_touched();
		mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
	}
}
5853
/*
 * module_param setter for watchdog_thresh.  Before workqueues are up
 * (system_wq == NULL) only the variable is set; wq_watchdog_init() will
 * arm the timer later.
 */
static int wq_watchdog_param_set_thresh(const char *val,
					const struct kernel_param *kp)
{
	unsigned long thresh;
	int	ret;

	ret = kstrtoul(val, 0, &thresh);
	if (ret)
		return ret;

	if (system_wq)
		wq_watchdog_set_thresh(thresh);
	else
		wq_watchdog_thresh = thresh;

	return 0;
}
5871
/* expose watchdog_thresh as a writable module parameter */
static const struct kernel_param_ops wq_watchdog_thresh_ops = {
	.set	= wq_watchdog_param_set_thresh,
	.get	= param_get_ulong,
};

module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
		0644);
5879
/* set up the deferrable watchdog timer and arm it with the boot threshold */
static void wq_watchdog_init(void)
{
	timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
	wq_watchdog_set_thresh(wq_watchdog_thresh);
}

#else	/* CONFIG_WQ_WATCHDOG */

/* !CONFIG_WQ_WATCHDOG stub */
static inline void wq_watchdog_init(void) { }
5889
5890#endif
5891
5892static void __init wq_numa_init(void)
5893{
5894 cpumask_var_t *tbl;
5895 int node, cpu;
5896
5897 if (num_possible_nodes() <= 1)
5898 return;
5899
5900 if (wq_disable_numa) {
5901 pr_info("workqueue: NUMA affinity support disabled\n");
5902 return;
5903 }
5904
5905 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
5906 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5907
5908
5909
5910
5911
5912
5913 tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
5914 BUG_ON(!tbl);
5915
5916 for_each_node(node)
5917 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5918 node_online(node) ? node : NUMA_NO_NODE));
5919
5920 for_each_possible_cpu(cpu) {
5921 node = cpu_to_node(cpu);
5922 if (WARN_ON(node == NUMA_NO_NODE)) {
5923 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5924
5925 return;
5926 }
5927 cpumask_set_cpu(cpu, tbl[node]);
5928 }
5929
5930 wq_numa_possible_cpumask = tbl;
5931 wq_numa_enabled = true;
5932}
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
/*
 * workqueue_init_early - early init for workqueue subsystem
 *
 * Initialize data structures needed so that work items can be queued
 * before kthreads exist: the unbound cpumask, the pwq cache, the per-cpu
 * worker pools (no workers yet), the standard unbound/ordered attrs, and
 * the system-wide workqueues.  Actual work execution starts only after
 * workqueue_init() has created workers.
 */
void __init workqueue_init_early(void)
{
	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
	int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
	int i, cpu;

	/* work_struct's data field stores a pwq pointer in its high bits */
	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));

	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);

	/* initialize CPU pools */
	for_each_possible_cpu(cpu) {
		struct worker_pool *pool;

		i = 0;
		for_each_cpu_worker_pool(pool, cpu) {
			BUG_ON(init_worker_pool(pool));
			pool->cpu = cpu;
			cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
			pool->attrs->nice = std_nice[i++];
			pool->node = cpu_to_node(cpu);

			/* alloc pool ID */
			mutex_lock(&wq_pool_mutex);
			BUG_ON(worker_pool_assign_id(pool));
			mutex_unlock(&wq_pool_mutex);
		}
	}

	/* create default unbound and ordered wq attrs */
	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
		struct workqueue_attrs *attrs;

		BUG_ON(!(attrs = alloc_workqueue_attrs()));
		attrs->nice = std_nice[i];
		unbound_std_wq_attrs[i] = attrs;

		/*
		 * An ordered wq should have only one pwq as ordering is
		 * guaranteed by max_active which is enforced by pwqs.
		 * Turn off NUMA so that dfl_pwq is used for all nodes.
		 */
		BUG_ON(!(attrs = alloc_workqueue_attrs()));
		attrs->nice = std_nice[i];
		attrs->no_numa = true;
		ordered_wq_attrs[i] = attrs;
	}

	system_wq = alloc_workqueue("events", 0, 0);
	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
	system_long_wq = alloc_workqueue("events_long", 0, 0);
	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
					    WQ_UNBOUND_MAX_ACTIVE);
	system_freezable_wq = alloc_workqueue("events_freezable",
					      WQ_FREEZABLE, 0);
	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
					      WQ_POWER_EFFICIENT, 0);
	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
					      WQ_FREEZABLE | WQ_POWER_EFFICIENT,
					      0);
	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
	       !system_unbound_wq || !system_freezable_wq ||
	       !system_power_efficient_wq ||
	       !system_freezable_power_efficient_wq);
}
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
/*
 * workqueue_init - bring workqueue subsystem fully online
 *
 * Second init stage, run once kthreads can be created and scheduled:
 * finish NUMA setup, fix up pool->node, create rescuers for early
 * workqueues, and spawn the initial workers for all online per-cpu pools
 * and all existing unbound pools.
 */
void __init workqueue_init(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	int cpu, bkt;

	/*
	 * wq_numa_init() happens here rather than in
	 * workqueue_init_early() because NUMA node mapping may not be
	 * available that early; re-apply the node assignment afterwards.
	 */
	wq_numa_init();

	mutex_lock(&wq_pool_mutex);

	/* refresh pool->node now that NUMA mapping is available */
	for_each_possible_cpu(cpu) {
		for_each_cpu_worker_pool(pool, cpu) {
			pool->node = cpu_to_node(cpu);
		}
	}

	list_for_each_entry(wq, &workqueues, list) {
		wq_update_unbound_numa(wq, smp_processor_id(), true);
		WARN(init_rescuer(wq),
		     "workqueue: failed to create early rescuer for %s",
		     wq->name);
	}

	mutex_unlock(&wq_pool_mutex);

	/* create the initial workers */
	for_each_online_cpu(cpu) {
		for_each_cpu_worker_pool(pool, cpu) {
			pool->flags &= ~POOL_DISASSOCIATED;
			BUG_ON(!create_worker(pool));
		}
	}

	hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
		BUG_ON(!create_worker(pool));

	wq_online = true;
	wq_watchdog_init();
}
6070