// SPDX-License-Identifier: GPL-2.0-only
/*
 * kernel/workqueue.c - generic async execution with shared worker pool
 */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>

#include "workqueue_internal.h"

enum {
        /* worker_pool flags */
        POOL_MANAGER_ACTIVE     = 1 << 0,       /* being managed */
        POOL_DISASSOCIATED      = 1 << 2,       /* cpu can't serve workers */

        /* worker flags */
        WORKER_DIE              = 1 << 1,       /* die die die */
        WORKER_IDLE             = 1 << 2,       /* is idle */
        WORKER_PREP             = 1 << 3,       /* preparing to run works */
        WORKER_CPU_INTENSIVE    = 1 << 6,       /* cpu intensive */
        WORKER_UNBOUND          = 1 << 7,       /* worker is unbound */
        WORKER_REBOUND          = 1 << 8,       /* worker was rebound */

        WORKER_NOT_RUNNING      = WORKER_PREP | WORKER_CPU_INTENSIVE |
                                  WORKER_UNBOUND | WORKER_REBOUND,

        NR_STD_WORKER_POOLS     = 2,            /* # standard pools per cpu */

        UNBOUND_POOL_HASH_ORDER = 6,            /* hashed by pool->attrs */
        BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */

        MAX_IDLE_WORKERS_RATIO  = 4,            /* 1/4 of busy can be idle */
        IDLE_WORKER_TIMEOUT     = 300 * HZ,     /* keep idle ones for 5 mins */

        MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
                                                /* call for help after 10ms
                                                   (min two ticks) */
        MAYDAY_INTERVAL         = HZ / 10,      /* and then every 100ms */
        CREATE_COOLDOWN         = HZ,           /* time to breath after fail */

        /*
         * Rescue workers are used only on emergencies and shared by
         * all cpus.  Give MIN_NICE.
         */
        RESCUER_NICE_LEVEL      = MIN_NICE,
        HIGHPRI_NICE_LEVEL      = MIN_NICE,

        WQ_NAME_LEN             = 24,
};

/*
 * Structure fields below follow one of these exclusion rules:
 *
 * I: Modifiable by initialization/destruction paths and read-only for
 *    everyone else.
 *
 * L: pool->lock protected.  Access with pool->lock held.
 *
 * A: wq_pool_attach_mutex protected.
 *
 * PL: wq_pool_mutex protected.
 * PR: wq_pool_mutex protected for writes.  RCU protected for reads.
 *
 * WQ: wq->mutex protected.
 * WR: wq->mutex protected for writes.  RCU protected for reads.
 *
 * MD: wq_mayday_lock protected.
 */

struct worker_pool {
        raw_spinlock_t          lock;           /* the pool lock */
        int                     cpu;            /* I: the associated cpu */
        int                     node;           /* I: the associated node ID */
        int                     id;             /* I: pool ID */
        unsigned int            flags;          /* L: flags */

        unsigned long           watchdog_ts;    /* L: watchdog timestamp */

        struct list_head        worklist;       /* L: list of pending works */

        int                     nr_workers;     /* L: total number of workers */
        int                     nr_idle;        /* L: currently idle workers */

        struct list_head        idle_list;      /* L: list of idle workers */
        struct timer_list       idle_timer;     /* L: worker idle timeout */
        struct timer_list       mayday_timer;   /* L: SOS timer for workers */

        /* a worker is either on busy_hash or idle_list, or is the manager */
        DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
                                                /* L: hash of busy workers */

        struct worker           *manager;       /* L: purely informational */
        struct list_head        workers;        /* A: attached workers */
        struct completion       *detach_completion; /* all workers detached */

        struct ida              worker_ida;     /* worker IDs for task name */

        struct workqueue_attrs  *attrs;         /* I: worker attributes */
        struct hlist_node       hash_node;      /* PL: unbound_pool_hash node */
        int                     refcnt;         /* PL: refcnt for unbound pools */

        /*
         * The current concurrency level.  As it's likely to be accessed
         * from other CPUs during try_to_wake_up(), put it in a separate
         * cacheline.
         */
        atomic_t                nr_running ____cacheline_aligned_in_smp;

        /*
         * Destruction of pool is RCU protected to allow dereferences
         * from get_work_pool().
         */
        struct rcu_head         rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work->data are used for flags and the remaining high bits point to
 * the pwq; thus, pwqs need to be aligned at a two's power of the number
 * of flag bits.
 */
struct pool_workqueue {
        struct worker_pool      *pool;          /* I: the associated pool */
        struct workqueue_struct *wq;            /* I: the owning workqueue */
        int                     work_color;     /* L: current color */
        int                     flush_color;    /* L: flushing color */
        int                     refcnt;         /* L: reference count */
        int                     nr_in_flight[WORK_NR_COLORS];
                                                /* L: nr of in_flight works */

        /*
         * nr_active management and WORK_STRUCT_INACTIVE:
         *
         * When pwq->nr_active >= pwq->max_active, a new work item is
         * queued to pwq->inactive_works instead of pool->worklist and is
         * marked with WORK_STRUCT_INACTIVE.  Inactive items don't count
         * toward nr_active and are activated one by one as in-flight
         * items retire; see pwq_dec_nr_in_flight().
         */
        int                     nr_active;      /* L: nr of active works */
        int                     max_active;     /* L: max active works */
        struct list_head        inactive_works; /* L: inactive works */
        struct list_head        pwqs_node;      /* WR: node on wq->pwqs */
        struct list_head        mayday_node;    /* MD: node on wq->maydays */

        /*
         * Release of unbound pwq is punted to system_wq.  See put_pwq()
         * for details.
         */
        struct work_struct      unbound_release_work;
        struct rcu_head         rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
        struct list_head        list;           /* WQ: list of flushers */
        int                     flush_color;    /* WQ: flush color waiting for */
        struct completion       done;           /* flush completion */
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
 */
struct workqueue_struct {
        struct list_head        pwqs;           /* WR: all pwqs of this wq */
        struct list_head        list;           /* PR: list of all workqueues */

        struct mutex            mutex;          /* protects this wq */
        int                     work_color;     /* WQ: current work color */
        int                     flush_color;    /* WQ: current flush color */
        atomic_t                nr_pwqs_to_flush; /* flush in progress */
        struct wq_flusher       *first_flusher; /* WQ: first flusher */
        struct list_head        flusher_queue;  /* WQ: flush waiters */
        struct list_head        flusher_overflow; /* WQ: flush overflow list */

        struct list_head        maydays;        /* MD: pwqs requesting rescue */
        struct worker           *rescuer;       /* rescue worker */

        int                     nr_drainers;    /* WQ: drain in progress */
        int                     saved_max_active; /* WQ: saved pwq max_active */

        struct workqueue_attrs  *unbound_attrs; /* attrs for unbound wqs */
        struct pool_workqueue   *dfl_pwq;       /* default pwq for unbound wqs */

#ifdef CONFIG_SYSFS
        struct wq_device        *wq_dev;        /* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
        char                    *lock_name;
        struct lock_class_key   key;
        struct lockdep_map      lockdep_map;
#endif
        char                    name[WQ_NAME_LEN]; /* I: workqueue name */

        /*
         * Destruction of workqueue_struct is RCU protected to allow
         * walking the workqueues list without grabbing wq_pool_mutex.
         */
        struct rcu_head         rcu;

        /* hot fields used during command issue, aligned to cacheline */
        unsigned int            flags ____cacheline_aligned; /* WQ: WQ_* flags */
        struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
        struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
                                        /* possible CPUs of each node */

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;                  /* can kworkers be created yet? */

static bool wq_numa_enabled;            /* unbound NUMA affinity enabled */

/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);     /* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
/* wait for manager to go away */
static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

static LIST_HEAD(workqueues);           /* PR: list of all workqueues */
static bool workqueue_freezing;         /* PL: have wqs started freezing? */

/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);     /* PR: idr of all pools */

/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
static void show_pwq(struct pool_workqueue *pwq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()                                      \
        RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq_pool_mutex),              \
                         "RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)                        \
        RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
                         !lockdep_is_held(&wq->mutex) &&                \
                         !lockdep_is_held(&wq_pool_mutex),              \
                         "RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)                             \
        for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];               \
             (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
             (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or RCU read
 * locked.  The if/else clause exists only for the lockdep assertion
 * and can be ignored.
 */
#define for_each_pool(pool, pi)                                         \
        idr_for_each_entry(&worker_pool_idr, pool, pi)                  \
                if (({ assert_rcu_or_pool_mutex(); false; })) { }       \
                else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with wq_pool_attach_mutex held.  The if/else
 * clause exists only for the lockdep assertion and can be ignored.
 */
#define for_each_pool_worker(worker, pool)                              \
        list_for_each_entry((worker), &(pool)->workers, node)           \
                if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
                else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or RCU read locked.
 */
#define for_each_pwq(pwq, wq)                                           \
        list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,          \
                                 lockdep_is_held(&(wq->mutex)))

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static const struct debug_obj_descr work_debug_descr;

static void *work_debug_hint(void *addr)
{
        return ((struct work_struct *) addr)->func;
}

static bool work_is_static_object(void *addr)
{
        struct work_struct *work = addr;

        return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
        struct work_struct *work = addr;

        switch (state) {
        case ODEBUG_STATE_ACTIVE:
                cancel_work_sync(work);
                debug_object_init(work, &work_debug_descr);
                return true;
        default:
                return false;
        }
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
        struct work_struct *work = addr;

        switch (state) {
        case ODEBUG_STATE_ACTIVE:
                cancel_work_sync(work);
                debug_object_free(work, &work_debug_descr);
                return true;
        default:
                return false;
        }
}

static const struct debug_obj_descr work_debug_descr = {
        .name           = "work_struct",
        .debug_hint     = work_debug_hint,
        .is_static_object = work_is_static_object,
        .fixup_init     = work_fixup_init,
        .fixup_free     = work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
        debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
        debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
        if (onstack)
                debug_object_init_on_stack(work, &work_debug_descr);
        else
                debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
        debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
        destroy_timer_on_stack(&work->timer);
        debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/**
 * worker_pool_assign_id - allocate ID and assign it to @pool
 * @pool: the pool pointer of interest
 *
 * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
 * successfully, -errno on failure.
 */
static int worker_pool_assign_id(struct worker_pool *pool)
{
        int ret;

        lockdep_assert_held(&wq_pool_mutex);

        ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
                        GFP_KERNEL);
        if (ret >= 0) {
                pool->id = ret;
                return 0;
        }
        return ret;
}

/**
 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
 * @wq: the target workqueue
 * @node: the node ID
 *
 * This must be called with any of wq_pool_mutex, wq->mutex or RCU
 * read locked.
 *
 * Return: The unbound pool_workqueue for @node.
 */
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
                                                  int node)
{
        assert_rcu_or_wq_mutex_or_pool_mutex(wq);

        /*
         * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
         * delayed item is pending.  The plan is to keep CPU -> NODE
         * mapping valid and stable across CPU on/offlines.  Once that
         * happens, this workaround can be removed.
         */
        if (unlikely(node == NUMA_NO_NODE))
                return wq->dfl_pwq;

        return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

static unsigned int work_color_to_flags(int color)
{
        return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(unsigned long work_data)
{
        return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
                ((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
        return (color + 1) % WORK_NR_COLORS;
}

/*
 * While queued, %WORK_STRUCT_PWQ is set and the non-flag bits of a
 * work's data contain the pointer to the queued pwq.  Once execution
 * starts, the flag is cleared and the high bits contain OFFQ flags and
 * the pool ID.
 *
 * set_work_pwq(), set_work_pool_and_clear_pending(),
 * mark_work_canceling() and clear_work_data() can be used to set the
 * pwq, pool or clear work->data.  These functions should only be called
 * while the work is owned - ie. while the PENDING bit is set.
 *
 * get_work_pool() and get_work_pwq() can be used to obtain the pool or
 * pwq corresponding to a work.  Pool is available once the work has
 * been queued anywhere after initialization until it is sync canceled;
 * pwq is available only while the work item is queued.
 *
 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
 * canceled.  While being canceled, a work item may have its PENDING set
 * but stay off timer and worklist for arbitrarily long and nobody
 * should try to steal the PENDING bit.
 */
static inline void set_work_data(struct work_struct *work, unsigned long data,
                                 unsigned long flags)
{
        WARN_ON_ONCE(!work_pending(work));
        atomic_long_set(&work->data, data | flags | work_static(work));
}

static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
                         unsigned long extra_flags)
{
        set_work_data(work, (unsigned long)pwq,
                      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
}

static void set_work_pool_and_keep_pending(struct work_struct *work,
                                           int pool_id)
{
        set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
                      WORK_STRUCT_PENDING);
}

static void set_work_pool_and_clear_pending(struct work_struct *work,
                                            int pool_id)
{
        /*
         * The following wmb is paired with the implied mb in
         * test_and_set_bit(PENDING) and ensures all updates to @work made
         * here are visible to and precede any updates by the next PENDING
         * owner.
         */
        smp_wmb();
        set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
        /*
         * The following mb guarantees that previous clear of a PENDING bit
         * will not be reordered with any speculative LOADS or STORES from
         * work->current_func, which is executed afterwards.  This possible
         * reordering can lead to a missed execution on attempt to queue
         * the same @work.  E.g. consider this case:
         *
         *   CPU#0                         CPU#1
         *   ----------------------------  --------------------------------
         *
         * 1  STORE event_indicated
         * 2  queue_work_on() {
         * 3    test_and_set_bit(PENDING)
         * 4 }                             set_..._and_clear_pending() {
         * 5                                 set_work_data() # clear bit
         * 6                                 smp_mb()
         * 7                               work->current_func() {
         * 8                                  LOAD event_indicated
         *                                 }
         *
         * Without an explicit full barrier, the speculative LOAD on line 8
         * can be executed before CPU#0 does STORE on line 1.  If that
         * happens, CPU#0 observes the PENDING bit still set and a new
         * execution of @work is not queued, in the hope that CPU#1 will
         * eventually finish the queued @work.  Meanwhile CPU#1 does not
         * see event_indicated set, because the speculative LOAD was
         * executed before the actual STORE.
         */
        smp_mb();
}

static void clear_work_data(struct work_struct *work)
{
        smp_wmb();      /* see set_work_pool_and_clear_pending() */
        set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}

static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
        unsigned long data = atomic_long_read(&work->data);

        if (data & WORK_STRUCT_PWQ)
                return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
        else
                return NULL;
}

/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and allow read
 * access under RCU read lock.  As such, this function should be called
 * under wq_pool_mutex or inside of a rcu_read_lock() region.
 *
 * All fields of the returned pool are accessible as long as the above
 * mentioned locking is in effect.  If the returned pool needs to be
 * used beyond the critical section, the caller is responsible for
 * ensuring the returned pool is and stays online.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
 */
static struct worker_pool *get_work_pool(struct work_struct *work)
{
        unsigned long data = atomic_long_read(&work->data);
        int pool_id;

        assert_rcu_or_pool_mutex();

        if (data & WORK_STRUCT_PWQ)
                return ((struct pool_workqueue *)
                        (data & WORK_STRUCT_WQ_DATA_MASK))->pool;

        pool_id = data >> WORK_OFFQ_POOL_SHIFT;
        if (pool_id == WORK_OFFQ_POOL_NONE)
                return NULL;

        return idr_find(&worker_pool_idr, pool_id);
}

/**
 * get_work_pool_id - return the worker pool ID a given work is associated with
 * @work: the work item of interest
 *
 * Return: The worker_pool ID @work was last associated with.
 * %WORK_OFFQ_POOL_NONE if none.
 */
static int get_work_pool_id(struct work_struct *work)
{
        unsigned long data = atomic_long_read(&work->data);

        if (data & WORK_STRUCT_PWQ)
                return ((struct pool_workqueue *)
                        (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;

        return data >> WORK_OFFQ_POOL_SHIFT;
}

static void mark_work_canceling(struct work_struct *work)
{
        unsigned long pool_id = get_work_pool_id(work);

        pool_id <<= WORK_OFFQ_POOL_SHIFT;
        set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}

static bool work_is_canceling(struct work_struct *work)
{
        unsigned long data = atomic_long_read(&work->data);

        return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}

/*
 * Policy functions.  These define the policies on how the global worker
 * pools are managed.  Unless noted otherwise, these functions assume
 * that they're being called with pool->lock held.
 */

static bool __need_more_worker(struct worker_pool *pool)
{
        return !atomic_read(&pool->nr_running);
}

/*
 * Need to wake up a worker?  Called from anything but currently
 * running workers.
 *
 * Note that, because unbound workers never contribute to nr_running,
 * this function will always return %true for unbound pools as long as
 * the worklist isn't empty.
 */
static bool need_more_worker(struct worker_pool *pool)
{
        return !list_empty(&pool->worklist) && __need_more_worker(pool);
}

/* Can I start working?  Called from busy but !running workers. */
static bool may_start_working(struct worker_pool *pool)
{
        return pool->nr_idle;
}

/* Do I need to keep working?  Called from currently running workers. */
static bool keep_working(struct worker_pool *pool)
{
        return !list_empty(&pool->worklist) &&
                atomic_read(&pool->nr_running) <= 1;
}

/* Do we need a new worker?  Called from manager. */
static bool need_to_create_worker(struct worker_pool *pool)
{
        return need_more_worker(pool) && !may_start_working(pool);
}

/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
        bool managing = pool->flags & POOL_MANAGER_ACTIVE;
        int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
        int nr_busy = pool->nr_workers - nr_idle;

        return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}
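
/*
 * Worked example for too_many_workers() (illustrative): with
 * MAX_IDLE_WORKERS_RATIO = 4, a pool with nr_busy = 20 tolerates up to 6
 * idle workers; at nr_idle = 7, (7 - 2) * 4 = 20 >= 20 holds and the
 * idle timer starts trimming excess workers.
 */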

/*
 * Wake up functions.
 */

/* Return the first idle worker.  Called with pool->lock held. */
static struct worker *first_idle_worker(struct worker_pool *pool)
{
        if (unlikely(list_empty(&pool->idle_list)))
                return NULL;

        return list_first_entry(&pool->idle_list, struct worker, entry);
}

/**
 * wake_up_worker - wake up an idle worker
 * @pool: worker pool to wake worker from
 *
 * Wake up the first idle worker of @pool.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void wake_up_worker(struct worker_pool *pool)
{
        struct worker *worker = first_idle_worker(pool);

        if (likely(worker))
                wake_up_process(worker->task);
}

/**
 * wq_worker_running - a worker is running again
 * @task: task waking up
 *
 * This function is called when a worker returns from schedule().
 */
void wq_worker_running(struct task_struct *task)
{
        struct worker *worker = kthread_data(task);

        if (!worker->sleeping)
                return;
        if (!(worker->flags & WORKER_NOT_RUNNING))
                atomic_inc(&worker->pool->nr_running);
        worker->sleeping = 0;
}

/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is going
 * to sleep.  Preemption needs to be disabled to protect ->sleeping
 * assignment.
 */
void wq_worker_sleeping(struct task_struct *task)
{
        struct worker *next, *worker = kthread_data(task);
        struct worker_pool *pool;

        /*
         * Rescuers, which may not have all the fields set up like normal
         * workers, also reach here, let's not access anything before
         * checking NOT_RUNNING.
         */
        if (worker->flags & WORKER_NOT_RUNNING)
                return;

        pool = worker->pool;

        /* Return if preempted before wq_worker_running() was reached */
        if (worker->sleeping)
                return;

        worker->sleeping = 1;
        raw_spin_lock_irq(&pool->lock);

        /*
         * The counterpart of the following dec_and_test, implied mb,
         * worklist-not-empty test sequence is in insert_work().  Please
         * read the comment there.
         *
         * NOT_RUNNING is clear.  This means that we're bound to and
         * running on the local cpu with pool->lock held, so nobody else
         * can be manipulating idle_list concurrently.
         */
        if (atomic_dec_and_test(&pool->nr_running) &&
            !list_empty(&pool->worklist)) {
                next = first_idle_worker(pool);
                if (next)
                        wake_up_process(next->task);
        }
        raw_spin_unlock_irq(&pool->lock);
}

/**
 * wq_worker_last_func - retrieve worker's last work function
 * @task: Task to retrieve last work function of.
 *
 * Determine the last function a worker executed.  This is called from
 * the scheduler to get a worker's last known identity.
 *
 * CONTEXT:
 * raw_spin_lock_irq(rq->lock)
 *
 * This function is called during schedule() when a kworker is going
 * to sleep.  It's used by psi to identify aggregation workers during
 * dequeuing, to allow periodic aggregation to shut-off when that
 * worker is the last task in the system or cgroup to go to sleep.
 *
 * As this function doesn't involve any workqueue-related locking, it
 * only returns stable values when called from inside the scheduler's
 * queuing and dequeuing paths, when @task, which must be a kworker,
 * is guaranteed to not be processing any works.
 *
 * Return:
 * The last work function %current executed as a worker, NULL if it
 * hasn't executed any work yet.
 */
work_func_t wq_worker_last_func(struct task_struct *task)
{
        struct worker *worker = kthread_data(task);

        return worker->last_func;
}

/**
 * worker_set_flags - set worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to set
 *
 * Set @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock)
 */
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
        struct worker_pool *pool = worker->pool;

        WARN_ON_ONCE(worker->task != current);

        /* If transitioning into NOT_RUNNING, adjust nr_running. */
        if ((flags & WORKER_NOT_RUNNING) &&
            !(worker->flags & WORKER_NOT_RUNNING)) {
                atomic_dec(&pool->nr_running);
        }

        worker->flags |= flags;
}

/**
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 * @worker: self
 * @flags: flags to clear
 *
 * Clear @flags in @worker->flags and adjust nr_running accordingly.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock)
 */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
        struct worker_pool *pool = worker->pool;
        unsigned int oflags = worker->flags;

        WARN_ON_ONCE(worker->task != current);

        worker->flags &= ~flags;

        /*
         * If transitioning out of NOT_RUNNING, increment nr_running.  Note
         * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is mask
         * of multiple flags, not a single flag.
         */
        if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
                if (!(worker->flags & WORKER_NOT_RUNNING))
                        atomic_inc(&pool->nr_running);
}

/**
 * find_worker_executing_work - find worker which is executing a work
 * @pool: pool of interest
 * @work: work to find worker for
 *
 * Find a worker which is executing @work on @pool by searching
 * @pool->busy_hash which is keyed by the address of @work.  For a
 * worker to match, its current_work must also match @work's function;
 * this distinguishes recycled work items whose addresses happen to
 * collide with items still executing.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 *
 * Return:
 * Pointer to worker which is executing @work if found, %NULL
 * otherwise.
 */
static struct worker *find_worker_executing_work(struct worker_pool *pool,
                                                 struct work_struct *work)
{
        struct worker *worker;

        hash_for_each_possible(pool->busy_hash, worker, hentry,
                               (unsigned long)work)
                if (worker->current_work == work &&
                    worker->current_func == work->func)
                        return worker;

        return NULL;
}

/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out parameter for nested worklist walking
 *
 * Schedule linked works starting from @work to @head.  The work series
 * to be scheduled starts at @work and includes any consecutive work
 * with WORK_STRUCT_LINKED set in its predecessor.
 *
 * If @nextp is not NULL, it's updated to point to the next work of the
 * last scheduled work.  This allows move_linked_works() to be nested
 * inside an outer list_for_each_entry_safe().
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void move_linked_works(struct work_struct *work, struct list_head *head,
                              struct work_struct **nextp)
{
        struct work_struct *n;

        /*
         * Linked worklist will always end before the end of the list,
         * use NULL for list head.
         */
        list_for_each_entry_safe_from(work, n, NULL, entry) {
                list_move_tail(&work->entry, head);
                if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
                        break;
        }

        /*
         * If we're already inside safe list traversal and have moved
         * multiple works to the scheduled queue, the first position
         * needs to be updated.
         */
        if (nextp)
                *nextp = n;
}

/**
 * get_pwq - get an extra reference on the specified pool_workqueue
 * @pwq: pool_workqueue to get
 *
 * Obtain an extra reference on @pwq.  The caller should guarantee that
 * @pwq has positive refcnt and be holding the matching pool->lock.
 */
static void get_pwq(struct pool_workqueue *pwq)
{
        lockdep_assert_held(&pwq->pool->lock);
        WARN_ON_ONCE(pwq->refcnt <= 0);
        pwq->refcnt++;
}

/**
 * put_pwq - put a pool_workqueue reference
 * @pwq: pool_workqueue to put
 *
 * Drop a reference of @pwq.  If its refcnt reaches zero, schedule its
 * destruction.  The caller should be holding the matching pool->lock.
 */
static void put_pwq(struct pool_workqueue *pwq)
{
        lockdep_assert_held(&pwq->pool->lock);
        if (likely(--pwq->refcnt))
                return;
        if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
                return;
        /*
         * @pwq can't be released under pool->lock, bounce the release to
         * a work item on system_wq.  This is sane as the work item won't
         * be queued on the same pool.
         */
        schedule_work(&pwq->unbound_release_work);
}

/**
 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
 * @pwq: pool_workqueue to put (can be %NULL)
 *
 * put_pwq() with locking.  @pwq may be %NULL, no-op in that case.
 */
static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
        if (pwq) {
                /*
                 * As both pwqs and pools are RCU protected, the
                 * following lock operations are safe.
                 */
                raw_spin_lock_irq(&pwq->pool->lock);
                put_pwq(pwq);
                raw_spin_unlock_irq(&pwq->pool->lock);
        }
}

static void pwq_activate_inactive_work(struct work_struct *work)
{
        struct pool_workqueue *pwq = get_work_pwq(work);

        trace_workqueue_activate_work(work);
        if (list_empty(&pwq->pool->worklist))
                pwq->pool->watchdog_ts = jiffies;
        move_linked_works(work, &pwq->pool->worklist, NULL);
        __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
        pwq->nr_active++;
}

static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
{
        struct work_struct *work = list_first_entry(&pwq->inactive_works,
                                                    struct work_struct, entry);

        pwq_activate_inactive_work(work);
}

/**
 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
 * @pwq: pwq of interest
 * @work_data: work_data of work which left the queue
 *
 * A work either has completed or is removed from pending queue,
 * decrement nr_in_flight of its pwq and handle workqueue flushing.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
{
        int color = get_work_color(work_data);

        if (!(work_data & WORK_STRUCT_INACTIVE)) {
                pwq->nr_active--;
                if (!list_empty(&pwq->inactive_works)) {
                        /* one down, submit an inactive one */
                        if (pwq->nr_active < pwq->max_active)
                                pwq_activate_first_inactive(pwq);
                }
        }

        pwq->nr_in_flight[color]--;

        /* is flush in progress and are we at the flushing tip? */
        if (likely(pwq->flush_color != color))
                goto out_put;

        /* are there still in-flight works? */
        if (pwq->nr_in_flight[color])
                goto out_put;

        /* this pwq is done flushing, reset its flush color */
        pwq->flush_color = -1;

        /*
         * If this was the last pwq, wake up the first flusher.  It
         * will handle the rest.
         */
        if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
                complete(&pwq->wq->first_flusher->done);
out_put:
        put_pwq(pwq);
}

/**
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store irq state
 *
 * Try to grab PENDING bit of @work.  This function can handle @work in
 * any stable state - idle, on timer or on worklist.
 *
 * Return:
 *
 *  1           if @work was pending and we successfully stole PENDING
 *  0           if @work was idle and we claimed PENDING
 *  -EAGAIN     if PENDING couldn't be grabbed at the moment, safe to busy-retry
 *  -ENOENT     if someone else is canceling @work, this state may persist
 *              for arbitrarily long
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
 * interrupted while holding PENDING and @work off queue, irq must be
 * disabled on entry.  This, combined with delayed_work->timer being
 * irqsafe, ensures that we return -EAGAIN for a finite short period.
 *
 * On successful (>= 0) return, irq is disabled and the caller is
 * responsible for releasing it using local_irq_restore(*@flags).
 *
 * This function is safe to call from any context including IRQ handler.
 */
static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
                               unsigned long *flags)
{
        struct worker_pool *pool;
        struct pool_workqueue *pwq;

        local_irq_save(*flags);

        /* try to steal the timer if it exists */
        if (is_dwork) {
                struct delayed_work *dwork = to_delayed_work(work);

                /*
                 * dwork->timer is irqsafe.  If del_timer() fails, it's
                 * guaranteed that the timer is not queued anywhere and not
                 * running on the local CPU.
                 */
                if (likely(del_timer(&dwork->timer)))
                        return 1;
        }

        /* try to claim PENDING the normal way */
        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
                return 0;

        rcu_read_lock();
        /*
         * The queueing is in progress, or it is already queued.  Try to
         * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
         */
        pool = get_work_pool(work);
        if (!pool)
                goto fail;

        raw_spin_lock(&pool->lock);
        /*
         * work->data is guaranteed to point to pwq only while the work
         * item is queued on pwq->wq, and both updating work->data to point
         * to pwq on queueing and to pool on dequeueing are done under
         * pwq->pool->lock.  This in turn guarantees that, if work->data
         * points to pwq which is associated with a locked pool, the work
         * item is currently queued on that pool.
         */
        pwq = get_work_pwq(work);
        if (pwq && pwq->pool == pool) {
                debug_work_deactivate(work);

                /*
                 * An inactive work item cannot be grabbed directly because
                 * it might have linked barrier work items which, if left
                 * on the inactive_works list, will confuse pwq->nr_active
                 * management later on and cause stall.  Make sure the work
                 * item is activated before grabbing.
                 */
                if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
                        pwq_activate_inactive_work(work);

                list_del_init(&work->entry);
                pwq_dec_nr_in_flight(pwq, *work_data_bits(work));

                /* work->data points to pwq iff queued, point to pool */
                set_work_pool_and_keep_pending(work, pool->id);

                raw_spin_unlock(&pool->lock);
                rcu_read_unlock();
                return 1;
        }
        raw_spin_unlock(&pool->lock);
fail:
        rcu_read_unlock();
        local_irq_restore(*flags);
        if (work_is_canceling(work))
                return -ENOENT;
        cpu_relax();
        return -EAGAIN;
}

/**
 * insert_work - insert a work into a pool
 * @pwq: pwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work which belongs to @pwq after @head.  @extra_flags is
 * or'd to work_struct flags.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
                        struct list_head *head, unsigned int extra_flags)
{
        struct worker_pool *pool = pwq->pool;

        /* record the work call stack in order to print it in KASAN reports */
        kasan_record_aux_stack(work);

        /* we own @work, set data and link */
        set_work_pwq(work, pwq, extra_flags);
        list_add_tail(&work->entry, head);
        get_pwq(pwq);

        /*
         * Ensure either wq_worker_sleeping() sees the above
         * list_add_tail() or we see zero nr_running to avoid workers lying
         * around lazily while there are works to be processed.
         */
        smp_mb();

        if (__need_more_worker(pool))
                wake_up_worker(pool);
}

/*
 * Test whether @work is being queued from another work executing on the
 * same workqueue.
 */
static bool is_chained_work(struct workqueue_struct *wq)
{
        struct worker *worker;

        worker = current_wq_worker();
        /*
         * Return %true iff I'm a worker executing a work item on @wq.  If
         * I'm @worker, it's safe to dereference it without locking.
         */
        return worker && worker->current_pwq->wq == wq;
}

/*
 * When queueing an unbound work item to a wq, prefer local CPU if allowed
 * by wq_unbound_cpumask.  Otherwise, round robin among the allowed ones to
 * avoid perturbing sensitive tasks.
 */
static int wq_select_unbound_cpu(int cpu)
{
        static bool printed_dbg_warning;
        int new_cpu;

        if (likely(!wq_debug_force_rr_cpu)) {
                if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
                        return cpu;
        } else if (!printed_dbg_warning) {
                pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
                printed_dbg_warning = true;
        }

        if (cpumask_empty(wq_unbound_cpumask))
                return cpu;

        new_cpu = __this_cpu_read(wq_rr_cpu_last);
        new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
        if (unlikely(new_cpu >= nr_cpu_ids)) {
                new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
                if (unlikely(new_cpu >= nr_cpu_ids))
                        return cpu;
        }
        __this_cpu_write(wq_rr_cpu_last, new_cpu);

        return new_cpu;
}

static void __queue_work(int cpu, struct workqueue_struct *wq,
                         struct work_struct *work)
{
        struct pool_workqueue *pwq;
        struct worker_pool *last_pool;
        struct list_head *worklist;
        unsigned int work_flags;
        unsigned int req_cpu = cpu;

        /*
         * While a work item is PENDING && off queue, a task trying to
         * steal the PENDING will busy-loop waiting for it to either get
         * queued or lose PENDING.  Grabbing PENDING and queueing should
         * happen with IRQ disabled.
         */
        lockdep_assert_irqs_disabled();

        /* if draining, only works from the same workqueue are allowed */
        if (unlikely(wq->flags & __WQ_DRAINING) &&
            WARN_ON_ONCE(!is_chained_work(wq)))
                return;
        rcu_read_lock();
retry:
        /* pwq which will be used unless @work is executing elsewhere */
        if (wq->flags & WQ_UNBOUND) {
                if (req_cpu == WORK_CPU_UNBOUND)
                        cpu = wq_select_unbound_cpu(raw_smp_processor_id());
                pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
        } else {
                if (req_cpu == WORK_CPU_UNBOUND)
                        cpu = raw_smp_processor_id();
                pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
        }

        /*
         * If @work was previously on a different pool, it might still be
         * running there, in which case the work needs to be queued on that
         * pool to guarantee non-reentrancy.
         */
        last_pool = get_work_pool(work);
        if (last_pool && last_pool != pwq->pool) {
                struct worker *worker;

                raw_spin_lock(&last_pool->lock);

                worker = find_worker_executing_work(last_pool, work);

                if (worker && worker->current_pwq->wq == wq) {
                        pwq = worker->current_pwq;
                } else {
                        /* meh... not running there, queue here */
                        raw_spin_unlock(&last_pool->lock);
                        raw_spin_lock(&pwq->pool->lock);
                }
        } else {
                raw_spin_lock(&pwq->pool->lock);
        }

        /*
         * pwq is determined and locked.  For unbound pools, we could have
         * raced with pwq release and it could already be dead.  If its
         * refcnt is zero, repeat pwq selection.  Note that pwqs never die
         * without another pwq replacing it in the numa_pwq_tbl or while
         * work items are executing on it, so the retrying is guaranteed to
         * make forward-progress.
         */
        if (unlikely(!pwq->refcnt)) {
                if (wq->flags & WQ_UNBOUND) {
                        raw_spin_unlock(&pwq->pool->lock);
                        cpu_relax();
                        goto retry;
                }
                /* oops */
                WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
                          wq->name, cpu);
        }

        /* pwq determined, queue */
        trace_workqueue_queue_work(req_cpu, pwq, work);

        if (WARN_ON(!list_empty(&work->entry)))
                goto out;

        pwq->nr_in_flight[pwq->work_color]++;
        work_flags = work_color_to_flags(pwq->work_color);

        if (likely(pwq->nr_active < pwq->max_active)) {
                trace_workqueue_activate_work(work);
                pwq->nr_active++;
                worklist = &pwq->pool->worklist;
                if (list_empty(worklist))
                        pwq->pool->watchdog_ts = jiffies;
        } else {
                work_flags |= WORK_STRUCT_INACTIVE;
                worklist = &pwq->inactive_works;
        }

        debug_work_activate(work);
        insert_work(pwq, work, worklist, work_flags);

out:
        raw_spin_unlock(&pwq->pool->lock);
        rcu_read_unlock();
}

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.  Callers that fail to ensure that the specified
 * CPU cannot go away will execute on a randomly chosen CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
                   struct work_struct *work)
{
        bool ret = false;
        unsigned long flags;

        local_irq_save(flags);

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                __queue_work(cpu, wq, work);
                ret = true;
        }

        local_irq_restore(flags);
        return ret;
}
EXPORT_SYMBOL(queue_work_on);
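
/*
 * Usage sketch for queue_work_on() (illustrative only; my_work and
 * my_work_fn are hypothetical, not part of this file):
 *
 *	static void my_work_fn(struct work_struct *work) { ... }
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	// run my_work_fn() in process context on CPU 0; returns false
 *	// if the item was already pending on a queue
 *	queue_work_on(0, system_wq, &my_work);
 */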

/**
 * workqueue_select_cpu_near - Select a CPU based on NUMA node
 * @node: NUMA node ID that we want to select a CPU from
 *
 * This function will attempt to find a "random" cpu available on a given
 * node.  If there are no CPUs available on the given node it will return
 * WORK_CPU_UNBOUND indicating that we should just schedule to any
 * available CPU if we need to schedule this work.
 */
static int workqueue_select_cpu_near(int node)
{
        int cpu;

        /* No point in doing this if NUMA isn't enabled for workqueues */
        if (!wq_numa_enabled)
                return WORK_CPU_UNBOUND;

        /* Delay binding to CPU if node is not valid or online */
        if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
                return WORK_CPU_UNBOUND;

        /* Use local node/cpu if we are already there */
        cpu = raw_smp_processor_id();
        if (node == cpu_to_node(cpu))
                return cpu;

        /* Use "random", otherwise known as "first", online CPU of node */
        cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);

        /* If CPU is valid return that, otherwise just defer */
        return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
}

/**
 * queue_work_node - queue work on a "random" cpu for a given NUMA node
 * @node: NUMA node that we are targeting the work for
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a "random" CPU within a given NUMA node.  The
 * basic idea here is to provide a way to somehow associate work with a
 * given NUMA node.
 *
 * This function will only make a best effort attempt at getting this
 * onto the right NUMA node.  If no node is requested or the requested
 * node is offline then we just fall back to standard queue_work
 * behavior.
 *
 * Currently the "random" CPU ends up being the first available CPU in
 * the intersection of cpu_online_mask and the cpumask of the node,
 * unless we are running on the node.  In that case we just use the
 * current CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
bool queue_work_node(int node, struct workqueue_struct *wq,
                     struct work_struct *work)
{
        unsigned long flags;
        bool ret = false;

        /*
         * This current implementation is specific to unbound workqueues.
         * Specifically we only return the first available CPU for a given
         * node instead of cycling through individual CPUs within the node.
         *
         * If this is used with a per-cpu workqueue then the logic in
         * workqueue_select_cpu_near would need to be updated to allow for
         * some round robin type logic.
         */
        WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));

        local_irq_save(flags);

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                int cpu = workqueue_select_cpu_near(node);

                __queue_work(cpu, wq, work);
                ret = true;
        }

        local_irq_restore(flags);
        return ret;
}
EXPORT_SYMBOL_GPL(queue_work_node);

void delayed_work_timer_fn(struct timer_list *t)
{
        struct delayed_work *dwork = from_timer(dwork, t, timer);

        /* should have been called from irqsafe timer with irq already off */
        __queue_work(dwork->cpu, dwork->wq, &dwork->work);
}
EXPORT_SYMBOL(delayed_work_timer_fn);

static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
                                struct delayed_work *dwork, unsigned long delay)
{
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;

        WARN_ON_ONCE(!wq);
        WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn);
        WARN_ON_ONCE(timer_pending(timer));
        WARN_ON_ONCE(!list_empty(&work->entry));

        /*
         * If @delay is 0, queue @dwork->work immediately.  This is for
         * both optimization and correctness.  The earliest @timer can
         * expire is on the closest next tick and delayed_work users depend
         * on that there's no such delay when @delay is 0.
         */
        if (!delay) {
                __queue_work(cpu, wq, &dwork->work);
                return;
        }

        dwork->wq = wq;
        dwork->cpu = cpu;
        timer->expires = jiffies + delay;

        if (unlikely(cpu != WORK_CPU_UNBOUND))
                add_timer_on(timer, cpu);
        else
                add_timer(timer);
}

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
 */
bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                           struct delayed_work *dwork, unsigned long delay)
{
        struct work_struct *work = &dwork->work;
        bool ret = false;
        unsigned long flags;

        /* read the comment in __queue_work() */
        local_irq_save(flags);

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                __queue_delayed_work(cpu, wq, dwork, delay);
                ret = true;
        }

        local_irq_restore(flags);
        return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
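
/*
 * Usage sketch for queue_delayed_work_on() (illustrative only; my_dwork
 * and my_dwork_fn are hypothetical):
 *
 *	static void my_dwork_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		...
 *	}
 *	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);
 *
 *	// run my_dwork_fn() roughly one second from now, CPU unbound
 *	queue_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &my_dwork, HZ);
 */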

/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
 * modify @dwork's timer so that it expires after @delay.  If @delay is
 * zero, @work is guaranteed to be scheduled immediately regardless of
 * its current state.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
 */
bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
                         struct delayed_work *dwork, unsigned long delay)
{
        unsigned long flags;
        int ret;

        do {
                ret = try_to_grab_pending(&dwork->work, true, &flags);
        } while (unlikely(ret == -EAGAIN));

        if (likely(ret >= 0)) {
                __queue_delayed_work(cpu, wq, dwork, delay);
                local_irq_restore(flags);
        }

        /* -ENOENT from try_to_grab_pending() becomes %true */
        return ret;
}
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
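
/*
 * A common pattern (sketch; names hypothetical): mod_delayed_work_on()
 * as a "restart the timer" primitive.  Repeated calls push the expiry
 * out, whereas queue_delayed_work_on() leaves the original expiry in
 * place when the item is already pending:
 *
 *	// debounce: fire my_dwork_fn() 100ms after the *last* event
 *	mod_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &my_dwork,
 *			    msecs_to_jiffies(100));
 */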

static void rcu_work_rcufn(struct rcu_head *rcu)
{
        struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);

        /* read the comment in __queue_work() */
        local_irq_disable();
        __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
        local_irq_enable();
}

/**
 * queue_rcu_work - queue work after a RCU grace period
 * @wq: workqueue to use
 * @rwork: work to queue
 *
 * Return: %false if @rwork was already pending, %true otherwise.  Note
 * that a full RCU grace period is guaranteed only after a %true return.
 * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
{
        struct work_struct *work = &rwork->work;

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                rwork->wq = wq;
                call_rcu(&rwork->rcu, rcu_work_rcufn);
                return true;
        }

        return false;
}
EXPORT_SYMBOL(queue_rcu_work);
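
/*
 * Usage sketch for queue_rcu_work() (illustrative; my_rwork and
 * my_free_fn are hypothetical): defer process-context cleanup of an
 * RCU-protected object until after a grace period.
 *
 *	static void my_free_fn(struct work_struct *work)
 *	{
 *		struct rcu_work *rwork = to_rcu_work(work);
 *		// all pre-existing RCU readers have finished by now
 *		...
 *	}
 *	static struct rcu_work my_rwork;
 *
 *	INIT_RCU_WORK(&my_rwork, my_free_fn);
 *	queue_rcu_work(system_wq, &my_rwork);
 */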

/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_enter_idle(struct worker *worker)
{
        struct worker_pool *pool = worker->pool;

        if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
            WARN_ON_ONCE(!list_empty(&worker->entry) &&
                         (worker->hentry.next || worker->hentry.pprev)))
                return;

        /* can't use worker_set_flags(), also called from create_worker() */
        worker->flags |= WORKER_IDLE;
        pool->nr_idle++;
        worker->last_active = jiffies;

        /* idle_list is LIFO */
        list_add(&worker->entry, &pool->idle_list);

        if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
                mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);

        /*
         * Sanity check nr_running.  Because unbind_workers() releases
         * pool->lock between setting %WORKER_UNBOUND and zapping
         * nr_running, the warning may trigger spuriously.  Check iff
         * unbind is not in progress.
         */
        WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
                     pool->nr_workers == pool->nr_idle &&
                     atomic_read(&pool->nr_running));
}

/**
 * worker_leave_idle - leave idle state
 * @worker: worker which is leaving idle state
 *
 * @worker is leaving idle state.  Update stats.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock).
 */
static void worker_leave_idle(struct worker *worker)
{
        struct worker_pool *pool = worker->pool;

        if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
                return;
        worker_clr_flags(worker, WORKER_IDLE);
        pool->nr_idle--;
        list_del_init(&worker->entry);
}

static struct worker *alloc_worker(int node)
{
        struct worker *worker;

        worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
        if (worker) {
                INIT_LIST_HEAD(&worker->entry);
                INIT_LIST_HEAD(&worker->scheduled);
                INIT_LIST_HEAD(&worker->node);
                /* on creation a worker is in !idle && prep state */
                worker->flags = WORKER_PREP;
        }
        return worker;
}

/**
 * worker_attach_to_pool() - attach a worker to a pool
 * @worker: worker to be attached
 * @pool: the target pool
 *
 * Attach @worker to @pool.  Once attached, the %WORKER_UNBOUND flag and
 * cpu-binding of @worker are kept coordinated with the pool across
 * cpu-[un]hotplugs.
 */
static void worker_attach_to_pool(struct worker *worker,
                                   struct worker_pool *pool)
{
        mutex_lock(&wq_pool_attach_mutex);

        /*
         * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
         * stable across this function.  See the comments above the flag
         * definition for details.
         */
        if (pool->flags & POOL_DISASSOCIATED)
                worker->flags |= WORKER_UNBOUND;
        else
                kthread_set_per_cpu(worker->task, pool->cpu);

        if (worker->rescue_wq)
                set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);

        list_add_tail(&worker->node, &pool->workers);
        worker->pool = pool;

        mutex_unlock(&wq_pool_attach_mutex);
}

/**
 * worker_detach_from_pool() - detach a worker from its pool
 * @worker: worker which is attached to its pool
 *
 * Undo the attaching which had been done in worker_attach_to_pool().
 * The caller worker shouldn't access the pool after being detached
 * unless it has other references to the pool.
 */
static void worker_detach_from_pool(struct worker *worker)
{
        struct worker_pool *pool = worker->pool;
        struct completion *detach_completion = NULL;

        mutex_lock(&wq_pool_attach_mutex);

        kthread_set_per_cpu(worker->task, -1);
        list_del(&worker->node);
        worker->pool = NULL;

        if (list_empty(&pool->workers))
                detach_completion = pool->detach_completion;
        mutex_unlock(&wq_pool_attach_mutex);

        /* clear leftover flags without pool->lock after it is detached */
        worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);

        if (detach_completion)
                complete(detach_completion);
}

/**
 * create_worker - create a new workqueue worker
 * @pool: pool the new worker will belong to
 *
 * Create and start a new worker which is attached to @pool.
 *
 * CONTEXT:
 * Might sleep.  Does GFP_KERNEL allocations.
 *
 * Return:
 * Pointer to the newly created worker, %NULL on failure.
 */
static struct worker *create_worker(struct worker_pool *pool)
{
        struct worker *worker;
        int id;
        char id_buf[16];

        /* ID is needed to determine kthread name */
        id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
        if (id < 0)
                return NULL;

        worker = alloc_worker(pool->node);
        if (!worker)
                goto fail;

        worker->id = id;

        if (pool->cpu >= 0)
                snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
                         pool->attrs->nice < 0 ? "H" : "");
        else
                snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);

        worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
                                              "kworker/%s", id_buf);
        if (IS_ERR(worker->task))
                goto fail;

        set_user_nice(worker->task, pool->attrs->nice);
        kthread_bind_mask(worker->task, pool->attrs->cpumask);

        /* successful, attach the worker to the pool */
        worker_attach_to_pool(worker, pool);

        /* start the newly created worker */
        raw_spin_lock_irq(&pool->lock);
        worker->pool->nr_workers++;
        worker_enter_idle(worker);
        wake_up_process(worker->task);
        raw_spin_unlock_irq(&pool->lock);

        return worker;

fail:
        ida_free(&pool->worker_ida, id);
        kfree(worker);
        return NULL;
}

/**
 * destroy_worker - destroy a workqueue worker
 * @worker: worker to be destroyed
 *
 * Destroy @worker and adjust @pool stats accordingly.  The worker
 * should be idle.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void destroy_worker(struct worker *worker)
{
        struct worker_pool *pool = worker->pool;

        lockdep_assert_held(&pool->lock);

        /* sanity check frenzy */
        if (WARN_ON(worker->current_work) ||
            WARN_ON(!list_empty(&worker->scheduled)) ||
            WARN_ON(!(worker->flags & WORKER_IDLE)))
                return;

        pool->nr_workers--;
        pool->nr_idle--;

        list_del_init(&worker->entry);
        worker->flags |= WORKER_DIE;
        wake_up_process(worker->task);
}

static void idle_worker_timeout(struct timer_list *t)
{
        struct worker_pool *pool = from_timer(pool, t, idle_timer);

        raw_spin_lock_irq(&pool->lock);

        while (too_many_workers(pool)) {
                struct worker *worker;
                unsigned long expires;

                /* idle_list is kept in LIFO order, check the last one */
                worker = list_entry(pool->idle_list.prev, struct worker, entry);
                expires = worker->last_active + IDLE_WORKER_TIMEOUT;

                if (time_before(jiffies, expires)) {
                        mod_timer(&pool->idle_timer, expires);
                        break;
                }

                destroy_worker(worker);
        }

        raw_spin_unlock_irq(&pool->lock);
}

static void send_mayday(struct work_struct *work)
{
        struct pool_workqueue *pwq = get_work_pwq(work);
        struct workqueue_struct *wq = pwq->wq;

        lockdep_assert_held(&wq_mayday_lock);

        if (!wq->rescuer)
                return;

        /* mayday mayday mayday */
        if (list_empty(&pwq->mayday_node)) {
                /*
                 * If @pwq is for an unbound wq, its base ref may be put at
                 * any time due to an attribute change.  Pin @pwq until the
                 * rescuer is done with it.
                 */
                get_pwq(pwq);
                list_add_tail(&pwq->mayday_node, &wq->maydays);
                wake_up_process(wq->rescuer->task);
        }
}

static void pool_mayday_timeout(struct timer_list *t)
{
        struct worker_pool *pool = from_timer(pool, t, mayday_timer);
        struct work_struct *work;

        raw_spin_lock_irq(&pool->lock);
        raw_spin_lock(&wq_mayday_lock);         /* for wq->maydays */

        if (need_to_create_worker(pool)) {
                /*
                 * We've been trying to create a new worker but
                 * haven't been successful.  We might be hitting an
                 * allocation deadlock.  Send distress signals to
                 * rescuers.
                 */
                list_for_each_entry(work, &pool->worklist, entry)
                        send_mayday(work);
        }

        raw_spin_unlock(&wq_mayday_lock);
        raw_spin_unlock_irq(&pool->lock);

        mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create a new worker for @pool if necessary.  @pool is guaranteed to
 * have at least one idle worker on return from this function.  If
 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
 * sent to all rescuers with works scheduled on @pool to resolve
 * possible allocation deadlock.
 *
 * On return, need_to_create_worker() is guaranteed to be %false and
 * may_start_working() %true.
 *
 * LOCKING:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.  Called only from
 * manager.
 */
static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
restart:
        raw_spin_unlock_irq(&pool->lock);

        /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
        mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

        while (true) {
                if (create_worker(pool) || !need_to_create_worker(pool))
                        break;

                schedule_timeout_interruptible(CREATE_COOLDOWN);

                if (!need_to_create_worker(pool))
                        break;
        }

        del_timer_sync(&pool->mayday_timer);
        raw_spin_lock_irq(&pool->lock);
        /*
         * This is necessary even after a new worker was just successfully
         * created as @pool->lock was dropped and the new worker might have
         * already become busy.
         */
        if (need_to_create_worker(pool))
                goto restart;
}

/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and manage the worker pool @worker belongs
 * to.  At any given time, there can be only zero or one manager per
 * pool.  The exclusion is handled automatically by this function.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.  Does GFP_KERNEL allocations.
 *
 * Return:
 * %false if the pool doesn't need management and the caller can safely
 * start processing works, %true if management function was performed and
 * the conditions that the caller verified before calling the function may
 * no longer be true.
 */
static bool manage_workers(struct worker *worker)
{
        struct worker_pool *pool = worker->pool;

        if (pool->flags & POOL_MANAGER_ACTIVE)
                return false;

        pool->flags |= POOL_MANAGER_ACTIVE;
        pool->manager = worker;

        maybe_create_worker(pool);

        pool->manager = NULL;
        pool->flags &= ~POOL_MANAGER_ACTIVE;
        rcuwait_wake_up(&manager_wait);
        return true;
}

/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work.  This function contains all the logics necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
 */
static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
__acquires(&pool->lock)
{
        struct pool_workqueue *pwq = get_work_pwq(work);
        struct worker_pool *pool = worker->pool;
        bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
        unsigned long work_data;
        struct worker *collision;
#ifdef CONFIG_LOCKDEP
        /*
         * It is permissible to free the struct work_struct from
         * inside the function that is called from it, this we need to
         * take into account for lockdep too.  To avoid bogus "held
         * lock freed" warnings as well as problems when looking into
         * work->lockdep_map, make a copy and use that here.
         */
        struct lockdep_map lockdep_map;

        lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif
        /* ensure we're on the correct CPU */
        WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
                     raw_smp_processor_id() != pool->cpu);

        /*
         * A single work shouldn't be executed concurrently by
         * multiple workers on a single cpu.  Check whether anyone is
         * already processing the work.  If so, defer the work to the
         * currently executing one.
         */
        collision = find_worker_executing_work(pool, work);
        if (unlikely(collision)) {
                move_linked_works(work, &collision->scheduled, NULL);
                return;
        }

        /* claim and dequeue */
        debug_work_deactivate(work);
        hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
        worker->current_work = work;
        worker->current_func = work->func;
        worker->current_pwq = pwq;
        work_data = *work_data_bits(work);
        worker->current_color = get_work_color(work_data);

        /*
         * Record wq name for cmdline and debug reporting, may get
         * overridden through set_worker_desc().
         */
        strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);

        list_del_init(&work->entry);

        /*
         * CPU intensive works don't participate in concurrency management.
         * They're the scheduler's responsibility.  This takes @worker out
         * of concurrency management and the next code block will chain
         * execution of the pending work items.
         */
        if (unlikely(cpu_intensive))
                worker_set_flags(worker, WORKER_CPU_INTENSIVE);

        /*
         * Wake up another worker if necessary.  The condition is always
         * false for normal per-cpu workers since nr_running would always
         * be >= 1 at this point.  This is used to chain execution of the
         * pending work items for WORKER_NOT_RUNNING workers such as the
         * UNBOUND and CPU_INTENSIVE ones.
         */
        if (need_more_worker(pool))
                wake_up_worker(pool);

        /*
         * Record the last pool and clear PENDING which should be the last
         * update to @work.  Also, do this inside @pool->lock so that
         * PENDING and queued state changes happen together while IRQ is
         * disabled.
         */
        set_work_pool_and_clear_pending(work, pool->id);

        raw_spin_unlock_irq(&pool->lock);

        lock_map_acquire(&pwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        /*
         * Strictly speaking we should mark the invariant state without
         * holding any locks, that is, before these two lock_map_acquire()'s.
         * However, doing that creates false W1->C->W1 dependencies between
         * the flush_work() and completion primitives even though no actual
         * deadlock is possible, so the invariant state is marked here
         * instead.  AFAICT there is no possible deadlock scenario between
         * the flush_work() and complete() primitives (except for
         * single-threaded workqueues), so hiding them isn't a problem.
         */
        lockdep_invariant_state(true);
        trace_workqueue_execute_start(work);
        worker->current_func(work);
        /*
         * While we must be careful to not use "work" after this, the trace
         * point will only record its address.
         */
        trace_workqueue_execute_end(work, worker->current_func);
        lock_map_release(&lockdep_map);
        lock_map_release(&pwq->wq->lockdep_map);

        if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
                pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
                       "     last function: %ps\n",
                       current->comm, preempt_count(), task_pid_nr(current),
                       worker->current_func);
                debug_show_held_locks(current);
                dump_stack();
        }

        /*
         * The following prevents a kworker from hogging CPU on !PREEMPT
         * kernels, where a requeueing work item waiting for something to
         * happen could deadlock with stop_machine as such work item could
         * indefinitely requeue itself while all other CPUs are trapped in
         * stop_machine.  At the same time, report a quiescent RCU state so
         * the same condition doesn't freeze RCU.
         */
        cond_resched();

        raw_spin_lock_irq(&pool->lock);

        /* clear cpu intensive status */
        if (unlikely(cpu_intensive))
                worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

        /* tag the worker for identification in schedule() */
        worker->last_func = worker->current_func;

        /* we're done with it, release */
        hash_del(&worker->hentry);
        worker->current_work = NULL;
        worker->current_func = NULL;
        worker->current_pwq = NULL;
        worker->current_color = INT_MAX;
        pwq_dec_nr_in_flight(pwq, work_data);
}

/**
 * process_scheduled_works - process scheduled works
 * @worker: self
 *
 * Process all scheduled works.  Please note that the scheduled list
 * may change while processing a work, so this function repeatedly
 * fetches a work from the top and executes it.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
 * multiple times.
 */
static void process_scheduled_works(struct worker *worker)
{
        while (!list_empty(&worker->scheduled)) {
                struct work_struct *work = list_first_entry(&worker->scheduled,
                                                struct work_struct, entry);
                process_one_work(worker, work);
        }
}

static void set_pf_worker(bool val)
{
        mutex_lock(&wq_pool_attach_mutex);
        if (val)
                current->flags |= PF_WQ_WORKER;
        else
                current->flags &= ~PF_WQ_WORKER;
        mutex_unlock(&wq_pool_attach_mutex);
}

/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The worker thread function.  All workers belong to a worker_pool -
 * either a per-cpu one or dynamic unbound one.  These workers process
 * all work items regardless of their specific target workqueue.  The
 * only exception is work items which belong to workqueues with a
 * rescuer which will be explained in rescuer_thread().
 *
 * Return: 0
 */
static int worker_thread(void *__worker)
{
        struct worker *worker = __worker;
        struct worker_pool *pool = worker->pool;

        /* tell the scheduler that this is a workqueue worker */
        set_pf_worker(true);
woke_up:
        raw_spin_lock_irq(&pool->lock);

        /* am I supposed to die? */
        if (unlikely(worker->flags & WORKER_DIE)) {
                raw_spin_unlock_irq(&pool->lock);
                WARN_ON_ONCE(!list_empty(&worker->entry));
                set_pf_worker(false);

                set_task_comm(worker->task, "kworker/dying");
                ida_free(&pool->worker_ida, worker->id);
                worker_detach_from_pool(worker);
                kfree(worker);
                return 0;
        }

        worker_leave_idle(worker);
recheck:
        /* no more worker necessary? */
        if (!need_more_worker(pool))
                goto sleep;

        /* do we need to manage? */
        if (unlikely(!may_start_working(pool)) && manage_workers(worker))
                goto recheck;

        /*
         * ->scheduled list can only be filled while a worker is
         * preparing to process a work or actually processing it.
         * Make sure nobody diddled with it while I was sleeping.
         */
        WARN_ON_ONCE(!list_empty(&worker->scheduled));

        /*
         * Finish PREP stage.  We're guaranteed to have at least one idle
         * worker or that someone else has already assumed the manager
         * role.  This is where @worker starts participating in concurrency
         * management if applicable and concurrency management is restored
         * after being rebound.  See rebind_workers() for details.
         */
        worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

        do {
                struct work_struct *work =
                        list_first_entry(&pool->worklist,
                                         struct work_struct, entry);

                pool->watchdog_ts = jiffies;

                if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
                        /* optimization path, not strictly necessary */
                        process_one_work(worker, work);
                        if (unlikely(!list_empty(&worker->scheduled)))
                                process_scheduled_works(worker);
                } else {
                        move_linked_works(work, &worker->scheduled, NULL);
                        process_scheduled_works(worker);
                }
        } while (keep_working(pool));

        worker_set_flags(worker, WORKER_PREP);
sleep:
        /*
         * pool->lock is held and there's no work to process and no need
         * to manage, sleep.  Workers are woken up only while holding
         * pool->lock or from the local cpu, so setting the current state
         * before releasing pool->lock is enough to prevent losing any
         * event.
         */
        worker_enter_idle(worker);
        __set_current_state(TASK_IDLE);
        raw_spin_unlock_irq(&pool->lock);
        schedule();
        goto woke_up;
}

/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueue rescuer thread function.  There's one rescuer for each
 * workqueue which has WQ_MEM_RECLAIM set.
 *
 * Regular work processing on a pool may block trying to create a new
 * worker which uses GFP_KERNEL allocation which has slight chance of
 * developing into deadlock if some works currently on the same queue
 * need to be processed to satisfy the GFP_KERNEL allocation.  This is
 * the problem rescuer solves.
 *
 * When such condition is possible, the pool summons rescuers of all
 * workqueues which have works queued on the pool and let them process
 * those works so that forward progress can be guaranteed.
 *
 * This should happen rarely.
 *
 * Return: 0
 */
static int rescuer_thread(void *__rescuer)
{
        struct worker *rescuer = __rescuer;
        struct workqueue_struct *wq = rescuer->rescue_wq;
        struct list_head *scheduled = &rescuer->scheduled;
        bool should_stop;

        set_user_nice(current, RESCUER_NICE_LEVEL);

        /*
         * Mark rescuer as worker too.  As WORKER_PREP is never cleared,
         * it doesn't participate in concurrency management.
         */
        set_pf_worker(true);
repeat:
        set_current_state(TASK_IDLE);

        /*
         * By the time the rescuer is requested to stop, the workqueue
         * shouldn't have any work pending, but @wq->maydays may still have
         * pwq(s) queued.  This can happen by non-rescuer workers consuming
         * all the work items before the rescuer got to them.  Go through
         * @wq->maydays processing before acting on should_stop so that the
         * list is always empty on exit.
         */
        should_stop = kthread_should_stop();

        /* see whether any pwq is asking for help */
        raw_spin_lock_irq(&wq_mayday_lock);

        while (!list_empty(&wq->maydays)) {
                struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
                                        struct pool_workqueue, mayday_node);
                struct worker_pool *pool = pwq->pool;
                struct work_struct *work, *n;
                bool first = true;

                __set_current_state(TASK_RUNNING);
                list_del_init(&pwq->mayday_node);

                raw_spin_unlock_irq(&wq_mayday_lock);

                worker_attach_to_pool(rescuer, pool);

                raw_spin_lock_irq(&pool->lock);

                /*
                 * Slurp in all works issued via this workqueue and
                 * process'em.
                 */
                WARN_ON_ONCE(!list_empty(scheduled));
                list_for_each_entry_safe(work, n, &pool->worklist, entry) {
                        if (get_work_pwq(work) == pwq) {
                                if (first)
                                        pool->watchdog_ts = jiffies;
                                move_linked_works(work, scheduled, &n);
                        }
                        first = false;
                }

                if (!list_empty(scheduled)) {
                        process_scheduled_works(rescuer);

                        /*
                         * The above execution of rescued work items could
                         * have created more to rescue through
                         * pwq_activate_first_inactive() or chained
                         * queueing.  Let's put @pwq back on mayday list so
                         * that such back-to-back work items, which may be
                         * being used to relieve memory pressure, don't
                         * incur MAYDAY_INTERVAL delay inbetween.
                         */
                        if (pwq->nr_active && need_to_create_worker(pool)) {
                                raw_spin_lock(&wq_mayday_lock);
                                /*
                                 * Queue iff we aren't racing destruction
                                 * and somebody else hasn't queued it already.
                                 */
                                if (wq->rescuer && list_empty(&pwq->mayday_node)) {
                                        get_pwq(pwq);
                                        list_add_tail(&pwq->mayday_node, &wq->maydays);
                                }
                                raw_spin_unlock(&wq_mayday_lock);
                        }
                }

                /*
                 * Put the reference grabbed by send_mayday().  @pool won't
                 * go away while we're still attached to it.
                 */
                put_pwq(pwq);

                /*
                 * Leave this pool.  If need_more_worker() is %true, notify
                 * a regular worker; otherwise, we end up with 0 concurrency
                 * and stalling the execution.
                 */
                if (need_more_worker(pool))
                        wake_up_worker(pool);

                raw_spin_unlock_irq(&pool->lock);

                worker_detach_from_pool(rescuer);

                raw_spin_lock_irq(&wq_mayday_lock);
        }

        raw_spin_unlock_irq(&wq_mayday_lock);

        if (should_stop) {
                __set_current_state(TASK_RUNNING);
                set_pf_worker(false);
                return 0;
        }

        /* rescuers should never participate in concurrency management */
        WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
        schedule();
        goto repeat;
}

/**
 * check_flush_dependency - check for flush dependency sanity
 * @target_wq: workqueue being flushed
 * @target_work: work item being flushed (NULL for workqueue flushes)
 *
 * %current is trying to flush the whole @target_wq or @target_work on
 * it.  If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current
 * is not reclaiming memory or running on a workqueue which doesn't have
 * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading
 * to a deadlock.
 */
static void check_flush_dependency(struct workqueue_struct *target_wq,
                                   struct work_struct *target_work)
{
        work_func_t target_func = target_work ? target_work->func : NULL;
        struct worker *worker;

        if (target_wq->flags & WQ_MEM_RECLAIM)
                return;

        worker = current_wq_worker();

        WARN_ONCE(current->flags & PF_MEMALLOC,
                  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
                  current->pid, current->comm, target_wq->name, target_func);
        WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
                              (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
                  "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
                  worker->current_pwq->wq->name, worker->current_func,
                  target_wq->name, target_func);
}

struct wq_barrier {
        struct work_struct      work;
        struct completion       done;
        struct task_struct      *task; /* purely informational */
};

static void wq_barrier_func(struct work_struct *work)
{
        struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
        complete(&barr->done);
}

/**
 * insert_wq_barrier - insert a barrier work
 * @pwq: pwq to insert barrier into
 * @barr: wq_barrier to insert
 * @target: target work to attach @barr to
 * @worker: worker currently executing @target, NULL if @target is not executing
 *
 * @barr is linked to @target such that @barr is completed only after
 * @target finishes execution.  Please note that the ordering guarantee
 * is observed only with respect to @target and on the local cpu.
 *
 * Currently, a queued barrier can't be canceled.  This is because
 * try_to_grab_pending() can't determine whether the work to be grabbed
 * is at the head of the queue and thus can't clear LINKED flag of the
 * previous work while there must be a valid next work after a work with
 * LINKED flag set.
 *
 * Note that when @worker is non-NULL, @target may be modified
 * underneath us, so we can't reliably determine pwq from @target.
 *
 * CONTEXT:
 * raw_spin_lock_irq(pool->lock).
 */
static void insert_wq_barrier(struct pool_workqueue *pwq,
                              struct wq_barrier *barr,
                              struct work_struct *target, struct worker *worker)
{
        unsigned int work_flags = 0;
        unsigned int work_color;
        struct list_head *head;

        /*
         * debugobject calls are safe here even with pool->lock locked
         * as we know for sure that this will not trigger any of the
         * checks and call back into the fixup functions where we
         * might deadlock.
         */
        INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
        __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));

        init_completion_map(&barr->done, &target->lockdep_map);

        barr->task = current;

        /* The barrier work item does not participate in pwq->nr_active. */
        work_flags |= WORK_STRUCT_INACTIVE;

        /*
         * If @target is currently being executed, schedule the
         * barrier to the worker; otherwise, put it after @target.
         */
        if (worker) {
                head = worker->scheduled.next;
                work_color = worker->current_color;
        } else {
                unsigned long *bits = work_data_bits(target);

                head = target->entry.next;
                /* there can already be other linked works, inherit and set */
                work_flags |= *bits & WORK_STRUCT_LINKED;
                work_color = get_work_color(*bits);
                __set_bit(WORK_STRUCT_LINKED_BIT, bits);
        }

        pwq->nr_in_flight[work_color]++;
        work_flags |= work_color_to_flags(work_color);

        debug_work_activate(&barr->work);
        insert_work(pwq, &barr->work, head, work_flags);
}

/**
 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
 * @wq: workqueue being flushed
 * @flush_color: new flush color, < 0 for no-op
 * @work_color: new work color, < 0 for no-op
 *
 * Prepare pwqs for workqueue flushing.
 *
 * If @flush_color is non-negative, flush_color on all pwqs should be
 * -1.  If no pwq has in-flight commands at the specified color, all
 * pwq->flush_color's stay at -1 and %false is returned.  If any pwq
 * has in flight commands, its pwq->flush_color is set to @flush_color,
 * @wq->nr_pwqs_to_flush is updated accordingly, pwq wakeup logic is
 * armed and %true is returned.
 *
 * The caller should have initialized @wq->first_flusher prior to
 * calling this function with non-negative @flush_color.  If
 * @flush_color is negative, no flush color update is done and %false
 * is returned.
 *
 * If @work_color is non-negative, all pwqs should have the same
 * work_color which is previous to @work_color and all will be
 * advanced to @work_color.
 *
 * CONTEXT:
 * mutex_lock(wq->mutex).
 *
 * Return:
 * %true if @flush_color >= 0 and there's something to flush.  %false
 * otherwise.
 */
static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
                                      int flush_color, int work_color)
{
        bool wait = false;
        struct pool_workqueue *pwq;

        if (flush_color >= 0) {
                WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
                atomic_set(&wq->nr_pwqs_to_flush, 1);
        }

        for_each_pwq(pwq, wq) {
                struct worker_pool *pool = pwq->pool;

                raw_spin_lock_irq(&pool->lock);

                if (flush_color >= 0) {
                        WARN_ON_ONCE(pwq->flush_color != -1);

                        if (pwq->nr_in_flight[flush_color]) {
                                pwq->flush_color = flush_color;
                                atomic_inc(&wq->nr_pwqs_to_flush);
                                wait = true;
                        }
                }

                if (work_color >= 0) {
                        WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
                        pwq->work_color = work_color;
                }

                raw_spin_unlock_irq(&pool->lock);
        }

        if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
                complete(&wq->first_flusher->done);

        return wait;
}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
        struct wq_flusher this_flusher = {
                .list = LIST_HEAD_INIT(this_flusher.list),
                .flush_color = -1,
                .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
        };
        int next_color;

        if (WARN_ON(!wq_online))
                return;

        lock_map_acquire(&wq->lockdep_map);
        lock_map_release(&wq->lockdep_map);

        mutex_lock(&wq->mutex);

        /*
         * Start-to-wait phase
         */
        next_color = work_next_color(wq->work_color);

        if (next_color != wq->flush_color) {
                /*
                 * Color space is not full.  The current work_color
                 * becomes our flush_color and work_color is advanced
                 * by one.
                 */
                WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
                this_flusher.flush_color = wq->work_color;
                wq->work_color = next_color;

                if (!wq->first_flusher) {
                        /* no flush in progress, become the first flusher */
                        WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

                        wq->first_flusher = &this_flusher;

                        if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
                                                       wq->work_color)) {
                                /* nothing to flush, done */
                                wq->flush_color = next_color;
                                wq->first_flusher = NULL;
                                goto out_unlock;
                        }
                } else {
                        /* wait in queue */
                        WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
                        list_add_tail(&this_flusher.list, &wq->flusher_queue);
                        flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
                }
        } else {
                /*
                 * Oops, color space is full, wait on overflow queue.
                 * The next flush completion will assign us
                 * flush_color and transfer to flusher_queue.
                 */
                list_add_tail(&this_flusher.list, &wq->flusher_overflow);
        }

        check_flush_dependency(wq, NULL);

        mutex_unlock(&wq->mutex);

        wait_for_completion(&this_flusher.done);

        /*
         * Wake-up-and-cascade phase
         *
         * First flushers are responsible for cascading flushes and
         * handling overflow.  Non-first flushers can simply return.
         */
        if (READ_ONCE(wq->first_flusher) != &this_flusher)
                return;

        mutex_lock(&wq->mutex);

        /* we might have raced, check again with mutex held */
        if (wq->first_flusher != &this_flusher)
                goto out_unlock;

        WRITE_ONCE(wq->first_flusher, NULL);

        WARN_ON_ONCE(!list_empty(&this_flusher.list));
        WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

        while (true) {
                struct wq_flusher *next, *tmp;

                /* complete all the flushers sharing the current flush color */
                list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
                        if (next->flush_color != wq->flush_color)
                                break;
                        list_del_init(&next->list);
                        complete(&next->done);
                }

                WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
                             wq->flush_color != work_next_color(wq->work_color));

                /* this flush_color is finished, advance by one */
                wq->flush_color = work_next_color(wq->flush_color);

                /* one color has been freed, handle overflow queue */
                if (!list_empty(&wq->flusher_overflow)) {
                        /*
                         * Assign the same color to all overflowed
                         * flushers, advance work_color and append to
                         * flusher_queue.  This is the start-to-wait
                         * phase for these overflowed flushers.
                         */
                        list_for_each_entry(tmp, &wq->flusher_overflow, list)
                                tmp->flush_color = wq->work_color;

                        wq->work_color = work_next_color(wq->work_color);

                        list_splice_tail_init(&wq->flusher_overflow,
                                              &wq->flusher_queue);
                        flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
                }

                if (list_empty(&wq->flusher_queue)) {
                        WARN_ON_ONCE(wq->flush_color != wq->work_color);
                        break;
                }

                /*
                 * Need to flush more colors.  Make the next flusher
                 * the new first flusher and arm pwqs.
                 */
                WARN_ON_ONCE(wq->flush_color == wq->work_color);
                WARN_ON_ONCE(wq->flush_color != next->flush_color);

                list_del_init(&next->list);
                wq->first_flusher = next;

                if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
                        break;

                /*
                 * Meh... this color is already done, clear first
                 * flusher and repeat cascading.
                 */
                wq->first_flusher = NULL;
        }

out_unlock:
        mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL(flush_workqueue);
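
/*
 * Usage sketch for flush_workqueue() (illustrative; my_wq is a
 * hypothetical workqueue created elsewhere with alloc_workqueue()):
 *
 *	queue_work(my_wq, &work_a);
 *	queue_work(my_wq, &work_b);
 *	flush_workqueue(my_wq);
 *	// both work_a and work_b have finished executing here; items
 *	// queued *after* the flush started are not waited for
 */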

/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty.  While draining is in
 * progress, only chain queueing is allowed.  IOW, only currently
 * pending or running work items on @wq can queue further work items on
 * it.  @wq is flushed repeatedly until it becomes empty.  The number of
 * flushing is determined by the depth of chaining and should be
 * relatively short.  Whine if it takes too long.
 */
void drain_workqueue(struct workqueue_struct *wq)
{
        unsigned int flush_cnt = 0;
        struct pool_workqueue *pwq;

        /*
         * __queue_work() needs to test whether there are drainers, is much
         * hotter than drain_workqueue() and already looks at @wq->flags.
         * Use __WQ_DRAINING so that queue doesn't have to check nr_drainers.
         */
        mutex_lock(&wq->mutex);
        if (!wq->nr_drainers++)
                wq->flags |= __WQ_DRAINING;
        mutex_unlock(&wq->mutex);
reflush:
        flush_workqueue(wq);

        mutex_lock(&wq->mutex);

        for_each_pwq(pwq, wq) {
                bool drained;

                raw_spin_lock_irq(&pwq->pool->lock);
                drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
                raw_spin_unlock_irq(&pwq->pool->lock);

                if (drained)
                        continue;

                if (++flush_cnt == 10 ||
                    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
                        pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
                                wq->name, __func__, flush_cnt);

                mutex_unlock(&wq->mutex);
                goto reflush;
        }

        if (!--wq->nr_drainers)
                wq->flags &= ~__WQ_DRAINING;
        mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(drain_workqueue);

static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
                             bool from_cancel)
{
        struct worker *worker = NULL;
        struct worker_pool *pool;
        struct pool_workqueue *pwq;

        might_sleep();

        rcu_read_lock();
        pool = get_work_pool(work);
        if (!pool) {
                rcu_read_unlock();
                return false;
        }

        raw_spin_lock_irq(&pool->lock);
        /* see the comment in try_to_grab_pending() with the same code */
        pwq = get_work_pwq(work);
        if (pwq) {
                if (unlikely(pwq->pool != pool))
                        goto already_gone;
        } else {
                worker = find_worker_executing_work(pool, work);
                if (!worker)
                        goto already_gone;
                pwq = worker->current_pwq;
        }

        check_flush_dependency(pwq->wq, work);

        insert_wq_barrier(pwq, barr, work, worker);
        raw_spin_unlock_irq(&pool->lock);

        /*
         * Force a lock recursion deadlock when using flush_work() inside a
         * single-threaded or rescuer equipped workqueue.
         *
         * For single threaded workqueues the deadlock happens when the work
         * is after the work issuing the flush_work().  For rescuer equipped
         * workqueues the deadlock happens when the rescuer stalls, blocking
         * forward progress.
         */
        if (!from_cancel &&
            (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
                lock_map_acquire(&pwq->wq->lockdep_map);
                lock_map_release(&pwq->wq->lockdep_map);
        }
        rcu_read_unlock();
        return true;
already_gone:
        raw_spin_unlock_irq(&pool->lock);
        rcu_read_unlock();
        return false;
}

static bool __flush_work(struct work_struct *work, bool from_cancel)
{
        struct wq_barrier barr;

        if (WARN_ON(!wq_online))
                return false;

        if (WARN_ON(!work->func))
                return false;

        if (!from_cancel) {
                lock_map_acquire(&work->lockdep_map);
                lock_map_release(&work->lockdep_map);
        }

        if (start_flush_work(work, &barr, from_cancel)) {
                wait_for_completion(&barr.done);
                destroy_work_on_stack(&barr.work);
                return true;
        } else {
                return false;
        }
}

/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be
 * idle on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_work(struct work_struct *work)
{
        return __flush_work(work, false);
}
EXPORT_SYMBOL_GPL(flush_work);
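
/*
 * Usage sketch for flush_work() (illustrative; names hypothetical):
 *
 *	queue_work(system_wq, &my_work);
 *	...
 *	// wait for the last queueing instance of my_work to finish;
 *	// unlike cancel_work_sync(), a concurrent requeue can leave it
 *	// pending again after this returns
 *	flush_work(&my_work);
 */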

struct cwt_wait {
        wait_queue_entry_t      wait;
        struct work_struct      *work;
};

static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
        struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);

        if (cwait->work != key)
                return 0;
        return autoremove_wake_function(wait, mode, sync, key);
}

static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
        static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
        unsigned long flags;
        int ret;

        do {
                ret = try_to_grab_pending(work, is_dwork, &flags);
                /*
                 * If someone else is already canceling, wait for it to
                 * finish.  flush_work() doesn't work for PREEMPT_NONE
                 * because we may get scheduled between @work's completion
                 * and the other canceling task resuming and clearing
                 * CANCELING - flush_work() will return false immediately
                 * as @work is no longer busy, try_to_grab_pending() will
                 * return -ENOENT as @work is still being canceled and the
                 * other canceling task won't be able to clear CANCELING as
                 * we're hogging the CPU.
                 *
                 * Let's wait for completion using a waitqueue.  As this
                 * may lead to the thundering herd problem, use a custom
                 * wake function which matches @work along with exclusive
                 * wait and wakeup.
                 */
                if (unlikely(ret == -ENOENT)) {
                        struct cwt_wait cwait;

                        init_wait(&cwait.wait);
                        cwait.wait.func = cwt_wakefn;
                        cwait.work = work;

                        prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
                                                  TASK_UNINTERRUPTIBLE);
                        if (work_is_canceling(work))
                                schedule();
                        finish_wait(&cancel_waitq, &cwait.wait);
                }
        } while (unlikely(ret < 0));

        /* tell other tasks trying to grab @work to back off */
        mark_work_canceling(work);
        local_irq_restore(flags);

        /*
         * This allows canceling during early boot.  We know that @work
         * isn't executing.
         */
        if (wq_online)
                __flush_work(work, true);

        clear_work_data(work);

        /*
         * Paired with prepare_to_wait() above so that either
         * waitqueue_active() is visible here or !work_is_canceling() is
         * visible there.
         */
        smp_mb();
        if (waitqueue_active(&cancel_waitq))
                __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);

        return ret;
}
3185
/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function
 * can be used even if the work re-queues itself or migrates to
 * another workqueue.  On return from this function, @work is
 * guaranteed to be not pending or executing on any CPU.
 *
 * cancel_work_sync(&delayed_work->work) must not be used for
 * delayed_work's.  Use cancel_delayed_work_sync() instead.
 *
 * The caller must ensure that the workqueue on which @work was last
 * queued can't be destroyed before this function returns.
 *
 * Return:
 * %true if @work was pending, %false otherwise.
 */
bool cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, false);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

/**
 * flush_delayed_work - wait for a dwork to finish executing the last queueing
 * @dwork: the delayed work to flush
 *
 * Delayed timer is cancelled and the pending work is queued for
 * immediate execution.  Like flush_work(), this function only
 * considers the last queueing instance of @dwork.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_delayed_work(struct delayed_work *dwork)
{
	local_irq_disable();
	if (del_timer_sync(&dwork->timer))
		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
	local_irq_enable();
	return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);
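/*
 * Usage sketch (illustrative only) combining the delayed-work helpers.  A
 * self re-arming poller, with hypothetical names, can stop itself like
 * this on teardown:
 *
 *	static void poll_fn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(to_delayed_work(work),
 *						  struct my_dev, poll);
 *		...
 *		schedule_delayed_work(&dev->poll, HZ);	// re-arm
 *	}
 *
 *	cancel_delayed_work_sync(&dev->poll);	// handles the re-arm above
 */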
3231
/**
 * flush_rcu_work - wait for a rwork to finish executing the last queueing
 * @rwork: the rcu work to flush
 *
 * Return:
 * %true if flush_rcu_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
bool flush_rcu_work(struct rcu_work *rwork)
{
	if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
		rcu_barrier();
		flush_work(&rwork->work);
		return true;
	} else {
		return flush_work(&rwork->work);
	}
}
EXPORT_SYMBOL(flush_rcu_work);

static bool __cancel_work(struct work_struct *work, bool is_dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (unlikely(ret < 0))
		return false;

	set_work_pool_and_clear_pending(work, get_work_pool_id(work));
	local_irq_restore(flags);
	return ret;
}
3268
/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.
 *
 * Return: %true if @dwork was pending and canceled; %false if it wasn't
 * pending.
 *
 * Note:
 * The work callback function may still be running on return, unless
 * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
 * use cancel_delayed_work_sync() to wait on it.
 *
 * This function is safe to call from any context including IRQ handler.
 */
bool cancel_delayed_work(struct delayed_work *dwork)
{
	return __cancel_work(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work);

/**
 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
 * @dwork: the delayed work to cancel
 *
 * This is cancel_work_sync() for delayed works.
 *
 * Return:
 * %true if @dwork was pending, %false otherwise.
 */
bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
3305
/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * schedule_on_each_cpu() executes @func on each online CPU using the
 * system workqueue and blocks until all CPUs have completed.
 * schedule_on_each_cpu() is very slow.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct __percpu *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	cpus_read_lock();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	cpus_read_unlock();
	free_percpu(works);
	return 0;
}
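/*
 * Usage sketch (illustrative only): running a cache-flush style callback
 * once on every online CPU and waiting for all of them to complete.
 * flush_local_state is a hypothetical work function.
 *
 *	static void flush_local_state(struct work_struct *unused) { ... }
 *	...
 *	int err = schedule_on_each_cpu(flush_local_state);
 */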
3342
/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Return:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
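/*
 * Usage sketch (illustrative only): a release path that may be entered
 * from interrupt context.  The execute_work storage must live until the
 * callback runs, which is why it is embedded in the (hypothetical)
 * object here.
 *
 *	static void my_free(struct work_struct *work) { ... }
 *	...
 *	execute_in_process_context(my_free, &obj->ew);
 */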
3368
/**
 * free_workqueue_attrs - free a workqueue_attrs
 * @attrs: workqueue_attrs to free
 *
 * Undo alloc_workqueue_attrs().
 */
void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
	if (attrs) {
		free_cpumask_var(attrs->cpumask);
		kfree(attrs);
	}
}

/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 *
 * Allocate a new workqueue_attrs, initialize with default settings and
 * return it.
 *
 * Return: The allocated new workqueue_attr on success. %NULL on failure.
 */
struct workqueue_attrs *alloc_workqueue_attrs(void)
{
	struct workqueue_attrs *attrs;

	attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		goto fail;
	if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
		goto fail;

	cpumask_copy(attrs->cpumask, cpu_possible_mask);
	return attrs;
fail:
	free_workqueue_attrs(attrs);
	return NULL;
}

static void copy_workqueue_attrs(struct workqueue_attrs *to,
				 const struct workqueue_attrs *from)
{
	to->nice = from->nice;
	cpumask_copy(to->cpumask, from->cpumask);
	/*
	 * Unlike hash and equality test, this function doesn't ignore
	 * ->no_numa as it is used for both pool and wq attrs.  Instead,
	 * get_unbound_pool() explicitly clears ->no_numa after copying.
	 */
	to->no_numa = from->no_numa;
}

/* hash value of the content of @attrs */
static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
{
	u32 hash = 0;

	hash = jhash_1word(attrs->nice, hash);
	hash = jhash(cpumask_bits(attrs->cpumask),
		     BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
	return hash;
}

/* content equality test */
static bool wqattrs_equal(const struct workqueue_attrs *a,
			  const struct workqueue_attrs *b)
{
	if (a->nice != b->nice)
		return false;
	if (!cpumask_equal(a->cpumask, b->cpumask))
		return false;
	return true;
}
3442
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
static int init_worker_pool(struct worker_pool *pool)
{
	raw_spin_lock_init(&pool->lock);
	pool->id = -1;
	pool->cpu = -1;
	pool->node = NUMA_NO_NODE;
	pool->flags |= POOL_DISASSOCIATED;
	pool->watchdog_ts = jiffies;
	INIT_LIST_HEAD(&pool->worklist);
	INIT_LIST_HEAD(&pool->idle_list);
	hash_init(pool->busy_hash);

	timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);

	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);

	INIT_LIST_HEAD(&pool->workers);

	ida_init(&pool->worker_ida);
	INIT_HLIST_NODE(&pool->hash_node);
	pool->refcnt = 1;

	/* shouldn't fail above this point */
	pool->attrs = alloc_workqueue_attrs();
	if (!pool->attrs)
		return -ENOMEM;
	return 0;
}
3481
#ifdef CONFIG_LOCKDEP
static void wq_init_lockdep(struct workqueue_struct *wq)
{
	char *lock_name;

	lockdep_register_key(&wq->key);
	lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
	if (!lock_name)
		lock_name = wq->name;

	wq->lock_name = lock_name;
	lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
}

static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
	lockdep_unregister_key(&wq->key);
}

static void wq_free_lockdep(struct workqueue_struct *wq)
{
	if (wq->lock_name != wq->name)
		kfree(wq->lock_name);
}
#else
static void wq_init_lockdep(struct workqueue_struct *wq)
{
}

static void wq_unregister_lockdep(struct workqueue_struct *wq)
{
}

static void wq_free_lockdep(struct workqueue_struct *wq)
{
}
#endif
3519
static void rcu_free_wq(struct rcu_head *rcu)
{
	struct workqueue_struct *wq =
		container_of(rcu, struct workqueue_struct, rcu);

	wq_free_lockdep(wq);

	if (!(wq->flags & WQ_UNBOUND))
		free_percpu(wq->cpu_pwqs);
	else
		free_workqueue_attrs(wq->unbound_attrs);

	kfree(wq);
}

static void rcu_free_pool(struct rcu_head *rcu)
{
	struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);

	ida_destroy(&pool->worker_ida);
	free_workqueue_attrs(pool->attrs);
	kfree(pool);
}
3543
/*
 * Returns true, with pool->lock held, when the manager role is free;
 * returns false, with the lock released, when another task is managing.
 */
static bool wq_manager_inactive(struct worker_pool *pool)
{
	raw_spin_lock_irq(&pool->lock);

	if (pool->flags & POOL_MANAGER_ACTIVE) {
		raw_spin_unlock_irq(&pool->lock);
		return false;
	}
	return true;
}
3555
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * successfully or not, init_worker_pool().
 *
 * Should be called with wq_pool_mutex held.
 */
static void put_unbound_pool(struct worker_pool *pool)
{
	DECLARE_COMPLETION_ONSTACK(detach_completion);
	struct worker *worker;

	lockdep_assert_held(&wq_pool_mutex);

	if (--pool->refcnt)
		return;

	/* sanity checks */
	if (WARN_ON(!(pool->cpu < 0)) ||
	    WARN_ON(!list_empty(&pool->worklist)))
		return;

	/* release id and unhash */
	if (pool->id >= 0)
		idr_remove(&worker_pool_idr, pool->id);
	hash_del(&pool->hash_node);

	/*
	 * Become the manager and destroy all workers.  This prevents
	 * @pool's workers from blocking on attach_mutex.  We're the last
	 * manager and @pool gets freed with the flag set.
	 * Because of how wq_manager_inactive() works, we will hold the
	 * spinlock here when it returns true.
	 */
	rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
			   TASK_UNINTERRUPTIBLE);
	pool->flags |= POOL_MANAGER_ACTIVE;

	while ((worker = first_idle_worker(pool)))
		destroy_worker(worker);
	WARN_ON(pool->nr_workers || pool->nr_idle);
	raw_spin_unlock_irq(&pool->lock);

	mutex_lock(&wq_pool_attach_mutex);
	if (!list_empty(&pool->workers))
		pool->detach_completion = &detach_completion;
	mutex_unlock(&wq_pool_attach_mutex);

	if (pool->detach_completion)
		wait_for_completion(pool->detach_completion);

	/* shut down the timers */
	del_timer_sync(&pool->idle_timer);
	del_timer_sync(&pool->mayday_timer);

	/* RCU protected to allow dereferences from get_work_pool() */
	call_rcu(&pool->rcu, rcu_free_pool);
}
3618
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
	u32 hash = wqattrs_hash(attrs);
	struct worker_pool *pool;
	int node;
	int target_node = NUMA_NO_NODE;

	lockdep_assert_held(&wq_pool_mutex);

	/* do we already have a matching pool? */
	hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
		if (wqattrs_equal(pool->attrs, attrs)) {
			pool->refcnt++;
			return pool;
		}
	}

	/* if cpumask is contained inside a NUMA node, we belong to that node */
	if (wq_numa_enabled) {
		for_each_node(node) {
			if (cpumask_subset(attrs->cpumask,
					   wq_numa_possible_cpumask[node])) {
				target_node = node;
				break;
			}
		}
	}

	/* nope, create a new one */
	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
	if (!pool || init_worker_pool(pool) < 0)
		goto fail;

	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
	copy_workqueue_attrs(pool->attrs, attrs);
	pool->node = target_node;

	/*
	 * no_numa isn't a worker_pool attribute, always clear it.  See
	 * 'struct workqueue_attrs' comments for detail.
	 */
	pool->attrs->no_numa = false;

	if (worker_pool_assign_id(pool) < 0)
		goto fail;

	/* create and start the initial worker */
	if (wq_online && !create_worker(pool))
		goto fail;

	/* install */
	hash_add(unbound_pool_hash, &pool->hash_node, hash);

	return pool;
fail:
	if (pool)
		put_unbound_pool(pool);
	return NULL;
}
3692
3693static void rcu_free_pwq(struct rcu_head *rcu)
3694{
3695 kmem_cache_free(pwq_cache,
3696 container_of(rcu, struct pool_workqueue, rcu));
3697}
3698
3699
3700
3701
3702
3703static void pwq_unbound_release_workfn(struct work_struct *work)
3704{
3705 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3706 unbound_release_work);
3707 struct workqueue_struct *wq = pwq->wq;
3708 struct worker_pool *pool = pwq->pool;
3709 bool is_last = false;
3710
3711
3712
3713
3714
3715 if (!list_empty(&pwq->pwqs_node)) {
3716 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3717 return;
3718
3719 mutex_lock(&wq->mutex);
3720 list_del_rcu(&pwq->pwqs_node);
3721 is_last = list_empty(&wq->pwqs);
3722 mutex_unlock(&wq->mutex);
3723 }
3724
3725 mutex_lock(&wq_pool_mutex);
3726 put_unbound_pool(pool);
3727 mutex_unlock(&wq_pool_mutex);
3728
3729 call_rcu(&pwq->rcu, rcu_free_pwq);
3730
3731
3732
3733
3734
3735 if (is_last) {
3736 wq_unregister_lockdep(wq);
3737 call_rcu(&wq->rcu, rcu_free_wq);
3738 }
3739}
3740
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate inactive work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
static void pwq_adjust_max_active(struct pool_workqueue *pwq)
{
	struct workqueue_struct *wq = pwq->wq;
	bool freezable = wq->flags & WQ_FREEZABLE;
	unsigned long flags;

	/* for @wq->saved_max_active */
	lockdep_assert_held(&wq->mutex);

	/* fast exit for non-freezable wqs */
	if (!freezable && pwq->max_active == wq->saved_max_active)
		return;

	/* this function can be called during early boot w/ irq disabled */
	raw_spin_lock_irqsave(&pwq->pool->lock, flags);

	/*
	 * During [un]freezing, the caller is responsible for ensuring that
	 * this function is called at least once after @workqueue_freezing
	 * is updated and visible.
	 */
	if (!freezable || !workqueue_freezing) {
		bool kick = false;

		pwq->max_active = wq->saved_max_active;

		while (!list_empty(&pwq->inactive_works) &&
		       pwq->nr_active < pwq->max_active) {
			pwq_activate_first_inactive(pwq);
			kick = true;
		}

		/*
		 * Need to kick a worker after thawed or an unbound wq's
		 * max_active is bumped.  In realtime scenarios, always kicking
		 * a worker will cause interference on the isolated cpu cores,
		 * so kick iff work items were actually activated.
		 */
		if (kick)
			wake_up_worker(pwq->pool);
	} else {
		pwq->max_active = 0;
	}

	raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
}
3795
3796
3797static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3798 struct worker_pool *pool)
3799{
3800 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3801
3802 memset(pwq, 0, sizeof(*pwq));
3803
3804 pwq->pool = pool;
3805 pwq->wq = wq;
3806 pwq->flush_color = -1;
3807 pwq->refcnt = 1;
3808 INIT_LIST_HEAD(&pwq->inactive_works);
3809 INIT_LIST_HEAD(&pwq->pwqs_node);
3810 INIT_LIST_HEAD(&pwq->mayday_node);
3811 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3812}
3813
3814
3815static void link_pwq(struct pool_workqueue *pwq)
3816{
3817 struct workqueue_struct *wq = pwq->wq;
3818
3819 lockdep_assert_held(&wq->mutex);
3820
3821
3822 if (!list_empty(&pwq->pwqs_node))
3823 return;
3824
3825
3826 pwq->work_color = wq->work_color;
3827
3828
3829 pwq_adjust_max_active(pwq);
3830
3831
3832 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3833}
3834
3835
3836static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3837 const struct workqueue_attrs *attrs)
3838{
3839 struct worker_pool *pool;
3840 struct pool_workqueue *pwq;
3841
3842 lockdep_assert_held(&wq_pool_mutex);
3843
3844 pool = get_unbound_pool(attrs);
3845 if (!pool)
3846 return NULL;
3847
3848 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3849 if (!pwq) {
3850 put_unbound_pool(pool);
3851 return NULL;
3852 }
3853
3854 init_pwq(pwq, wq, pool);
3855 return pwq;
3856}
3857
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
				 int cpu_going_down, cpumask_t *cpumask)
{
	if (!wq_numa_enabled || attrs->no_numa)
		goto use_dfl;

	/* does @node have any online CPUs @attrs wants? */
	cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
	if (cpu_going_down >= 0)
		cpumask_clear_cpu(cpu_going_down, cpumask);

	if (cpumask_empty(cpumask))
		goto use_dfl;

	/* yeap, return possible CPUs in @node that @attrs wants */
	cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);

	if (cpumask_empty(cpumask)) {
		pr_warn_once("WARNING: workqueue cpumask: online intersect > "
				"possible intersect\n");
		return false;
	}

	return !cpumask_equal(cpumask, attrs->cpumask);

use_dfl:
	cpumask_copy(cpumask, attrs->cpumask);
	return false;
}
3909
3910
3911static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3912 int node,
3913 struct pool_workqueue *pwq)
3914{
3915 struct pool_workqueue *old_pwq;
3916
3917 lockdep_assert_held(&wq_pool_mutex);
3918 lockdep_assert_held(&wq->mutex);
3919
3920
3921 link_pwq(pwq);
3922
3923 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3924 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3925 return old_pwq;
3926}
3927
3928
3929struct apply_wqattrs_ctx {
3930 struct workqueue_struct *wq;
3931 struct workqueue_attrs *attrs;
3932 struct list_head list;
3933 struct pool_workqueue *dfl_pwq;
3934 struct pool_workqueue *pwq_tbl[];
3935};
3936
3937
3938static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3939{
3940 if (ctx) {
3941 int node;
3942
3943 for_each_node(node)
3944 put_pwq_unlocked(ctx->pwq_tbl[node]);
3945 put_pwq_unlocked(ctx->dfl_pwq);
3946
3947 free_workqueue_attrs(ctx->attrs);
3948
3949 kfree(ctx);
3950 }
3951}
3952
3953
3954static struct apply_wqattrs_ctx *
3955apply_wqattrs_prepare(struct workqueue_struct *wq,
3956 const struct workqueue_attrs *attrs)
3957{
3958 struct apply_wqattrs_ctx *ctx;
3959 struct workqueue_attrs *new_attrs, *tmp_attrs;
3960 int node;
3961
3962 lockdep_assert_held(&wq_pool_mutex);
3963
3964 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
3965
3966 new_attrs = alloc_workqueue_attrs();
3967 tmp_attrs = alloc_workqueue_attrs();
3968 if (!ctx || !new_attrs || !tmp_attrs)
3969 goto out_free;
3970
3971
3972
3973
3974
3975
3976 copy_workqueue_attrs(new_attrs, attrs);
3977 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3978 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3979 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3980
3981
3982
3983
3984
3985
3986 copy_workqueue_attrs(tmp_attrs, new_attrs);
3987
3988
3989
3990
3991
3992
3993 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3994 if (!ctx->dfl_pwq)
3995 goto out_free;
3996
3997 for_each_node(node) {
3998 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3999 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
4000 if (!ctx->pwq_tbl[node])
4001 goto out_free;
4002 } else {
4003 ctx->dfl_pwq->refcnt++;
4004 ctx->pwq_tbl[node] = ctx->dfl_pwq;
4005 }
4006 }
4007
4008
4009 copy_workqueue_attrs(new_attrs, attrs);
4010 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
4011 ctx->attrs = new_attrs;
4012
4013 ctx->wq = wq;
4014 free_workqueue_attrs(tmp_attrs);
4015 return ctx;
4016
4017out_free:
4018 free_workqueue_attrs(tmp_attrs);
4019 free_workqueue_attrs(new_attrs);
4020 apply_wqattrs_cleanup(ctx);
4021 return NULL;
4022}
4023
4024
4025static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
4026{
4027 int node;
4028
4029
4030 mutex_lock(&ctx->wq->mutex);
4031
4032 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
4033
4034
4035 for_each_node(node)
4036 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
4037 ctx->pwq_tbl[node]);
4038
4039
4040 link_pwq(ctx->dfl_pwq);
4041 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
4042
4043 mutex_unlock(&ctx->wq->mutex);
4044}
4045
4046static void apply_wqattrs_lock(void)
4047{
4048
4049 cpus_read_lock();
4050 mutex_lock(&wq_pool_mutex);
4051}
4052
4053static void apply_wqattrs_unlock(void)
4054{
4055 mutex_unlock(&wq_pool_mutex);
4056 cpus_read_unlock();
4057}
4058
4059static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
4060 const struct workqueue_attrs *attrs)
4061{
4062 struct apply_wqattrs_ctx *ctx;
4063
4064
4065 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4066 return -EINVAL;
4067
4068
4069 if (!list_empty(&wq->pwqs)) {
4070 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4071 return -EINVAL;
4072
4073 wq->flags &= ~__WQ_ORDERED;
4074 }
4075
4076 ctx = apply_wqattrs_prepare(wq, attrs);
4077 if (!ctx)
4078 return -ENOMEM;
4079
4080
4081 apply_wqattrs_commit(ctx);
4082 apply_wqattrs_cleanup(ctx);
4083
4084 return 0;
4085}
4086
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node they were issued on.  Older pwqs are released as in-flight
 * work items finish.  Note that a work item which repeatedly requeues
 * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
 *
 * Return: 0 on success and -errno on failure.
 */
int apply_workqueue_attrs(struct workqueue_struct *wq,
			  const struct workqueue_attrs *attrs)
{
	int ret;

	lockdep_assert_cpus_held();

	mutex_lock(&wq_pool_mutex);
	ret = apply_workqueue_attrs_locked(wq, attrs);
	mutex_unlock(&wq_pool_mutex);

	return ret;
}
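/*
 * Usage sketch (illustrative only): pinning an unbound workqueue to a
 * CPU subset.  Error handling is elided and my_wq is hypothetical; the
 * caller must hold CPU hotplug read exclusion as documented above.
 *
 *	struct workqueue_attrs *attrs = alloc_workqueue_attrs();
 *
 *	attrs->nice = -5;
 *	cpumask_copy(attrs->cpumask, cpumask_of(2));
 *	cpus_read_lock();
 *	apply_workqueue_attrs(my_wq, attrs);	// my_wq must be WQ_UNBOUND
 *	cpus_read_unlock();
 *	free_workqueue_attrs(attrs);
 */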
4118
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged, update NUMA affinity of
 * @wq accordingly.
 *
 * If NUMA affinity can't be adjusted due to memory allocation failure, it
 * falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing the work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  This is similar to how per-cpu
 * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * CPU_DOWN_PREPARE.
 */
static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
				   bool online)
{
	int node = cpu_to_node(cpu);
	int cpu_off = online ? -1 : cpu;
	struct pool_workqueue *old_pwq = NULL, *pwq;
	struct workqueue_attrs *target_attrs;
	cpumask_t *cpumask;

	lockdep_assert_held(&wq_pool_mutex);

	if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
	    wq->unbound_attrs->no_numa)
		return;
4155
4156
4157
4158
4159
4160
4161 target_attrs = wq_update_unbound_numa_attrs_buf;
4162 cpumask = target_attrs->cpumask;
4163
4164 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4165 pwq = unbound_pwq_by_node(wq, node);
4166
4167
4168
4169
4170
4171
4172
4173 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
4174 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4175 return;
4176 } else {
4177 goto use_dfl_pwq;
4178 }
4179
4180
4181 pwq = alloc_unbound_pwq(wq, target_attrs);
4182 if (!pwq) {
4183 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4184 wq->name);
4185 goto use_dfl_pwq;
4186 }
4187
4188
4189 mutex_lock(&wq->mutex);
4190 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4191 goto out_unlock;
4192
4193use_dfl_pwq:
4194 mutex_lock(&wq->mutex);
4195 raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
4196 get_pwq(wq->dfl_pwq);
4197 raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4198 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4199out_unlock:
4200 mutex_unlock(&wq->mutex);
4201 put_pwq_unlocked(old_pwq);
4202}
4203
4204static int alloc_and_link_pwqs(struct workqueue_struct *wq)
4205{
4206 bool highpri = wq->flags & WQ_HIGHPRI;
4207 int cpu, ret;
4208
4209 if (!(wq->flags & WQ_UNBOUND)) {
4210 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4211 if (!wq->cpu_pwqs)
4212 return -ENOMEM;
4213
4214 for_each_possible_cpu(cpu) {
4215 struct pool_workqueue *pwq =
4216 per_cpu_ptr(wq->cpu_pwqs, cpu);
4217 struct worker_pool *cpu_pools =
4218 per_cpu(cpu_worker_pools, cpu);
4219
4220 init_pwq(pwq, wq, &cpu_pools[highpri]);
4221
4222 mutex_lock(&wq->mutex);
4223 link_pwq(pwq);
4224 mutex_unlock(&wq->mutex);
4225 }
4226 return 0;
4227 }
4228
4229 cpus_read_lock();
4230 if (wq->flags & __WQ_ORDERED) {
4231 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4232
4233 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4234 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4235 "ordering guarantee broken for workqueue %s\n", wq->name);
4236 } else {
4237 ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4238 }
4239 cpus_read_unlock();
4240
4241 return ret;
4242}
4243
4244static int wq_clamp_max_active(int max_active, unsigned int flags,
4245 const char *name)
4246{
4247 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4248
4249 if (max_active < 1 || max_active > lim)
4250 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4251 max_active, name, 1, lim);
4252
4253 return clamp_val(max_active, 1, lim);
4254}
4255
/*
 * Workqueues which may be used during memory reclaim should have a rescuer
 * to guarantee forward progress.
 */
static int init_rescuer(struct workqueue_struct *wq)
{
	struct worker *rescuer;
	int ret;

	if (!(wq->flags & WQ_MEM_RECLAIM))
		return 0;

	rescuer = alloc_worker(NUMA_NO_NODE);
	if (!rescuer)
		return -ENOMEM;

	rescuer->rescue_wq = wq;
	rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
	if (IS_ERR(rescuer->task)) {
		ret = PTR_ERR(rescuer->task);
		kfree(rescuer);
		return ret;
	}

	wq->rescuer = rescuer;
	kthread_bind_mask(rescuer->task, cpu_possible_mask);
	wake_up_process(rescuer->task);

	return 0;
}
4286
__printf(1, 4)
struct workqueue_struct *alloc_workqueue(const char *fmt,
					 unsigned int flags,
					 int max_active, ...)
{
	size_t tbl_size = 0;
	va_list args;
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	/*
	 * Unbound && max_active == 1 used to imply ordered, which is no
	 * longer the case on NUMA machines due to per-node pools.  While
	 * alloc_ordered_workqueue() is the right way to create an ordered
	 * workqueue, keep the previous behavior to avoid subtle breakages
	 * on NUMA.
	 */
	if ((flags & WQ_UNBOUND) && max_active == 1)
		flags |= __WQ_ORDERED;

	/* see the comment above the definition of WQ_POWER_EFFICIENT */
	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
		flags |= WQ_UNBOUND;

	/* allocate wq and format name */
	if (flags & WQ_UNBOUND)
		tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);

	wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
	if (!wq)
		return NULL;

	if (flags & WQ_UNBOUND) {
		wq->unbound_attrs = alloc_workqueue_attrs();
		if (!wq->unbound_attrs)
			goto err_free_wq;
	}

	va_start(args, max_active);
	vsnprintf(wq->name, sizeof(wq->name), fmt, args);
	va_end(args);

	max_active = max_active ?: WQ_DFL_ACTIVE;
	max_active = wq_clamp_max_active(max_active, flags, wq->name);

	/* init wq */
	wq->flags = flags;
	wq->saved_max_active = max_active;
	mutex_init(&wq->mutex);
	atomic_set(&wq->nr_pwqs_to_flush, 0);
	INIT_LIST_HEAD(&wq->pwqs);
	INIT_LIST_HEAD(&wq->flusher_queue);
	INIT_LIST_HEAD(&wq->flusher_overflow);
	INIT_LIST_HEAD(&wq->maydays);

	wq_init_lockdep(wq);
	INIT_LIST_HEAD(&wq->list);

	if (alloc_and_link_pwqs(wq) < 0)
		goto err_unreg_lockdep;

	if (wq_online && init_rescuer(wq) < 0)
		goto err_destroy;

	if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
		goto err_destroy;

	/*
	 * wq_pool_mutex protects global freeze state and workqueues list.
	 * Grab it, adjust max_active and add the new @wq to workqueues
	 * list.
	 */
	mutex_lock(&wq_pool_mutex);

	mutex_lock(&wq->mutex);
	for_each_pwq(pwq, wq)
		pwq_adjust_max_active(pwq);
	mutex_unlock(&wq->mutex);

	list_add_tail_rcu(&wq->list, &workqueues);

	mutex_unlock(&wq_pool_mutex);

	return wq;

err_unreg_lockdep:
	wq_unregister_lockdep(wq);
	wq_free_lockdep(wq);
err_free_wq:
	free_workqueue_attrs(wq->unbound_attrs);
	kfree(wq);
	return NULL;
err_destroy:
	destroy_workqueue(wq);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_workqueue);
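/*
 * Usage sketch (illustrative only): a driver creating and destroying its
 * own workqueue.  The name format and flag choice are examples, not
 * requirements; WQ_MEM_RECLAIM makes init_rescuer() above attach a rescuer.
 *
 *	struct workqueue_struct *wq;
 *
 *	wq = alloc_workqueue("mydrv/%s", WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
 *			     dev_name(dev));
 *	if (!wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(wq);
 */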
4384
4385static bool pwq_busy(struct pool_workqueue *pwq)
4386{
4387 int i;
4388
4389 for (i = 0; i < WORK_NR_COLORS; i++)
4390 if (pwq->nr_in_flight[i])
4391 return true;
4392
4393 if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4394 return true;
4395 if (pwq->nr_active || !list_empty(&pwq->inactive_works))
4396 return true;
4397
4398 return false;
4399}
4400
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;
	int node;

	/*
	 * Remove it from sysfs first so that sanity check failure doesn't
	 * lead to sysfs name conflicts.
	 */
	workqueue_sysfs_unregister(wq);

	/* drain it before proceeding with destruction */
	drain_workqueue(wq);

	/* kill rescuer, if sanity checks fail, leave it w/o rescuer */
	if (wq->rescuer) {
		struct worker *rescuer = wq->rescuer;

		/* this prevents new queueing */
		raw_spin_lock_irq(&wq_mayday_lock);
		wq->rescuer = NULL;
		raw_spin_unlock_irq(&wq_mayday_lock);

		/* rescuer will empty maydays list before exiting */
		kthread_stop(rescuer->task);
		kfree(rescuer);
	}

	/*
	 * Sanity checks - grab all the locks so that we wait for all
	 * in-flight operations which may do put_pwq().
	 */
	mutex_lock(&wq_pool_mutex);
	mutex_lock(&wq->mutex);
	for_each_pwq(pwq, wq) {
		raw_spin_lock_irq(&pwq->pool->lock);
		if (WARN_ON(pwq_busy(pwq))) {
			pr_warn("%s: %s has the following busy pwq\n",
				__func__, wq->name);
			show_pwq(pwq);
			raw_spin_unlock_irq(&pwq->pool->lock);
			mutex_unlock(&wq->mutex);
			mutex_unlock(&wq_pool_mutex);
			show_workqueue_state();
			return;
		}
		raw_spin_unlock_irq(&pwq->pool->lock);
	}
	mutex_unlock(&wq->mutex);

	/*
	 * wq list is used to freeze wq, remove from list after
	 * flushing is complete in case freeze races us.
	 */
	list_del_rcu(&wq->list);
	mutex_unlock(&wq_pool_mutex);

	if (!(wq->flags & WQ_UNBOUND)) {
		wq_unregister_lockdep(wq);
		/*
		 * The base ref is never dropped on per-cpu pwqs.  Directly
		 * schedule RCU free.
		 */
		call_rcu(&wq->rcu, rcu_free_wq);
	} else {
		/*
		 * We're the sole accessor of @wq at this point.  Directly
		 * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
		 * @wq will be freed when the last pwq is released.
		 */
		for_each_node(node) {
			pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
			RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
			put_pwq_unlocked(pwq);
		}

		/*
		 * Put dfl_pwq.  @wq may be freed any time after dfl_pwq is
		 * put.  Don't access it afterwards.
		 */
		pwq = wq->dfl_pwq;
		wq->dfl_pwq = NULL;
		put_pwq_unlocked(pwq);
	}
}
EXPORT_SYMBOL_GPL(destroy_workqueue);
4493
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value.
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
{
	struct pool_workqueue *pwq;

	/* disallow meddling with max_active for ordered workqueues */
	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
		return;

	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);

	mutex_lock(&wq->mutex);

	wq->flags &= ~__WQ_ORDERED;
	wq->saved_max_active = max_active;

	for_each_pwq(pwq, wq)
		pwq_adjust_max_active(pwq);

	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(workqueue_set_max_active);
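/*
 * Usage sketch (illustrative only): throttling a workqueue so that at
 * most one work item per pwq executes concurrently.  Note this is not
 * the same as an ordered workqueue, which must be created with
 * alloc_ordered_workqueue().  my_wq is hypothetical.
 *
 *	workqueue_set_max_active(my_wq, 1);
 */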
4525
/**
 * current_work - retrieve %current task's work struct
 *
 * Determine if %current task is a workqueue worker and what it's working on.
 * Useful to find out the context that the %current task is running in.
 *
 * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
 */
struct work_struct *current_work(void)
{
	struct worker *worker = current_wq_worker();

	return worker ? worker->current_work : NULL;
}
EXPORT_SYMBOL(current_work);

/**
 * current_is_workqueue_rescuer - is %current workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
 * work functions to determine whether they're being run off the rescuer task.
 *
 * Return: %true if %current is a workqueue rescuer. %false otherwise.
 */
bool current_is_workqueue_rescuer(void)
{
	struct worker *worker = current_wq_worker();

	return worker && worker->rescue_wq;
}
4556
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean that the workqueue
 * is also congested on other CPUs / NUMA nodes.
 *
 * Return:
 * %true if congested, %false otherwise.
 */
bool workqueue_congested(int cpu, struct workqueue_struct *wq)
{
	struct pool_workqueue *pwq;
	bool ret;

	rcu_read_lock();
	preempt_disable();

	if (cpu == WORK_CPU_UNBOUND)
		cpu = smp_processor_id();

	if (!(wq->flags & WQ_UNBOUND))
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	ret = !list_empty(&pwq->inactive_works);
	preempt_enable();
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(workqueue_congested);
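/*
 * Usage sketch (illustrative only): using the advisory congestion hint
 * to shed optional work.  Because the result is unsynchronized, it may
 * only influence best-effort behavior, never correctness.  my_wq and
 * optional_stats_work are hypothetical.
 *
 *	if (!workqueue_congested(WORK_CPU_UNBOUND, my_wq))
 *		queue_work(my_wq, &optional_stats_work);
 */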
4598
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
unsigned int work_busy(struct work_struct *work)
{
	struct worker_pool *pool;
	unsigned long flags;
	unsigned int ret = 0;

	if (work_pending(work))
		ret |= WORK_BUSY_PENDING;

	rcu_read_lock();
	pool = get_work_pool(work);
	if (pool) {
		raw_spin_lock_irqsave(&pool->lock, flags);
		if (find_worker_executing_work(pool, work))
			ret |= WORK_BUSY_RUNNING;
		raw_spin_unlock_irqrestore(&pool->lock, flags);
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(work_busy);
4632
/**
 * set_worker_desc - set description for the current work item
 * @fmt: printf-style format string
 * @...: arguments for the format string
 *
 * This function can be called by a running work function to describe what
 * the work item is about.  If the worker task gets dumped, this
 * information will be printed out together to help debugging.  The
 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
 */
void set_worker_desc(const char *fmt, ...)
{
	struct worker *worker = current_wq_worker();
	va_list args;

	if (worker) {
		va_start(args, fmt);
		vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
		va_end(args);
	}
}
EXPORT_SYMBOL_GPL(set_worker_desc);
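/*
 * Usage sketch (illustrative only): tagging the executing worker from
 * inside a work function so dumps identify which object was being
 * serviced.  All names below are hypothetical.
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		struct my_dev *dev = container_of(work, struct my_dev, work);
 *
 *		set_worker_desc("mydrv:%s", dev_name(&dev->dev));
 *		...
 *	}
 */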
4655
/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * name of the workqueue being serviced and worker description set with
 * set_worker_desc() by the currently executing work item.
 *
 * This function can be safely called on any task as long as the
 * task_struct itself is accessible.  While safe, this function isn't
 * synchronized and may print out mixups or garbages of limited length.
 */
void print_worker_info(const char *log_lvl, struct task_struct *task)
{
	work_func_t *fn = NULL;
	char name[WQ_NAME_LEN] = { };
	char desc[WORKER_DESC_LEN] = { };
	struct pool_workqueue *pwq = NULL;
	struct workqueue_struct *wq = NULL;
	struct worker *worker;

	if (!(task->flags & PF_WQ_WORKER))
		return;

	/*
	 * This function is called without any synchronization and @task
	 * could be in any state.  Be careful with dereferences.
	 */
	worker = kthread_probe_data(task);

	/*
	 * Carefully copy the associated workqueue's workfn, name and desc.
	 * Keep the original last '\0' in case the original is garbage.
	 */
	copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
	copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
	copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
	copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
	copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);

	if (fn || name[0] || desc[0]) {
		printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
		if (strcmp(name, desc))
			pr_cont(" (%s)", desc);
		pr_cont("\n");
	}
}
4704
4705static void pr_cont_pool_info(struct worker_pool *pool)
4706{
4707 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4708 if (pool->node != NUMA_NO_NODE)
4709 pr_cont(" node=%d", pool->node);
4710 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4711}
4712
4713static void pr_cont_work(bool comma, struct work_struct *work)
4714{
4715 if (work->func == wq_barrier_func) {
4716 struct wq_barrier *barr;
4717
4718 barr = container_of(work, struct wq_barrier, work);
4719
4720 pr_cont("%s BAR(%d)", comma ? "," : "",
4721 task_pid_nr(barr->task));
4722 } else {
4723 pr_cont("%s %ps", comma ? "," : "", work->func);
4724 }
4725}
4726
4727static void show_pwq(struct pool_workqueue *pwq)
4728{
4729 struct worker_pool *pool = pwq->pool;
4730 struct work_struct *work;
4731 struct worker *worker;
4732 bool has_in_flight = false, has_pending = false;
4733 int bkt;
4734
4735 pr_info(" pwq %d:", pool->id);
4736 pr_cont_pool_info(pool);
4737
4738 pr_cont(" active=%d/%d refcnt=%d%s\n",
4739 pwq->nr_active, pwq->max_active, pwq->refcnt,
4740 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4741
4742 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4743 if (worker->current_pwq == pwq) {
4744 has_in_flight = true;
4745 break;
4746 }
4747 }
4748 if (has_in_flight) {
4749 bool comma = false;
4750
4751 pr_info(" in-flight:");
4752 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4753 if (worker->current_pwq != pwq)
4754 continue;
4755
4756 pr_cont("%s %d%s:%ps", comma ? "," : "",
4757 task_pid_nr(worker->task),
4758 worker->rescue_wq ? "(RESCUER)" : "",
4759 worker->current_func);
4760 list_for_each_entry(work, &worker->scheduled, entry)
4761 pr_cont_work(false, work);
4762 comma = true;
4763 }
4764 pr_cont("\n");
4765 }
4766
4767 list_for_each_entry(work, &pool->worklist, entry) {
4768 if (get_work_pwq(work) == pwq) {
4769 has_pending = true;
4770 break;
4771 }
4772 }
4773 if (has_pending) {
4774 bool comma = false;
4775
4776 pr_info(" pending:");
4777 list_for_each_entry(work, &pool->worklist, entry) {
4778 if (get_work_pwq(work) != pwq)
4779 continue;
4780
4781 pr_cont_work(comma, work);
4782 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4783 }
4784 pr_cont("\n");
4785 }
4786
4787 if (!list_empty(&pwq->inactive_works)) {
4788 bool comma = false;
4789
4790 pr_info(" inactive:");
4791 list_for_each_entry(work, &pwq->inactive_works, entry) {
4792 pr_cont_work(comma, work);
4793 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4794 }
4795 pr_cont("\n");
4796 }
4797}
4798
/**
 * show_workqueue_state - dump workqueue state
 *
 * Called from a sysrq handler or try_to_freeze_tasks() and prints out
 * all busy workqueues and pools.
 */
void show_workqueue_state(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	unsigned long flags;
	int pi;

	rcu_read_lock();

	pr_info("Showing busy workqueues and worker pools:\n");
4815
4816 list_for_each_entry_rcu(wq, &workqueues, list) {
4817 struct pool_workqueue *pwq;
4818 bool idle = true;
4819
4820 for_each_pwq(pwq, wq) {
4821 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4822 idle = false;
4823 break;
4824 }
4825 }
4826 if (idle)
4827 continue;
4828
4829 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4830
4831 for_each_pwq(pwq, wq) {
4832 raw_spin_lock_irqsave(&pwq->pool->lock, flags);
4833 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4834
4835
4836
4837
4838
4839 printk_deferred_enter();
4840 show_pwq(pwq);
4841 printk_deferred_exit();
4842 }
4843 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
4844
4845
4846
4847
4848
4849 touch_nmi_watchdog();
4850 }
4851 }
4852
4853 for_each_pool(pool, pi) {
4854 struct worker *worker;
4855 bool first = true;
4856
4857 raw_spin_lock_irqsave(&pool->lock, flags);
4858 if (pool->nr_workers == pool->nr_idle)
4859 goto next_pool;
4860
4861
4862
4863
4864
4865 printk_deferred_enter();
4866 pr_info("pool %d:", pool->id);
4867 pr_cont_pool_info(pool);
4868 pr_cont(" hung=%us workers=%d",
4869 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4870 pool->nr_workers);
4871 if (pool->manager)
4872 pr_cont(" manager: %d",
4873 task_pid_nr(pool->manager->task));
4874 list_for_each_entry(worker, &pool->idle_list, entry) {
4875 pr_cont(" %s%d", first ? "idle: " : "",
4876 task_pid_nr(worker->task));
4877 first = false;
4878 }
4879 pr_cont("\n");
4880 printk_deferred_exit();
4881 next_pool:
4882 raw_spin_unlock_irqrestore(&pool->lock, flags);
4883
4884
4885
4886
4887
4888 touch_nmi_watchdog();
4889 }
4890
4891 rcu_read_unlock();
4892}
4893
4894
4895void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
4896{
4897 int off;
4898
4899
4900 off = strscpy(buf, task->comm, size);
4901 if (off < 0)
4902 return;
4903
4904
4905 mutex_lock(&wq_pool_attach_mutex);
4906
4907 if (task->flags & PF_WQ_WORKER) {
4908 struct worker *worker = kthread_data(task);
4909 struct worker_pool *pool = worker->pool;
4910
4911 if (pool) {
4912 raw_spin_lock_irq(&pool->lock);
4913
4914
4915
4916
4917
4918 if (worker->desc[0] != '\0') {
4919 if (worker->current_work)
4920 scnprintf(buf + off, size - off, "+%s",
4921 worker->desc);
4922 else
4923 scnprintf(buf + off, size - off, "-%s",
4924 worker->desc);
4925 }
4926 raw_spin_unlock_irq(&pool->lock);
4927 }
4928 }
4929
4930 mutex_unlock(&wq_pool_attach_mutex);
4931}
4932
#ifdef CONFIG_SMP

/*
 * CPU hotplug.
 *
 * There are two challenges in supporting CPU hotplug.  Firstly, there
 * are a lot of assumptions on strong associations among work, pwq and
 * pool which make migrating pending and scheduled works very
 * difficult to implement without impacting hot paths.  Secondly,
 * worker pools serve mix of short, long and very long running works making
 * blocked draining impractical.
 *
 * This is solved by allowing the pools to be disassociated from the CPU
 * running as an unbound one and allowing it to be reattached later if the
 * cpu comes back online.
 */

static void unbind_workers(int cpu)
{
4952 struct worker_pool *pool;
4953 struct worker *worker;
4954
4955 for_each_cpu_worker_pool(pool, cpu) {
4956 mutex_lock(&wq_pool_attach_mutex);
4957 raw_spin_lock_irq(&pool->lock);
4958
4959
4960
4961
4962
4963
4964
4965
4966 for_each_pool_worker(worker, pool)
4967 worker->flags |= WORKER_UNBOUND;
4968
4969 pool->flags |= POOL_DISASSOCIATED;
4970
4971 raw_spin_unlock_irq(&pool->lock);
4972
4973 for_each_pool_worker(worker, pool) {
4974 kthread_set_per_cpu(worker->task, -1);
4975 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
4976 }
4977
4978 mutex_unlock(&wq_pool_attach_mutex);
4979
4980
4981
4982
4983
4984
4985
4986 schedule();
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996 atomic_set(&pool->nr_running, 0);
4997
4998
4999
5000
5001
5002
5003 raw_spin_lock_irq(&pool->lock);
5004 wake_up_worker(pool);
5005 raw_spin_unlock_irq(&pool->lock);
5006 }
5007}
5008
/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Rebind all workers to the CPU.
 */
static void rebind_workers(struct worker_pool *pool)
{
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);
5020
5021
5022
5023
5024
5025
5026
5027
5028 for_each_pool_worker(worker, pool) {
5029 kthread_set_per_cpu(worker->task, pool->cpu);
5030 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
5031 pool->attrs->cpumask) < 0);
5032 }
5033
5034 raw_spin_lock_irq(&pool->lock);
5035
5036 pool->flags &= ~POOL_DISASSOCIATED;
5037
5038 for_each_pool_worker(worker, pool) {
5039 unsigned int worker_flags = worker->flags;
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049 if (worker_flags & WORKER_IDLE)
5050 wake_up_process(worker->task);
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
5068 worker_flags |= WORKER_REBOUND;
5069 worker_flags &= ~WORKER_UNBOUND;
5070 WRITE_ONCE(worker->flags, worker_flags);
5071 }
5072
5073 raw_spin_unlock_irq(&pool->lock);
5074}
5075
/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs.  When a worker of such pool get scheduled, the scheduler resets
 * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
 * online CPU before, cpus_allowed of all its workers should be restored.
 */
static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
	static cpumask_t cpumask;
	struct worker *worker;

	lockdep_assert_held(&wq_pool_attach_mutex);

	/* is @cpu allowed for @pool? */
	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
		return;
5096
5097 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
5098
5099
5100 for_each_pool_worker(worker, pool)
5101 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
5102}
5103
5104int workqueue_prepare_cpu(unsigned int cpu)
5105{
5106 struct worker_pool *pool;
5107
5108 for_each_cpu_worker_pool(pool, cpu) {
5109 if (pool->nr_workers)
5110 continue;
5111 if (!create_worker(pool))
5112 return -ENOMEM;
5113 }
5114 return 0;
5115}
5116
5117int workqueue_online_cpu(unsigned int cpu)
5118{
5119 struct worker_pool *pool;
5120 struct workqueue_struct *wq;
5121 int pi;
5122
5123 mutex_lock(&wq_pool_mutex);
5124
5125 for_each_pool(pool, pi) {
5126 mutex_lock(&wq_pool_attach_mutex);
5127
5128 if (pool->cpu == cpu)
5129 rebind_workers(pool);
5130 else if (pool->cpu < 0)
5131 restore_unbound_workers_cpumask(pool, cpu);
5132
5133 mutex_unlock(&wq_pool_attach_mutex);
5134 }
5135
5136
5137 list_for_each_entry(wq, &workqueues, list)
5138 wq_update_unbound_numa(wq, cpu, true);
5139
5140 mutex_unlock(&wq_pool_mutex);
5141 return 0;
5142}
5143
5144int workqueue_offline_cpu(unsigned int cpu)
5145{
5146 struct workqueue_struct *wq;
5147
5148
5149 if (WARN_ON(cpu != smp_processor_id()))
5150 return -1;
5151
5152 unbind_workers(cpu);
5153
5154
5155 mutex_lock(&wq_pool_mutex);
5156 list_for_each_entry(wq, &workqueues, list)
5157 wq_update_unbound_numa(wq, cpu, false);
5158 mutex_unlock(&wq_pool_mutex);
5159
5160 return 0;
5161}
5162
5163struct work_for_cpu {
5164 struct work_struct work;
5165 long (*fn)(void *);
5166 void *arg;
5167 long ret;
5168};
5169
5170static void work_for_cpu_fn(struct work_struct *work)
5171{
5172 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5173
5174 wfc->ret = wfc->fn(wfc->arg);
5175}
5176
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
	struct work_for_cpu wfc = { .fn = fn, .arg = arg };

	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
	schedule_work_on(cpu, &wfc.work);
	flush_work(&wfc.work);
	destroy_work_on_stack(&wfc.work);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
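/*
 * Usage sketch (illustrative only): reading a per-CPU sensor from the
 * right CPU in sleepable context.  my_read_sensor is a hypothetical
 * helper, not a kernel API.
 *
 *	static long read_node_temp(void *arg)
 *	{
 *		return my_read_sensor((long)arg);
 *	}
 *	...
 *	long val = work_on_cpu(cpu, read_node_temp, (void *)sensor_id);
 *
 * work_on_cpu_safe() below is the variant to use when @cpu may be
 * unplugged concurrently.
 */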
5199
/**
 * work_on_cpu_safe - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn:  the function to run
 * @arg: the function argument
 *
 * Disables CPU hotplug and calls work_on_cpu().  The caller must not hold
 * any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
{
	long ret = -ENODEV;

	cpus_read_lock();
	if (cpu_online(cpu))
		ret = work_on_cpu(cpu, fn, arg);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu_safe);
5222#endif
5223
#ifdef CONFIG_FREEZER

/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
 * workqueues will queue new works to their inactive_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
void freeze_workqueues_begin(void)
{
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	WARN_ON_ONCE(workqueue_freezing);
	workqueue_freezing = true;

	list_for_each_entry(wq, &workqueues, list) {
		mutex_lock(&wq->mutex);
		for_each_pwq(pwq, wq)
			pwq_adjust_max_active(pwq);
		mutex_unlock(&wq->mutex);
	}

	mutex_unlock(&wq_pool_mutex);
}
5255
/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex.
 *
 * Return:
 * %true if some freezable workqueues are still busy.  %false if freezing
 * is complete.
 */
bool freeze_workqueues_busy(void)
{
	bool busy = false;
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	WARN_ON_ONCE(!workqueue_freezing);
5278
5279 list_for_each_entry(wq, &workqueues, list) {
5280 if (!(wq->flags & WQ_FREEZABLE))
5281 continue;
5282
5283
5284
5285
5286 rcu_read_lock();
5287 for_each_pwq(pwq, wq) {
5288 WARN_ON_ONCE(pwq->nr_active < 0);
5289 if (pwq->nr_active) {
5290 busy = true;
5291 rcu_read_unlock();
5292 goto out_unlock;
5293 }
5294 }
5295 rcu_read_unlock();
5296 }
5297out_unlock:
5298 mutex_unlock(&wq_pool_mutex);
5299 return busy;
5300}
5301
/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected
 * frozen works are transferred to their respective pool worklists.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
void thaw_workqueues(void)
{
	struct workqueue_struct *wq;
	struct pool_workqueue *pwq;

	mutex_lock(&wq_pool_mutex);

	if (!workqueue_freezing)
		goto out_unlock;

	workqueue_freezing = false;
5322
5323
5324 list_for_each_entry(wq, &workqueues, list) {
5325 mutex_lock(&wq->mutex);
5326 for_each_pwq(pwq, wq)
5327 pwq_adjust_max_active(pwq);
5328 mutex_unlock(&wq->mutex);
5329 }
5330
5331out_unlock:
5332 mutex_unlock(&wq_pool_mutex);
5333}
5334#endif
5335
5336static int workqueue_apply_unbound_cpumask(void)
5337{
5338 LIST_HEAD(ctxs);
5339 int ret = 0;
5340 struct workqueue_struct *wq;
5341 struct apply_wqattrs_ctx *ctx, *n;
5342
5343 lockdep_assert_held(&wq_pool_mutex);
5344
5345 list_for_each_entry(wq, &workqueues, list) {
5346 if (!(wq->flags & WQ_UNBOUND))
5347 continue;
5348
5349 if (wq->flags & __WQ_ORDERED)
5350 continue;
5351
5352 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
5353 if (!ctx) {
5354 ret = -ENOMEM;
5355 break;
5356 }
5357
5358 list_add_tail(&ctx->list, &ctxs);
5359 }
5360
5361 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5362 if (!ret)
5363 apply_wqattrs_commit(ctx);
5364 apply_wqattrs_cleanup(ctx);
5365 }
5366
5367 return ret;
5368}
5369
/**
 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueues cpumask is a global cpumask that limits
 * the affinity of all unbound workqueues.  This function checks the @cpumask
 * and applies it to all unbound workqueues and updates all pwqs of them.
 *
 * Return:	0	- Success
 *		-EINVAL	- Invalid @cpumask
 *		-ENOMEM	- Failed to allocate memory for attrs or pwqs.
 */
int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
{
	int ret = -EINVAL;
	cpumask_var_t saved_cpumask;

	if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Not excluding isolated cpus on purpose.
	 * If the user wishes to include them, we allow that.
	 */
	cpumask_and(cpumask, cpumask, cpu_possible_mask);
5395 if (!cpumask_empty(cpumask)) {
5396 apply_wqattrs_lock();
5397
5398
5399 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5400
5401
5402 cpumask_copy(wq_unbound_cpumask, cpumask);
5403 ret = workqueue_apply_unbound_cpumask();
5404
5405
5406 if (ret < 0)
5407 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5408
5409 apply_wqattrs_unlock();
5410 }
5411
5412 free_cpumask_var(saved_cpumask);
5413 return ret;
5414}
5415
5416#ifdef CONFIG_SYSFS
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432struct wq_device {
5433 struct workqueue_struct *wq;
5434 struct device dev;
5435};
5436
5437static struct workqueue_struct *dev_to_wq(struct device *dev)
5438{
5439 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5440
5441 return wq_dev->wq;
5442}
5443
5444static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5445 char *buf)
5446{
5447 struct workqueue_struct *wq = dev_to_wq(dev);
5448
5449 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5450}
5451static DEVICE_ATTR_RO(per_cpu);
5452
5453static ssize_t max_active_show(struct device *dev,
5454 struct device_attribute *attr, char *buf)
5455{
5456 struct workqueue_struct *wq = dev_to_wq(dev);
5457
5458 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5459}
5460
5461static ssize_t max_active_store(struct device *dev,
5462 struct device_attribute *attr, const char *buf,
5463 size_t count)
5464{
5465 struct workqueue_struct *wq = dev_to_wq(dev);
5466 int val;
5467
5468 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5469 return -EINVAL;
5470
5471 workqueue_set_max_active(wq, val);
5472 return count;
5473}
5474static DEVICE_ATTR_RW(max_active);
5475
5476static struct attribute *wq_sysfs_attrs[] = {
5477 &dev_attr_per_cpu.attr,
5478 &dev_attr_max_active.attr,
5479 NULL,
5480};
5481ATTRIBUTE_GROUPS(wq_sysfs);
5482
5483static ssize_t wq_pool_ids_show(struct device *dev,
5484 struct device_attribute *attr, char *buf)
5485{
5486 struct workqueue_struct *wq = dev_to_wq(dev);
5487 const char *delim = "";
5488 int node, written = 0;
5489
5490 cpus_read_lock();
5491 rcu_read_lock();
5492 for_each_node(node) {
5493 written += scnprintf(buf + written, PAGE_SIZE - written,
5494 "%s%d:%d", delim, node,
5495 unbound_pwq_by_node(wq, node)->pool->id);
5496 delim = " ";
5497 }
5498 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5499 rcu_read_unlock();
5500 cpus_read_unlock();
5501
5502 return written;
5503}
5504
5505static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5506 char *buf)
5507{
5508 struct workqueue_struct *wq = dev_to_wq(dev);
5509 int written;
5510
5511 mutex_lock(&wq->mutex);
5512 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5513 mutex_unlock(&wq->mutex);
5514
5515 return written;
5516}
5517
5518
5519static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5520{
5521 struct workqueue_attrs *attrs;
5522
5523 lockdep_assert_held(&wq_pool_mutex);
5524
5525 attrs = alloc_workqueue_attrs();
5526 if (!attrs)
5527 return NULL;
5528
5529 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5530 return attrs;
5531}
5532
5533static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5534 const char *buf, size_t count)
5535{
5536 struct workqueue_struct *wq = dev_to_wq(dev);
5537 struct workqueue_attrs *attrs;
5538 int ret = -ENOMEM;
5539
5540 apply_wqattrs_lock();
5541
5542 attrs = wq_sysfs_prep_attrs(wq);
5543 if (!attrs)
5544 goto out_unlock;
5545
5546 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5547 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5548 ret = apply_workqueue_attrs_locked(wq, attrs);
5549 else
5550 ret = -EINVAL;
5551
5552out_unlock:
5553 apply_wqattrs_unlock();
5554 free_workqueue_attrs(attrs);
5555 return ret ?: count;
5556}
5557
5558static ssize_t wq_cpumask_show(struct device *dev,
5559 struct device_attribute *attr, char *buf)
5560{
5561 struct workqueue_struct *wq = dev_to_wq(dev);
5562 int written;
5563
5564 mutex_lock(&wq->mutex);
5565 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5566 cpumask_pr_args(wq->unbound_attrs->cpumask));
5567 mutex_unlock(&wq->mutex);
5568 return written;
5569}
5570
5571static ssize_t wq_cpumask_store(struct device *dev,
5572 struct device_attribute *attr,
5573 const char *buf, size_t count)
5574{
5575 struct workqueue_struct *wq = dev_to_wq(dev);
5576 struct workqueue_attrs *attrs;
5577 int ret = -ENOMEM;
5578
5579 apply_wqattrs_lock();
5580
5581 attrs = wq_sysfs_prep_attrs(wq);
5582 if (!attrs)
5583 goto out_unlock;
5584
5585 ret = cpumask_parse(buf, attrs->cpumask);
5586 if (!ret)
5587 ret = apply_workqueue_attrs_locked(wq, attrs);
5588
5589out_unlock:
5590 apply_wqattrs_unlock();
5591 free_workqueue_attrs(attrs);
5592 return ret ?: count;
5593}
5594
5595static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5596 char *buf)
5597{
5598 struct workqueue_struct *wq = dev_to_wq(dev);
5599 int written;
5600
5601 mutex_lock(&wq->mutex);
5602 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5603 !wq->unbound_attrs->no_numa);
5604 mutex_unlock(&wq->mutex);
5605
5606 return written;
5607}
5608
5609static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5610 const char *buf, size_t count)
5611{
5612 struct workqueue_struct *wq = dev_to_wq(dev);
5613 struct workqueue_attrs *attrs;
5614 int v, ret = -ENOMEM;
5615
5616 apply_wqattrs_lock();
5617
5618 attrs = wq_sysfs_prep_attrs(wq);
5619 if (!attrs)
5620 goto out_unlock;
5621
5622 ret = -EINVAL;
5623 if (sscanf(buf, "%d", &v) == 1) {
5624 attrs->no_numa = !v;
5625 ret = apply_workqueue_attrs_locked(wq, attrs);
5626 }
5627
5628out_unlock:
5629 apply_wqattrs_unlock();
5630 free_workqueue_attrs(attrs);
5631 return ret ?: count;
5632}
5633
5634static struct device_attribute wq_sysfs_unbound_attrs[] = {
5635 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5636 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5637 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5638 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5639 __ATTR_NULL,
5640};
5641
5642static struct bus_type wq_subsys = {
5643 .name = "workqueue",
5644 .dev_groups = wq_sysfs_groups,
5645};
5646
5647static ssize_t wq_unbound_cpumask_show(struct device *dev,
5648 struct device_attribute *attr, char *buf)
5649{
5650 int written;
5651
5652 mutex_lock(&wq_pool_mutex);
5653 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5654 cpumask_pr_args(wq_unbound_cpumask));
5655 mutex_unlock(&wq_pool_mutex);
5656
5657 return written;
5658}
5659
5660static ssize_t wq_unbound_cpumask_store(struct device *dev,
5661 struct device_attribute *attr, const char *buf, size_t count)
5662{
5663 cpumask_var_t cpumask;
5664 int ret;
5665
5666 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5667 return -ENOMEM;
5668
5669 ret = cpumask_parse(buf, cpumask);
5670 if (!ret)
5671 ret = workqueue_set_unbound_cpumask(cpumask);
5672
5673 free_cpumask_var(cpumask);
5674 return ret ? ret : count;
5675}
5676
5677static struct device_attribute wq_sysfs_cpumask_attr =
5678 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5679 wq_unbound_cpumask_store);
5680
5681static int __init wq_sysfs_init(void)
5682{
5683 int err;
5684
5685 err = subsys_virtual_register(&wq_subsys, NULL);
5686 if (err)
5687 return err;
5688
5689 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5690}
5691core_initcall(wq_sysfs_init);
5692
5693static void wq_device_release(struct device *dev)
5694{
5695 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5696
5697 kfree(wq_dev);
5698}
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715int workqueue_sysfs_register(struct workqueue_struct *wq)
5716{
5717 struct wq_device *wq_dev;
5718 int ret;
5719
5720
5721
5722
5723
5724
5725 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5726 return -EINVAL;
5727
5728 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5729 if (!wq_dev)
5730 return -ENOMEM;
5731
5732 wq_dev->wq = wq;
5733 wq_dev->dev.bus = &wq_subsys;
5734 wq_dev->dev.release = wq_device_release;
5735 dev_set_name(&wq_dev->dev, "%s", wq->name);
5736
5737
5738
5739
5740
5741 dev_set_uevent_suppress(&wq_dev->dev, true);
5742
5743 ret = device_register(&wq_dev->dev);
5744 if (ret) {
5745 put_device(&wq_dev->dev);
5746 wq->wq_dev = NULL;
5747 return ret;
5748 }
5749
5750 if (wq->flags & WQ_UNBOUND) {
5751 struct device_attribute *attr;
5752
5753 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5754 ret = device_create_file(&wq_dev->dev, attr);
5755 if (ret) {
5756 device_unregister(&wq_dev->dev);
5757 wq->wq_dev = NULL;
5758 return ret;
5759 }
5760 }
5761 }
5762
5763 dev_set_uevent_suppress(&wq_dev->dev, false);
5764 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5765 return 0;
5766}
5767
5768
5769
5770
5771
5772
5773
5774static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5775{
5776 struct wq_device *wq_dev = wq->wq_dev;
5777
5778 if (!wq->wq_dev)
5779 return;
5780
5781 wq->wq_dev = NULL;
5782 device_unregister(&wq_dev->dev);
5783}
5784#else
5785static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5786#endif
5787
/*
 * Workqueue watchdog.
 *
 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
 * flush dependency, a concurrency managed work item which stays RUNNING
 * indefinitely.  Workqueue stalls can be very difficult to debug as the
 * usual warning mechanisms don't trigger and internal workqueue state is
 * largely opaque.
 *
 * Workqueue watchdog monitors all worker pools periodically and dumps
 * state if some pools failed to make forward progress for a while where
 * forward progress is defined as the first item on ->worklist changing.
 *
 * This mechanism is controlled through the kernel parameter
 * "workqueue.watchdog_thresh" which can be updated at runtime through the
 * corresponding sysfs parameter file.
 */
#ifdef CONFIG_WQ_WATCHDOG

static unsigned long wq_watchdog_thresh = 30;
static struct timer_list wq_watchdog_timer;

static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5812
5813static void wq_watchdog_reset_touched(void)
5814{
5815 int cpu;
5816
5817 wq_watchdog_touched = jiffies;
5818 for_each_possible_cpu(cpu)
5819 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5820}
5821
5822static void wq_watchdog_timer_fn(struct timer_list *unused)
5823{
5824 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5825 bool lockup_detected = false;
5826 unsigned long now = jiffies;
5827 struct worker_pool *pool;
5828 int pi;
5829
5830 if (!thresh)
5831 return;
5832
5833 rcu_read_lock();
5834
5835 for_each_pool(pool, pi) {
5836 unsigned long pool_ts, touched, ts;
5837
5838 if (list_empty(&pool->worklist))
5839 continue;
5840
5841
5842
5843
5844
5845 kvm_check_and_clear_guest_paused();
5846
5847
5848 if (pool->cpu >= 0)
5849 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
5850 else
5851 touched = READ_ONCE(wq_watchdog_touched);
5852 pool_ts = READ_ONCE(pool->watchdog_ts);
5853
5854 if (time_after(pool_ts, touched))
5855 ts = pool_ts;
5856 else
5857 ts = touched;
5858
5859
5860 if (time_after(now, ts + thresh)) {
5861 lockup_detected = true;
5862 pr_emerg("BUG: workqueue lockup - pool");
5863 pr_cont_pool_info(pool);
5864 pr_cont(" stuck for %us!\n",
5865 jiffies_to_msecs(now - pool_ts) / 1000);
5866 }
5867 }
5868
5869 rcu_read_unlock();
5870
5871 if (lockup_detected)
5872 show_workqueue_state();
5873
5874 wq_watchdog_reset_touched();
5875 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5876}
5877
5878notrace void wq_watchdog_touch(int cpu)
5879{
5880 if (cpu >= 0)
5881 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5882
5883 wq_watchdog_touched = jiffies;
5884}
5885
5886static void wq_watchdog_set_thresh(unsigned long thresh)
5887{
5888 wq_watchdog_thresh = 0;
5889 del_timer_sync(&wq_watchdog_timer);
5890
5891 if (thresh) {
5892 wq_watchdog_thresh = thresh;
5893 wq_watchdog_reset_touched();
5894 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5895 }
5896}
5897
5898static int wq_watchdog_param_set_thresh(const char *val,
5899 const struct kernel_param *kp)
5900{
5901 unsigned long thresh;
5902 int ret;
5903
5904 ret = kstrtoul(val, 0, &thresh);
5905 if (ret)
5906 return ret;
5907
5908 if (system_wq)
5909 wq_watchdog_set_thresh(thresh);
5910 else
5911 wq_watchdog_thresh = thresh;
5912
5913 return 0;
5914}
5915
5916static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5917 .set = wq_watchdog_param_set_thresh,
5918 .get = param_get_ulong,
5919};
5920
5921module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5922 0644);
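/*
 * The threshold is in seconds: set it at boot with
 * "workqueue.watchdog_thresh=N" or at runtime by writing to
 * /sys/module/workqueue/parameters/watchdog_thresh.  Writing 0 disables
 * the watchdog; wq_watchdog_set_thresh() above re-arms the timer on any
 * non-zero change.
 */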
5923
5924static void wq_watchdog_init(void)
5925{
5926 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
5927 wq_watchdog_set_thresh(wq_watchdog_thresh);
5928}
5929
5930#else
5931
5932static inline void wq_watchdog_init(void) { }
5933
5934#endif
5935
static void __init wq_numa_init(void)
{
	cpumask_var_t *tbl;
	int node, cpu;

	if (num_possible_nodes() <= 1)
		return;

	if (wq_disable_numa) {
		pr_info("workqueue: NUMA affinity support disabled\n");
		return;
	}

	for_each_possible_cpu(cpu) {
		if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
			pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
			return;
		}
	}

	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
	BUG_ON(!wq_update_unbound_numa_attrs_buf);

	/*
	 * We want masks of possible CPUs of each node which isn't readily
	 * available.  Build one from cpu_to_node() which should have been
	 * initialized already.
	 */
	tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
	BUG_ON(!tbl);

	for_each_node(node)
		BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
				node_online(node) ? node : NUMA_NO_NODE));

	for_each_possible_cpu(cpu) {
		node = cpu_to_node(cpu);
		cpumask_set_cpu(cpu, tbl[node]);
	}

	wq_numa_possible_cpumask = tbl;
	wq_numa_enabled = true;
}

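/*
 * Illustration (hypothetical topology): on a two-node machine with CPUs
 * 0-3 on node 0 and CPUs 4-7 on node 1, the loop above yields
 * tbl[0] = 0-3 and tbl[1] = 4-7, letting unbound workqueues later pick a
 * per-node pwq whose workers stay NUMA-local.
 */
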
/**
 * workqueue_init_early - early init for workqueue subsystem
 *
 * This is the first half of two-staged workqueue subsystem initialization
 * and invoked as soon as the bare basics - memory allocation, cpumasks and
 * idr are up.  It sets up all the data structures and system workqueues
 * and allows early boot code to create workqueues and queue/cancel work
 * items.  Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
void __init workqueue_init_early(void)
{
	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
	int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
	int i, cpu;

	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));

	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));

	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);

	/* initialize CPU pools */
	for_each_possible_cpu(cpu) {
		struct worker_pool *pool;

		i = 0;
		for_each_cpu_worker_pool(pool, cpu) {
			BUG_ON(init_worker_pool(pool));
			pool->cpu = cpu;
			cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
			pool->attrs->nice = std_nice[i++];
			pool->node = cpu_to_node(cpu);

			/* alloc pool ID */
			mutex_lock(&wq_pool_mutex);
			BUG_ON(worker_pool_assign_id(pool));
			mutex_unlock(&wq_pool_mutex);
		}
	}

	/* create default unbound and ordered wq attrs */
	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
		struct workqueue_attrs *attrs;

		BUG_ON(!(attrs = alloc_workqueue_attrs()));
		attrs->nice = std_nice[i];
		unbound_std_wq_attrs[i] = attrs;

		/*
		 * An ordered wq should have only one pwq as ordering is
		 * guaranteed by max_active which is enforced by pwqs.
		 * Turn off NUMA so that dfl_pwq is used for all nodes.
		 */
		BUG_ON(!(attrs = alloc_workqueue_attrs()));
		attrs->nice = std_nice[i];
		attrs->no_numa = true;
		ordered_wq_attrs[i] = attrs;
	}

	system_wq = alloc_workqueue("events", 0, 0);
	system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
	system_long_wq = alloc_workqueue("events_long", 0, 0);
	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
					    WQ_UNBOUND_MAX_ACTIVE);
	system_freezable_wq = alloc_workqueue("events_freezable",
					      WQ_FREEZABLE, 0);
	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
					      WQ_POWER_EFFICIENT, 0);
	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
					      WQ_FREEZABLE | WQ_POWER_EFFICIENT,
					      0);
	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
	       !system_unbound_wq || !system_freezable_wq ||
	       !system_power_efficient_wq ||
	       !system_freezable_power_efficient_wq);
}

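/*
 * Example (illustrative; my_fn/my_work are hypothetical, the API calls
 * are the standard ones): once the system workqueues above exist, boot
 * code may already queue work, even though nothing executes until
 * workqueue_init() creates the workers:
 *
 *	static void my_fn(struct work_struct *work) { ... }
 *	static DECLARE_WORK(my_work, my_fn);
 *
 *	schedule_work(&my_work);	queues on system_wq
 */
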
/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the latter half of two-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled.
 * Workqueues have been created and work items queued on them, but there
 * are no kthreads executing the work items yet.  Populate the worker pools
 * with the initial workers and enable future kthread creations.
 */
void __init workqueue_init(void)
{
	struct workqueue_struct *wq;
	struct worker_pool *pool;
	int cpu, bkt;

	/*
	 * It'd be simpler to initialize NUMA in workqueue_init_early() but
	 * CPU to node mapping may not be available that early on some
	 * archs such as power and arm64.  As per-cpu pools created
	 * previously could be missing node hint and unbound pools NUMA
	 * affinity, fix them up.
	 *
	 * Also, while iterating workqueues, create rescuers if requested.
	 */
	wq_numa_init();

	mutex_lock(&wq_pool_mutex);

	for_each_possible_cpu(cpu) {
		for_each_cpu_worker_pool(pool, cpu) {
			pool->node = cpu_to_node(cpu);
		}
	}

	list_for_each_entry(wq, &workqueues, list) {
		wq_update_unbound_numa(wq, smp_processor_id(), true);
		WARN(init_rescuer(wq),
		     "workqueue: failed to create early rescuer for %s",
		     wq->name);
	}

	mutex_unlock(&wq_pool_mutex);

	/* create the initial workers */
	for_each_online_cpu(cpu) {
		for_each_cpu_worker_pool(pool, cpu) {
			pool->flags &= ~POOL_DISASSOCIATED;
			BUG_ON(!create_worker(pool));
		}
	}

	hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
		BUG_ON(!create_worker(pool));

	wq_online = true;
	wq_watchdog_init();
}

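/*
 * Boot ordering note (a sketch; the exact call sites are in init/main.c
 * and can shift between kernel versions): workqueue_init_early() runs
 * from start_kernel() before kthreads exist, and workqueue_init() runs
 * once kthreadd is up, at which point the workers created above start
 * draining anything queued in between.
 */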