/*
 * kernel/workqueue.c - generic async execution with shared worker pools
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>
#include <linux/sched/isolation.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>

#include "workqueue_internal.h"

enum {
	/* worker_pool flags */
	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give MIN_NICE.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

struct worker_pool {
	raw_spinlock_t		lock;		/* the pool lock */
	int			cpu;		/* the associated cpu */
	int			node;		/* the associated node ID */
	int			id;		/* pool ID */
	unsigned int		flags;		/* POOL_* flags */

	unsigned long		watchdog_ts;	/* watchdog timestamp */

	struct list_head	worklist;	/* list of pending works */

	int			nr_workers;	/* total number of workers */
	int			nr_idle;	/* currently idle workers */

	struct list_head	idle_list;	/* list of idle workers */
	struct timer_list	idle_timer;	/* worker idle timeout */
	struct timer_list	mayday_timer;	/* SOS timer for workers */

	/* workers are chained either in busy_hash or idle_list */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);

	struct worker		*manager;	/* purely informational */
	struct list_head	workers;	/* attached workers */
	struct completion	*detach_completion; /* all workers detached */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* workers' attributes */
	struct hlist_node	hash_node;	/* unbound_pool_hash node */
	int			refcnt;		/* refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t		nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work->data are used for flags and the remaining high bits point to
 * the pwq; thus, pwqs need to be aligned at a power of two of the number
 * of flag bits.
 */
struct pool_workqueue {
	struct worker_pool	*pool;		/* the associated pool */
	struct workqueue_struct *wq;		/* the owning workqueue */
	int			work_color;	/* current color */
	int			flush_color;	/* flushing color */
	int			refcnt;		/* reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* nr of in_flight works */

	/*
	 * nr_active management and WORK_STRUCT_INACTIVE:
	 *
	 * When pwq->nr_active >= pwq->max_active, a new work item is queued
	 * to pwq->inactive_works instead of pool->worklist and marked with
	 * WORK_STRUCT_INACTIVE.  Such items are not counted as active and
	 * are activated one at a time as earlier items retire.
	 */
	int			nr_active;	/* nr of active works */
	int			max_active;	/* max active works */
	struct list_head	inactive_works;	/* inactive works */
	struct list_head	pwqs_node;	/* node on wq->pwqs */
	struct list_head	mayday_node;	/* node on wq->maydays */

	/*
	 * Release of unbound pwq is punted to system_wq; see put_pwq().
	 * pool_workqueue itself is also RCU protected so that the first
	 * pwq can be determined without grabbing wq->mutex.
	 */
	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* list of flushers */
	int			flush_color;	/* flush color waiting for */
	struct completion	done;		/* flush completion */
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head	pwqs;		/* all pwqs of this wq */
	struct list_head	list;		/* list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* current work color */
	int			flush_color;	/* current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* first flusher */
	struct list_head	flusher_queue;	/* flush waiters */
	struct list_head	flusher_overflow; /* flush overflow list */

	struct list_head	maydays;	/* pwqs requesting rescue */
	struct worker		*rescuer;	/* rescue worker */

	int			nr_drainers;	/* drain in progress */
	int			saved_max_active; /* saved pwq max_active */

	struct workqueue_attrs	*unbound_attrs;	/* only for unbound wqs */
	struct pool_workqueue	*dfl_pwq;	/* only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	char			*lock_name;
	struct lock_class_key	key;
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* workqueue name */

	/*
	 * Destruction of workqueue_struct is RCU protected to allow walking
	 * the workqueues list without grabbing wq_pool_mutex.
	 */
	struct rcu_head		rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

/* possible CPUs of each node */
static cpumask_var_t *wq_numa_possible_cpumask;

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT_DEFAULT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;			/* can kworkers be created yet? */

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* scratch attrs used when updating unbound NUMA affinity */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */

/* wait for the manager to go away */
static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);

static LIST_HEAD(workqueues);		/* list of all workqueues */
static bool workqueue_freezing;		/* the workqueues are freezing */

/* allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on local execution.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* idr of all worker_pools */

/* hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
static void show_pwq(struct pool_workqueue *pwq);
static void show_one_worker_pool(struct worker_pool *pool);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or RCU read
 * locked.  If the pool needs to be used beyond the locking in effect, the
 * caller is responsible for guaranteeing that the pool stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with wq_pool_attach_mutex.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or RCU read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 */
#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node,		\
				 lockdep_is_held(&(wq->mutex)))

447#ifdef CONFIG_DEBUG_OBJECTS_WORK
448
449static const struct debug_obj_descr work_debug_descr;
450
451static void *work_debug_hint(void *addr)
452{
453 return ((struct work_struct *) addr)->func;
454}
455
456static bool work_is_static_object(void *addr)
457{
458 struct work_struct *work = addr;
459
460 return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
461}
462
463
464
465
466
467static bool work_fixup_init(void *addr, enum debug_obj_state state)
468{
469 struct work_struct *work = addr;
470
471 switch (state) {
472 case ODEBUG_STATE_ACTIVE:
473 cancel_work_sync(work);
474 debug_object_init(work, &work_debug_descr);
475 return true;
476 default:
477 return false;
478 }
479}
480
481
482
483
484
485static bool work_fixup_free(void *addr, enum debug_obj_state state)
486{
487 struct work_struct *work = addr;
488
489 switch (state) {
490 case ODEBUG_STATE_ACTIVE:
491 cancel_work_sync(work);
492 debug_object_free(work, &work_debug_descr);
493 return true;
494 default:
495 return false;
496 }
497}
498
499static const struct debug_obj_descr work_debug_descr = {
500 .name = "work_struct",
501 .debug_hint = work_debug_hint,
502 .is_static_object = work_is_static_object,
503 .fixup_init = work_fixup_init,
504 .fixup_free = work_fixup_free,
505};
506
507static inline void debug_work_activate(struct work_struct *work)
508{
509 debug_object_activate(work, &work_debug_descr);
510}
511
512static inline void debug_work_deactivate(struct work_struct *work)
513{
514 debug_object_deactivate(work, &work_debug_descr);
515}
516
517void __init_work(struct work_struct *work, int onstack)
518{
519 if (onstack)
520 debug_object_init_on_stack(work, &work_debug_descr);
521 else
522 debug_object_init(work, &work_debug_descr);
523}
524EXPORT_SYMBOL_GPL(__init_work);
525
526void destroy_work_on_stack(struct work_struct *work)
527{
528 debug_object_free(work, &work_debug_descr);
529}
530EXPORT_SYMBOL_GPL(destroy_work_on_stack);
531
532void destroy_delayed_work_on_stack(struct delayed_work *work)
533{
534 destroy_timer_on_stack(&work->timer);
535 debug_object_free(&work->work, &work_debug_descr);
536}
537EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
538
539#else
540static inline void debug_work_activate(struct work_struct *work) { }
541static inline void debug_work_deactivate(struct work_struct *work) { }
542#endif
543
544
545
546
547
548
549
550
551static int worker_pool_assign_id(struct worker_pool *pool)
552{
553 int ret;
554
555 lockdep_assert_held(&wq_pool_mutex);
556
557 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
558 GFP_KERNEL);
559 if (ret >= 0) {
560 pool->id = ret;
561 return 0;
562 }
563 return ret;
564}
565
566
567
568
569
570
571
572
573
574
575
576
577
578static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
579 int node)
580{
581 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
582
583
584
585
586
587
588
589 if (unlikely(node == NUMA_NO_NODE))
590 return wq->dfl_pwq;
591
592 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
593}
594
595static unsigned int work_color_to_flags(int color)
596{
597 return color << WORK_STRUCT_COLOR_SHIFT;
598}
599
600static int get_work_color(unsigned long work_data)
601{
602 return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
603 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
604}
605
606static int work_next_color(int color)
607{
608 return (color + 1) % WORK_NR_COLORS;
609}
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631static inline void set_work_data(struct work_struct *work, unsigned long data,
632 unsigned long flags)
633{
634 WARN_ON_ONCE(!work_pending(work));
635 atomic_long_set(&work->data, data | flags | work_static(work));
636}
637
638static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
639 unsigned long extra_flags)
640{
641 set_work_data(work, (unsigned long)pwq,
642 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
643}
644
645static void set_work_pool_and_keep_pending(struct work_struct *work,
646 int pool_id)
647{
648 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
649 WORK_STRUCT_PENDING);
650}
651
652static void set_work_pool_and_clear_pending(struct work_struct *work,
653 int pool_id)
654{
655
656
657
658
659
660
661 smp_wmb();
662 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691 smp_mb();
692}
693
694static void clear_work_data(struct work_struct *work)
695{
696 smp_wmb();
697 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
698}
699
700static struct pool_workqueue *get_work_pwq(struct work_struct *work)
701{
702 unsigned long data = atomic_long_read(&work->data);
703
704 if (data & WORK_STRUCT_PWQ)
705 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
706 else
707 return NULL;
708}
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725static struct worker_pool *get_work_pool(struct work_struct *work)
726{
727 unsigned long data = atomic_long_read(&work->data);
728 int pool_id;
729
730 assert_rcu_or_pool_mutex();
731
732 if (data & WORK_STRUCT_PWQ)
733 return ((struct pool_workqueue *)
734 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
735
736 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
737 if (pool_id == WORK_OFFQ_POOL_NONE)
738 return NULL;
739
740 return idr_find(&worker_pool_idr, pool_id);
741}
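
/*
 * Illustration only (not part of the algorithm): while a work item is
 * queued or running, its data word holds a pwq pointer plus flag bits,
 * conceptually
 *
 *	data = (unsigned long)pwq | WORK_STRUCT_PENDING | WORK_STRUCT_PWQ;
 *
 * and once it goes off-queue only the id of the last pool is kept:
 *
 *	data = (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT;
 *
 * which is why get_work_pool() above must test WORK_STRUCT_PWQ before
 * deciding how to decode the rest of the word.
 */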
742
743
744
745
746
747
748
749
750static int get_work_pool_id(struct work_struct *work)
751{
752 unsigned long data = atomic_long_read(&work->data);
753
754 if (data & WORK_STRUCT_PWQ)
755 return ((struct pool_workqueue *)
756 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
757
758 return data >> WORK_OFFQ_POOL_SHIFT;
759}
760
761static void mark_work_canceling(struct work_struct *work)
762{
763 unsigned long pool_id = get_work_pool_id(work);
764
765 pool_id <<= WORK_OFFQ_POOL_SHIFT;
766 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
767}
768
769static bool work_is_canceling(struct work_struct *work)
770{
771 unsigned long data = atomic_long_read(&work->data);
772
773 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
774}
775
776
777
778
779
780
781
782static bool __need_more_worker(struct worker_pool *pool)
783{
784 return !atomic_read(&pool->nr_running);
785}
786
787
788
789
790
791
792
793
794
795static bool need_more_worker(struct worker_pool *pool)
796{
797 return !list_empty(&pool->worklist) && __need_more_worker(pool);
798}
799
800
801static bool may_start_working(struct worker_pool *pool)
802{
803 return pool->nr_idle;
804}
805
806
807static bool keep_working(struct worker_pool *pool)
808{
809 return !list_empty(&pool->worklist) &&
810 atomic_read(&pool->nr_running) <= 1;
811}
812
813
814static bool need_to_create_worker(struct worker_pool *pool)
815{
816 return need_more_worker(pool) && !may_start_working(pool);
817}
818
819
820static bool too_many_workers(struct worker_pool *pool)
821{
822 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
823 int nr_idle = pool->nr_idle + managing;
824 int nr_busy = pool->nr_workers - nr_idle;
825
826 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
827}
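
/*
 * Worked example for the check above (illustrative numbers only): with
 * MAX_IDLE_WORKERS_RATIO == 4 and 16 busy workers, up to 5 idle workers
 * are tolerated; a 6th idle worker makes (6 - 2) * 4 >= 16 true, so
 * too_many_workers() returns true and the idle reaper timer is armed.
 */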
828
829
830
831
832
833
834static struct worker *first_idle_worker(struct worker_pool *pool)
835{
836 if (unlikely(list_empty(&pool->idle_list)))
837 return NULL;
838
839 return list_first_entry(&pool->idle_list, struct worker, entry);
840}
841
842
843
844
845
846
847
848
849
850
851static void wake_up_worker(struct worker_pool *pool)
852{
853 struct worker *worker = first_idle_worker(pool);
854
855 if (likely(worker))
856 wake_up_process(worker->task);
857}
858
859
860
861
862
863
864
865void wq_worker_running(struct task_struct *task)
866{
867 struct worker *worker = kthread_data(task);
868
869 if (!worker->sleeping)
870 return;
871 if (!(worker->flags & WORKER_NOT_RUNNING))
872 atomic_inc(&worker->pool->nr_running);
873 worker->sleeping = 0;
874}
875
876
877
878
879
880
881
882
883
884void wq_worker_sleeping(struct task_struct *task)
885{
886 struct worker *next, *worker = kthread_data(task);
887 struct worker_pool *pool;
888
889
890
891
892
893
894 if (worker->flags & WORKER_NOT_RUNNING)
895 return;
896
897 pool = worker->pool;
898
899
900 if (worker->sleeping)
901 return;
902
903 worker->sleeping = 1;
904 raw_spin_lock_irq(&pool->lock);
905
906
907
908
909
910
911
912
913
914
915
916
917 if (atomic_dec_and_test(&pool->nr_running) &&
918 !list_empty(&pool->worklist)) {
919 next = first_idle_worker(pool);
920 if (next)
921 wake_up_process(next->task);
922 }
923 raw_spin_unlock_irq(&pool->lock);
924}
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950work_func_t wq_worker_last_func(struct task_struct *task)
951{
952 struct worker *worker = kthread_data(task);
953
954 return worker->last_func;
955}
956
957
958
959
960
961
962
963
964
965
966
967static inline void worker_set_flags(struct worker *worker, unsigned int flags)
968{
969 struct worker_pool *pool = worker->pool;
970
971 WARN_ON_ONCE(worker->task != current);
972
973
974 if ((flags & WORKER_NOT_RUNNING) &&
975 !(worker->flags & WORKER_NOT_RUNNING)) {
976 atomic_dec(&pool->nr_running);
977 }
978
979 worker->flags |= flags;
980}
981
982
983
984
985
986
987
988
989
990
991
992static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
993{
994 struct worker_pool *pool = worker->pool;
995 unsigned int oflags = worker->flags;
996
997 WARN_ON_ONCE(worker->task != current);
998
999 worker->flags &= ~flags;
1000
1001
1002
1003
1004
1005
1006 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
1007 if (!(worker->flags & WORKER_NOT_RUNNING))
1008 atomic_inc(&pool->nr_running);
1009}
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044static struct worker *find_worker_executing_work(struct worker_pool *pool,
1045 struct work_struct *work)
1046{
1047 struct worker *worker;
1048
1049 hash_for_each_possible(pool->busy_hash, worker, hentry,
1050 (unsigned long)work)
1051 if (worker->current_work == work &&
1052 worker->current_func == work->func)
1053 return worker;
1054
1055 return NULL;
1056}
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075static void move_linked_works(struct work_struct *work, struct list_head *head,
1076 struct work_struct **nextp)
1077{
1078 struct work_struct *n;
1079
1080
1081
1082
1083
1084 list_for_each_entry_safe_from(work, n, NULL, entry) {
1085 list_move_tail(&work->entry, head);
1086 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1087 break;
1088 }
1089
1090
1091
1092
1093
1094
1095 if (nextp)
1096 *nextp = n;
1097}
1098
1099
1100
1101
1102
1103
1104
1105
1106static void get_pwq(struct pool_workqueue *pwq)
1107{
1108 lockdep_assert_held(&pwq->pool->lock);
1109 WARN_ON_ONCE(pwq->refcnt <= 0);
1110 pwq->refcnt++;
1111}
1112
1113
1114
1115
1116
1117
1118
1119
1120static void put_pwq(struct pool_workqueue *pwq)
1121{
1122 lockdep_assert_held(&pwq->pool->lock);
1123 if (likely(--pwq->refcnt))
1124 return;
1125 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1126 return;
1127
1128
1129
1130
1131
1132
1133
1134
1135 schedule_work(&pwq->unbound_release_work);
1136}
1137
1138
1139
1140
1141
1142
1143
1144static void put_pwq_unlocked(struct pool_workqueue *pwq)
1145{
1146 if (pwq) {
1147
1148
1149
1150
1151 raw_spin_lock_irq(&pwq->pool->lock);
1152 put_pwq(pwq);
1153 raw_spin_unlock_irq(&pwq->pool->lock);
1154 }
1155}
1156
1157static void pwq_activate_inactive_work(struct work_struct *work)
1158{
1159 struct pool_workqueue *pwq = get_work_pwq(work);
1160
1161 trace_workqueue_activate_work(work);
1162 if (list_empty(&pwq->pool->worklist))
1163 pwq->pool->watchdog_ts = jiffies;
1164 move_linked_works(work, &pwq->pool->worklist, NULL);
1165 __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
1166 pwq->nr_active++;
1167}
1168
1169static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
1170{
1171 struct work_struct *work = list_first_entry(&pwq->inactive_works,
1172 struct work_struct, entry);
1173
1174 pwq_activate_inactive_work(work);
1175}
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
1189{
1190 int color = get_work_color(work_data);
1191
1192 if (!(work_data & WORK_STRUCT_INACTIVE)) {
1193 pwq->nr_active--;
1194 if (!list_empty(&pwq->inactive_works)) {
1195
1196 if (pwq->nr_active < pwq->max_active)
1197 pwq_activate_first_inactive(pwq);
1198 }
1199 }
1200
1201 pwq->nr_in_flight[color]--;
1202
1203
1204 if (likely(pwq->flush_color != color))
1205 goto out_put;
1206
1207
1208 if (pwq->nr_in_flight[color])
1209 goto out_put;
1210
1211
1212 pwq->flush_color = -1;
1213
1214
1215
1216
1217
1218 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1219 complete(&pwq->wq->first_flusher->done);
1220out_put:
1221 put_pwq(pwq);
1222}
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1255 unsigned long *flags)
1256{
1257 struct worker_pool *pool;
1258 struct pool_workqueue *pwq;
1259
1260 local_irq_save(*flags);
1261
1262
1263 if (is_dwork) {
1264 struct delayed_work *dwork = to_delayed_work(work);
1265
1266
1267
1268
1269
1270
1271 if (likely(del_timer(&dwork->timer)))
1272 return 1;
1273 }
1274
1275
1276 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1277 return 0;
1278
1279 rcu_read_lock();
1280
1281
1282
1283
1284 pool = get_work_pool(work);
1285 if (!pool)
1286 goto fail;
1287
1288 raw_spin_lock(&pool->lock);
1289
1290
1291
1292
1293
1294
1295
1296
1297 pwq = get_work_pwq(work);
1298 if (pwq && pwq->pool == pool) {
1299 debug_work_deactivate(work);
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312 if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
1313 pwq_activate_inactive_work(work);
1314
1315 list_del_init(&work->entry);
1316 pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
1317
1318
1319 set_work_pool_and_keep_pending(work, pool->id);
1320
1321 raw_spin_unlock(&pool->lock);
1322 rcu_read_unlock();
1323 return 1;
1324 }
1325 raw_spin_unlock(&pool->lock);
1326fail:
1327 rcu_read_unlock();
1328 local_irq_restore(*flags);
1329 if (work_is_canceling(work))
1330 return -ENOENT;
1331 cpu_relax();
1332 return -EAGAIN;
1333}
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1349 struct list_head *head, unsigned int extra_flags)
1350{
1351 struct worker_pool *pool = pwq->pool;
1352
1353
1354 kasan_record_aux_stack_noalloc(work);
1355
1356
1357 set_work_pwq(work, pwq, extra_flags);
1358 list_add_tail(&work->entry, head);
1359 get_pwq(pwq);
1360
1361
1362
1363
1364
1365
1366 smp_mb();
1367
1368 if (__need_more_worker(pool))
1369 wake_up_worker(pool);
1370}
1371
1372
1373
1374
1375
1376static bool is_chained_work(struct workqueue_struct *wq)
1377{
1378 struct worker *worker;
1379
1380 worker = current_wq_worker();
1381
1382
1383
1384
1385 return worker && worker->current_pwq->wq == wq;
1386}
1387
1388
1389
1390
1391
1392
1393static int wq_select_unbound_cpu(int cpu)
1394{
1395 static bool printed_dbg_warning;
1396 int new_cpu;
1397
1398 if (likely(!wq_debug_force_rr_cpu)) {
1399 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1400 return cpu;
1401 } else if (!printed_dbg_warning) {
1402 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1403 printed_dbg_warning = true;
1404 }
1405
1406 if (cpumask_empty(wq_unbound_cpumask))
1407 return cpu;
1408
1409 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1410 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1411 if (unlikely(new_cpu >= nr_cpu_ids)) {
1412 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1413 if (unlikely(new_cpu >= nr_cpu_ids))
1414 return cpu;
1415 }
1416 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1417
1418 return new_cpu;
1419}
1420
1421static void __queue_work(int cpu, struct workqueue_struct *wq,
1422 struct work_struct *work)
1423{
1424 struct pool_workqueue *pwq;
1425 struct worker_pool *last_pool;
1426 struct list_head *worklist;
1427 unsigned int work_flags;
1428 unsigned int req_cpu = cpu;
1429
1430
1431
1432
1433
1434
1435
1436 lockdep_assert_irqs_disabled();
1437
1438
1439
1440 if (unlikely(wq->flags & __WQ_DRAINING) &&
1441 WARN_ON_ONCE(!is_chained_work(wq)))
1442 return;
1443 rcu_read_lock();
1444retry:
1445
1446 if (wq->flags & WQ_UNBOUND) {
1447 if (req_cpu == WORK_CPU_UNBOUND)
1448 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1449 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1450 } else {
1451 if (req_cpu == WORK_CPU_UNBOUND)
1452 cpu = raw_smp_processor_id();
1453 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1454 }
1455
1456
1457
1458
1459
1460
1461 last_pool = get_work_pool(work);
1462 if (last_pool && last_pool != pwq->pool) {
1463 struct worker *worker;
1464
1465 raw_spin_lock(&last_pool->lock);
1466
1467 worker = find_worker_executing_work(last_pool, work);
1468
1469 if (worker && worker->current_pwq->wq == wq) {
1470 pwq = worker->current_pwq;
1471 } else {
1472
1473 raw_spin_unlock(&last_pool->lock);
1474 raw_spin_lock(&pwq->pool->lock);
1475 }
1476 } else {
1477 raw_spin_lock(&pwq->pool->lock);
1478 }
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488 if (unlikely(!pwq->refcnt)) {
1489 if (wq->flags & WQ_UNBOUND) {
1490 raw_spin_unlock(&pwq->pool->lock);
1491 cpu_relax();
1492 goto retry;
1493 }
1494
1495 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1496 wq->name, cpu);
1497 }
1498
1499
1500 trace_workqueue_queue_work(req_cpu, pwq, work);
1501
1502 if (WARN_ON(!list_empty(&work->entry)))
1503 goto out;
1504
1505 pwq->nr_in_flight[pwq->work_color]++;
1506 work_flags = work_color_to_flags(pwq->work_color);
1507
1508 if (likely(pwq->nr_active < pwq->max_active)) {
1509 trace_workqueue_activate_work(work);
1510 pwq->nr_active++;
1511 worklist = &pwq->pool->worklist;
1512 if (list_empty(worklist))
1513 pwq->pool->watchdog_ts = jiffies;
1514 } else {
1515 work_flags |= WORK_STRUCT_INACTIVE;
1516 worklist = &pwq->inactive_works;
1517 }
1518
1519 debug_work_activate(work);
1520 insert_work(pwq, work, worklist, work_flags);
1521
1522out:
1523 raw_spin_unlock(&pwq->pool->lock);
1524 rcu_read_unlock();
1525}

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.  Callers that fail to ensure that the specified
 * CPU cannot go away will execute on a randomly chosen CPU.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
1538bool queue_work_on(int cpu, struct workqueue_struct *wq,
1539 struct work_struct *work)
1540{
1541 bool ret = false;
1542 unsigned long flags;
1543
1544 local_irq_save(flags);
1545
1546 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1547 __queue_work(cpu, wq, work);
1548 ret = true;
1549 }
1550
1551 local_irq_restore(flags);
1552 return ret;
1553}
1554EXPORT_SYMBOL(queue_work_on);
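
/*
 * Usage sketch (hypothetical caller, not part of this file):
 *
 *	static void my_handler(struct work_struct *work)
 *	{
 *		pr_info("running on CPU%d\n", raw_smp_processor_id());
 *	}
 *	static DECLARE_WORK(my_work, my_handler);
 *
 *	queue_work_on(3, system_wq, &my_work);
 *
 * The caller is responsible for making sure CPU 3 stays online for the
 * queueing to target it reliably; otherwise use queue_work(), which
 * passes WORK_CPU_UNBOUND.
 */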
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565static int workqueue_select_cpu_near(int node)
1566{
1567 int cpu;
1568
1569
1570 if (!wq_numa_enabled)
1571 return WORK_CPU_UNBOUND;
1572
1573
1574 if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1575 return WORK_CPU_UNBOUND;
1576
1577
1578 cpu = raw_smp_processor_id();
1579 if (node == cpu_to_node(cpu))
1580 return cpu;
1581
1582
1583 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1584
1585
1586 return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1587}
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609bool queue_work_node(int node, struct workqueue_struct *wq,
1610 struct work_struct *work)
1611{
1612 unsigned long flags;
1613 bool ret = false;
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1625
1626 local_irq_save(flags);
1627
1628 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1629 int cpu = workqueue_select_cpu_near(node);
1630
1631 __queue_work(cpu, wq, work);
1632 ret = true;
1633 }
1634
1635 local_irq_restore(flags);
1636 return ret;
1637}
1638EXPORT_SYMBOL_GPL(queue_work_node);
1639
1640void delayed_work_timer_fn(struct timer_list *t)
1641{
1642 struct delayed_work *dwork = from_timer(dwork, t, timer);
1643
1644
1645 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1646}
1647EXPORT_SYMBOL(delayed_work_timer_fn);
1648
1649static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1650 struct delayed_work *dwork, unsigned long delay)
1651{
1652 struct timer_list *timer = &dwork->timer;
1653 struct work_struct *work = &dwork->work;
1654
1655 WARN_ON_ONCE(!wq);
1656 WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn);
1657 WARN_ON_ONCE(timer_pending(timer));
1658 WARN_ON_ONCE(!list_empty(&work->entry));
1659
1660
1661
1662
1663
1664
1665
1666 if (!delay) {
1667 __queue_work(cpu, wq, &dwork->work);
1668 return;
1669 }
1670
1671 dwork->wq = wq;
1672 dwork->cpu = cpu;
1673 timer->expires = jiffies + delay;
1674
1675 if (unlikely(cpu != WORK_CPU_UNBOUND))
1676 add_timer_on(timer, cpu);
1677 else
1678 add_timer(timer);
1679}

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
 */
1692bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1693 struct delayed_work *dwork, unsigned long delay)
1694{
1695 struct work_struct *work = &dwork->work;
1696 bool ret = false;
1697 unsigned long flags;
1698
1699
1700 local_irq_save(flags);
1701
1702 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1703 __queue_delayed_work(cpu, wq, dwork, delay);
1704 ret = true;
1705 }
1706
1707 local_irq_restore(flags);
1708 return ret;
1709}
1710EXPORT_SYMBOL(queue_delayed_work_on);
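
/*
 * Usage sketch (hypothetical caller, not part of this file): queue a poll
 * routine to run on whatever CPU is convenient about one second from now.
 *
 *	static void my_poll(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(my_poll_work, my_poll);
 *
 *	queue_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &my_poll_work,
 *			      msecs_to_jiffies(1000));
 *
 * queue_delayed_work() is the common wrapper which passes WORK_CPU_UNBOUND.
 */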

/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
 * modify @dwork's timer so that it expires after @delay.  If @delay is
 * zero, @work is guaranteed to be scheduled immediately regardless of its
 * current state.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 */
1730bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1731 struct delayed_work *dwork, unsigned long delay)
1732{
1733 unsigned long flags;
1734 int ret;
1735
1736 do {
1737 ret = try_to_grab_pending(&dwork->work, true, &flags);
1738 } while (unlikely(ret == -EAGAIN));
1739
1740 if (likely(ret >= 0)) {
1741 __queue_delayed_work(cpu, wq, dwork, delay);
1742 local_irq_restore(flags);
1743 }
1744
1745
1746 return ret;
1747}
1748EXPORT_SYMBOL_GPL(mod_delayed_work_on);
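
/*
 * A common pattern built on the semantics above (hypothetical caller, not
 * part of this file): debounce bursty events by pushing the timeout out on
 * every event so the handler runs once, roughly 100ms after the last one.
 *
 *	mod_delayed_work(system_wq, &my_debounce_work,
 *			 msecs_to_jiffies(100));
 *
 * mod_delayed_work() is the convenience wrapper that passes
 * WORK_CPU_UNBOUND to mod_delayed_work_on().
 */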
1749
1750static void rcu_work_rcufn(struct rcu_head *rcu)
1751{
1752 struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1753
1754
1755 local_irq_disable();
1756 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1757 local_irq_enable();
1758}

/**
 * queue_rcu_work - queue work after a RCU grace period
 * @wq: workqueue to use
 * @rwork: work to queue
 *
 * Return: %false if @rwork was already pending, %true otherwise.  Note
 * that a full RCU grace period is guaranteed only after a %true return.
 * While @rwork is guaranteed to be executed after a %false return, the
 * execution may happen before a full RCU grace period has passed.
 */
1770bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1771{
1772 struct work_struct *work = &rwork->work;
1773
1774 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1775 rwork->wq = wq;
1776 call_rcu(&rwork->rcu, rcu_work_rcufn);
1777 return true;
1778 }
1779
1780 return false;
1781}
1782EXPORT_SYMBOL(queue_rcu_work);
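
/*
 * Usage sketch (hypothetical caller and types, not part of this file):
 * free an object after an RCU grace period without open-coding a
 * call_rcu() callback that then has to bounce to a workqueue itself.
 *
 *	struct my_obj {
 *		struct rcu_work rwork;
 *	};
 *
 *	static void my_obj_free_workfn(struct work_struct *work)
 *	{
 *		struct my_obj *obj = container_of(to_rcu_work(work),
 *						  struct my_obj, rwork);
 *		kfree(obj);
 *	}
 *
 *	INIT_RCU_WORK(&obj->rwork, my_obj_free_workfn);
 *	queue_rcu_work(system_wq, &obj->rwork);
 */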
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794static void worker_enter_idle(struct worker *worker)
1795{
1796 struct worker_pool *pool = worker->pool;
1797
1798 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1799 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1800 (worker->hentry.next || worker->hentry.pprev)))
1801 return;
1802
1803
1804 worker->flags |= WORKER_IDLE;
1805 pool->nr_idle++;
1806 worker->last_active = jiffies;
1807
1808
1809 list_add(&worker->entry, &pool->idle_list);
1810
1811 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1812 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1813
1814
1815
1816
1817
1818
1819
1820 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1821 pool->nr_workers == pool->nr_idle &&
1822 atomic_read(&pool->nr_running));
1823}
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834static void worker_leave_idle(struct worker *worker)
1835{
1836 struct worker_pool *pool = worker->pool;
1837
1838 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1839 return;
1840 worker_clr_flags(worker, WORKER_IDLE);
1841 pool->nr_idle--;
1842 list_del_init(&worker->entry);
1843}
1844
1845static struct worker *alloc_worker(int node)
1846{
1847 struct worker *worker;
1848
1849 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1850 if (worker) {
1851 INIT_LIST_HEAD(&worker->entry);
1852 INIT_LIST_HEAD(&worker->scheduled);
1853 INIT_LIST_HEAD(&worker->node);
1854
1855 worker->flags = WORKER_PREP;
1856 }
1857 return worker;
1858}
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869static void worker_attach_to_pool(struct worker *worker,
1870 struct worker_pool *pool)
1871{
1872 mutex_lock(&wq_pool_attach_mutex);
1873
1874
1875
1876
1877
1878
1879 if (pool->flags & POOL_DISASSOCIATED)
1880 worker->flags |= WORKER_UNBOUND;
1881 else
1882 kthread_set_per_cpu(worker->task, pool->cpu);
1883
1884 if (worker->rescue_wq)
1885 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1886
1887 list_add_tail(&worker->node, &pool->workers);
1888 worker->pool = pool;
1889
1890 mutex_unlock(&wq_pool_attach_mutex);
1891}
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901static void worker_detach_from_pool(struct worker *worker)
1902{
1903 struct worker_pool *pool = worker->pool;
1904 struct completion *detach_completion = NULL;
1905
1906 mutex_lock(&wq_pool_attach_mutex);
1907
1908 kthread_set_per_cpu(worker->task, -1);
1909 list_del(&worker->node);
1910 worker->pool = NULL;
1911
1912 if (list_empty(&pool->workers))
1913 detach_completion = pool->detach_completion;
1914 mutex_unlock(&wq_pool_attach_mutex);
1915
1916
1917 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1918
1919 if (detach_completion)
1920 complete(detach_completion);
1921}
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935static struct worker *create_worker(struct worker_pool *pool)
1936{
1937 struct worker *worker;
1938 int id;
1939 char id_buf[16];
1940
1941
1942 id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
1943 if (id < 0)
1944 return NULL;
1945
1946 worker = alloc_worker(pool->node);
1947 if (!worker)
1948 goto fail;
1949
1950 worker->id = id;
1951
1952 if (pool->cpu >= 0)
1953 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1954 pool->attrs->nice < 0 ? "H" : "");
1955 else
1956 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1957
1958 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1959 "kworker/%s", id_buf);
1960 if (IS_ERR(worker->task))
1961 goto fail;
1962
1963 set_user_nice(worker->task, pool->attrs->nice);
1964 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1965
1966
1967 worker_attach_to_pool(worker, pool);
1968
1969
1970 raw_spin_lock_irq(&pool->lock);
1971 worker->pool->nr_workers++;
1972 worker_enter_idle(worker);
1973 wake_up_process(worker->task);
1974 raw_spin_unlock_irq(&pool->lock);
1975
1976 return worker;
1977
1978fail:
1979 ida_free(&pool->worker_ida, id);
1980 kfree(worker);
1981 return NULL;
1982}
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994static void destroy_worker(struct worker *worker)
1995{
1996 struct worker_pool *pool = worker->pool;
1997
1998 lockdep_assert_held(&pool->lock);
1999
2000
2001 if (WARN_ON(worker->current_work) ||
2002 WARN_ON(!list_empty(&worker->scheduled)) ||
2003 WARN_ON(!(worker->flags & WORKER_IDLE)))
2004 return;
2005
2006 pool->nr_workers--;
2007 pool->nr_idle--;
2008
2009 list_del_init(&worker->entry);
2010 worker->flags |= WORKER_DIE;
2011 wake_up_process(worker->task);
2012}
2013
2014static void idle_worker_timeout(struct timer_list *t)
2015{
2016 struct worker_pool *pool = from_timer(pool, t, idle_timer);
2017
2018 raw_spin_lock_irq(&pool->lock);
2019
2020 while (too_many_workers(pool)) {
2021 struct worker *worker;
2022 unsigned long expires;
2023
2024
2025 worker = list_entry(pool->idle_list.prev, struct worker, entry);
2026 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2027
2028 if (time_before(jiffies, expires)) {
2029 mod_timer(&pool->idle_timer, expires);
2030 break;
2031 }
2032
2033 destroy_worker(worker);
2034 }
2035
2036 raw_spin_unlock_irq(&pool->lock);
2037}
2038
2039static void send_mayday(struct work_struct *work)
2040{
2041 struct pool_workqueue *pwq = get_work_pwq(work);
2042 struct workqueue_struct *wq = pwq->wq;
2043
2044 lockdep_assert_held(&wq_mayday_lock);
2045
2046 if (!wq->rescuer)
2047 return;
2048
2049
2050 if (list_empty(&pwq->mayday_node)) {
2051
2052
2053
2054
2055
2056 get_pwq(pwq);
2057 list_add_tail(&pwq->mayday_node, &wq->maydays);
2058 wake_up_process(wq->rescuer->task);
2059 }
2060}
2061
2062static void pool_mayday_timeout(struct timer_list *t)
2063{
2064 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
2065 struct work_struct *work;
2066
2067 raw_spin_lock_irq(&pool->lock);
2068 raw_spin_lock(&wq_mayday_lock);
2069
2070 if (need_to_create_worker(pool)) {
2071
2072
2073
2074
2075
2076
2077 list_for_each_entry(work, &pool->worklist, entry)
2078 send_mayday(work);
2079 }
2080
2081 raw_spin_unlock(&wq_mayday_lock);
2082 raw_spin_unlock_irq(&pool->lock);
2083
2084 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
2085}
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105static void maybe_create_worker(struct worker_pool *pool)
2106__releases(&pool->lock)
2107__acquires(&pool->lock)
2108{
2109restart:
2110 raw_spin_unlock_irq(&pool->lock);
2111
2112
2113 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
2114
2115 while (true) {
2116 if (create_worker(pool) || !need_to_create_worker(pool))
2117 break;
2118
2119 schedule_timeout_interruptible(CREATE_COOLDOWN);
2120
2121 if (!need_to_create_worker(pool))
2122 break;
2123 }
2124
2125 del_timer_sync(&pool->mayday_timer);
2126 raw_spin_lock_irq(&pool->lock);
2127
2128
2129
2130
2131
2132 if (need_to_create_worker(pool))
2133 goto restart;
2134}
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158static bool manage_workers(struct worker *worker)
2159{
2160 struct worker_pool *pool = worker->pool;
2161
2162 if (pool->flags & POOL_MANAGER_ACTIVE)
2163 return false;
2164
2165 pool->flags |= POOL_MANAGER_ACTIVE;
2166 pool->manager = worker;
2167
2168 maybe_create_worker(pool);
2169
2170 pool->manager = NULL;
2171 pool->flags &= ~POOL_MANAGER_ACTIVE;
2172 rcuwait_wake_up(&manager_wait);
2173 return true;
2174}
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190static void process_one_work(struct worker *worker, struct work_struct *work)
2191__releases(&pool->lock)
2192__acquires(&pool->lock)
2193{
2194 struct pool_workqueue *pwq = get_work_pwq(work);
2195 struct worker_pool *pool = worker->pool;
2196 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2197 unsigned long work_data;
2198 struct worker *collision;
2199#ifdef CONFIG_LOCKDEP
2200
2201
2202
2203
2204
2205
2206
2207 struct lockdep_map lockdep_map;
2208
2209 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2210#endif
2211
2212 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2213 raw_smp_processor_id() != pool->cpu);
2214
2215
2216
2217
2218
2219
2220
2221 collision = find_worker_executing_work(pool, work);
2222 if (unlikely(collision)) {
2223 move_linked_works(work, &collision->scheduled, NULL);
2224 return;
2225 }
2226
2227
2228 debug_work_deactivate(work);
2229 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2230 worker->current_work = work;
2231 worker->current_func = work->func;
2232 worker->current_pwq = pwq;
2233 work_data = *work_data_bits(work);
2234 worker->current_color = get_work_color(work_data);
2235
2236
2237
2238
2239
2240 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
2241
2242 list_del_init(&work->entry);
2243
2244
2245
2246
2247
2248
2249
2250 if (unlikely(cpu_intensive))
2251 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2252
2253
2254
2255
2256
2257
2258
2259
2260 if (need_more_worker(pool))
2261 wake_up_worker(pool);
2262
2263
2264
2265
2266
2267
2268
2269 set_work_pool_and_clear_pending(work, pool->id);
2270
2271 raw_spin_unlock_irq(&pool->lock);
2272
2273 lock_map_acquire(&pwq->wq->lockdep_map);
2274 lock_map_acquire(&lockdep_map);
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296 lockdep_invariant_state(true);
2297 trace_workqueue_execute_start(work);
2298 worker->current_func(work);
2299
2300
2301
2302
2303 trace_workqueue_execute_end(work, worker->current_func);
2304 lock_map_release(&lockdep_map);
2305 lock_map_release(&pwq->wq->lockdep_map);
2306
2307 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2308 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2309 " last function: %ps\n",
2310 current->comm, preempt_count(), task_pid_nr(current),
2311 worker->current_func);
2312 debug_show_held_locks(current);
2313 dump_stack();
2314 }
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324 cond_resched();
2325
2326 raw_spin_lock_irq(&pool->lock);
2327
2328
2329 if (unlikely(cpu_intensive))
2330 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2331
2332
2333 worker->last_func = worker->current_func;
2334
2335
2336 hash_del(&worker->hentry);
2337 worker->current_work = NULL;
2338 worker->current_func = NULL;
2339 worker->current_pwq = NULL;
2340 worker->current_color = INT_MAX;
2341 pwq_dec_nr_in_flight(pwq, work_data);
2342}
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356static void process_scheduled_works(struct worker *worker)
2357{
2358 while (!list_empty(&worker->scheduled)) {
2359 struct work_struct *work = list_first_entry(&worker->scheduled,
2360 struct work_struct, entry);
2361 process_one_work(worker, work);
2362 }
2363}
2364
2365static void set_pf_worker(bool val)
2366{
2367 mutex_lock(&wq_pool_attach_mutex);
2368 if (val)
2369 current->flags |= PF_WQ_WORKER;
2370 else
2371 current->flags &= ~PF_WQ_WORKER;
2372 mutex_unlock(&wq_pool_attach_mutex);
2373}
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387static int worker_thread(void *__worker)
2388{
2389 struct worker *worker = __worker;
2390 struct worker_pool *pool = worker->pool;
2391
2392
2393 set_pf_worker(true);
2394woke_up:
2395 raw_spin_lock_irq(&pool->lock);
2396
2397
2398 if (unlikely(worker->flags & WORKER_DIE)) {
2399 raw_spin_unlock_irq(&pool->lock);
2400 WARN_ON_ONCE(!list_empty(&worker->entry));
2401 set_pf_worker(false);
2402
2403 set_task_comm(worker->task, "kworker/dying");
2404 ida_free(&pool->worker_ida, worker->id);
2405 worker_detach_from_pool(worker);
2406 kfree(worker);
2407 return 0;
2408 }
2409
2410 worker_leave_idle(worker);
2411recheck:
2412
2413 if (!need_more_worker(pool))
2414 goto sleep;
2415
2416
2417 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2418 goto recheck;
2419
2420
2421
2422
2423
2424
2425 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2426
2427
2428
2429
2430
2431
2432
2433
2434 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2435
2436 do {
2437 struct work_struct *work =
2438 list_first_entry(&pool->worklist,
2439 struct work_struct, entry);
2440
2441 pool->watchdog_ts = jiffies;
2442
2443 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2444
2445 process_one_work(worker, work);
2446 if (unlikely(!list_empty(&worker->scheduled)))
2447 process_scheduled_works(worker);
2448 } else {
2449 move_linked_works(work, &worker->scheduled, NULL);
2450 process_scheduled_works(worker);
2451 }
2452 } while (keep_working(pool));
2453
2454 worker_set_flags(worker, WORKER_PREP);
2455sleep:
2456
2457
2458
2459
2460
2461
2462
2463 worker_enter_idle(worker);
2464 __set_current_state(TASK_IDLE);
2465 raw_spin_unlock_irq(&pool->lock);
2466 schedule();
2467 goto woke_up;
2468}
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491static int rescuer_thread(void *__rescuer)
2492{
2493 struct worker *rescuer = __rescuer;
2494 struct workqueue_struct *wq = rescuer->rescue_wq;
2495 struct list_head *scheduled = &rescuer->scheduled;
2496 bool should_stop;
2497
2498 set_user_nice(current, RESCUER_NICE_LEVEL);
2499
2500
2501
2502
2503
2504 set_pf_worker(true);
2505repeat:
2506 set_current_state(TASK_IDLE);
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516 should_stop = kthread_should_stop();
2517
2518
2519 raw_spin_lock_irq(&wq_mayday_lock);
2520
2521 while (!list_empty(&wq->maydays)) {
2522 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2523 struct pool_workqueue, mayday_node);
2524 struct worker_pool *pool = pwq->pool;
2525 struct work_struct *work, *n;
2526 bool first = true;
2527
2528 __set_current_state(TASK_RUNNING);
2529 list_del_init(&pwq->mayday_node);
2530
2531 raw_spin_unlock_irq(&wq_mayday_lock);
2532
2533 worker_attach_to_pool(rescuer, pool);
2534
2535 raw_spin_lock_irq(&pool->lock);
2536
2537
2538
2539
2540
2541 WARN_ON_ONCE(!list_empty(scheduled));
2542 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2543 if (get_work_pwq(work) == pwq) {
2544 if (first)
2545 pool->watchdog_ts = jiffies;
2546 move_linked_works(work, scheduled, &n);
2547 }
2548 first = false;
2549 }
2550
2551 if (!list_empty(scheduled)) {
2552 process_scheduled_works(rescuer);
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563 if (pwq->nr_active && need_to_create_worker(pool)) {
2564 raw_spin_lock(&wq_mayday_lock);
2565
2566
2567
2568
2569 if (wq->rescuer && list_empty(&pwq->mayday_node)) {
2570 get_pwq(pwq);
2571 list_add_tail(&pwq->mayday_node, &wq->maydays);
2572 }
2573 raw_spin_unlock(&wq_mayday_lock);
2574 }
2575 }
2576
2577
2578
2579
2580
2581 put_pwq(pwq);
2582
2583
2584
2585
2586
2587
2588 if (need_more_worker(pool))
2589 wake_up_worker(pool);
2590
2591 raw_spin_unlock_irq(&pool->lock);
2592
2593 worker_detach_from_pool(rescuer);
2594
2595 raw_spin_lock_irq(&wq_mayday_lock);
2596 }
2597
2598 raw_spin_unlock_irq(&wq_mayday_lock);
2599
2600 if (should_stop) {
2601 __set_current_state(TASK_RUNNING);
2602 set_pf_worker(false);
2603 return 0;
2604 }
2605
2606
2607 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2608 schedule();
2609 goto repeat;
2610}
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623static void check_flush_dependency(struct workqueue_struct *target_wq,
2624 struct work_struct *target_work)
2625{
2626 work_func_t target_func = target_work ? target_work->func : NULL;
2627 struct worker *worker;
2628
2629 if (target_wq->flags & WQ_MEM_RECLAIM)
2630 return;
2631
2632 worker = current_wq_worker();
2633
2634 WARN_ONCE(current->flags & PF_MEMALLOC,
2635 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
2636 current->pid, current->comm, target_wq->name, target_func);
2637 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2638 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2639 "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
2640 worker->current_pwq->wq->name, worker->current_func,
2641 target_wq->name, target_func);
2642}
2643
2644struct wq_barrier {
2645 struct work_struct work;
2646 struct completion done;
2647 struct task_struct *task;
2648};
2649
2650static void wq_barrier_func(struct work_struct *work)
2651{
2652 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2653 complete(&barr->done);
2654}
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680static void insert_wq_barrier(struct pool_workqueue *pwq,
2681 struct wq_barrier *barr,
2682 struct work_struct *target, struct worker *worker)
2683{
2684 unsigned int work_flags = 0;
2685 unsigned int work_color;
2686 struct list_head *head;
2687
2688
2689
2690
2691
2692
2693
2694 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2695 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2696
2697 init_completion_map(&barr->done, &target->lockdep_map);
2698
2699 barr->task = current;
2700
2701
2702 work_flags |= WORK_STRUCT_INACTIVE;
2703
2704
2705
2706
2707
2708 if (worker) {
2709 head = worker->scheduled.next;
2710 work_color = worker->current_color;
2711 } else {
2712 unsigned long *bits = work_data_bits(target);
2713
2714 head = target->entry.next;
2715
2716 work_flags |= *bits & WORK_STRUCT_LINKED;
2717 work_color = get_work_color(*bits);
2718 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2719 }
2720
2721 pwq->nr_in_flight[work_color]++;
2722 work_flags |= work_color_to_flags(work_color);
2723
2724 debug_work_activate(&barr->work);
2725 insert_work(pwq, &barr->work, head, work_flags);
2726}
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2760 int flush_color, int work_color)
2761{
2762 bool wait = false;
2763 struct pool_workqueue *pwq;
2764
2765 if (flush_color >= 0) {
2766 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2767 atomic_set(&wq->nr_pwqs_to_flush, 1);
2768 }
2769
2770 for_each_pwq(pwq, wq) {
2771 struct worker_pool *pool = pwq->pool;
2772
2773 raw_spin_lock_irq(&pool->lock);
2774
2775 if (flush_color >= 0) {
2776 WARN_ON_ONCE(pwq->flush_color != -1);
2777
2778 if (pwq->nr_in_flight[flush_color]) {
2779 pwq->flush_color = flush_color;
2780 atomic_inc(&wq->nr_pwqs_to_flush);
2781 wait = true;
2782 }
2783 }
2784
2785 if (work_color >= 0) {
2786 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2787 pwq->work_color = work_color;
2788 }
2789
2790 raw_spin_unlock_irq(&pool->lock);
2791 }
2792
2793 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2794 complete(&wq->first_flusher->done);
2795
2796 return wait;
2797}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */
2806void flush_workqueue(struct workqueue_struct *wq)
2807{
2808 struct wq_flusher this_flusher = {
2809 .list = LIST_HEAD_INIT(this_flusher.list),
2810 .flush_color = -1,
2811 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
2812 };
2813 int next_color;
2814
2815 if (WARN_ON(!wq_online))
2816 return;
2817
2818 lock_map_acquire(&wq->lockdep_map);
2819 lock_map_release(&wq->lockdep_map);
2820
2821 mutex_lock(&wq->mutex);
2822
2823
2824
2825
2826 next_color = work_next_color(wq->work_color);
2827
2828 if (next_color != wq->flush_color) {
2829
2830
2831
2832
2833
2834 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2835 this_flusher.flush_color = wq->work_color;
2836 wq->work_color = next_color;
2837
2838 if (!wq->first_flusher) {
2839
2840 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2841
2842 wq->first_flusher = &this_flusher;
2843
2844 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2845 wq->work_color)) {
2846
2847 wq->flush_color = next_color;
2848 wq->first_flusher = NULL;
2849 goto out_unlock;
2850 }
2851 } else {
2852
2853 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2854 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2855 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2856 }
2857 } else {
2858
2859
2860
2861
2862
2863 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2864 }
2865
2866 check_flush_dependency(wq, NULL);
2867
2868 mutex_unlock(&wq->mutex);
2869
2870 wait_for_completion(&this_flusher.done);
2871
2872
2873
2874
2875
2876
2877
2878 if (READ_ONCE(wq->first_flusher) != &this_flusher)
2879 return;
2880
2881 mutex_lock(&wq->mutex);
2882
2883
2884 if (wq->first_flusher != &this_flusher)
2885 goto out_unlock;
2886
2887 WRITE_ONCE(wq->first_flusher, NULL);
2888
2889 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2890 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2891
2892 while (true) {
2893 struct wq_flusher *next, *tmp;
2894
2895
2896 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2897 if (next->flush_color != wq->flush_color)
2898 break;
2899 list_del_init(&next->list);
2900 complete(&next->done);
2901 }
2902
2903 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2904 wq->flush_color != work_next_color(wq->work_color));
2905
2906
2907 wq->flush_color = work_next_color(wq->flush_color);
2908
2909
2910 if (!list_empty(&wq->flusher_overflow)) {
2911
2912
2913
2914
2915
2916
2917 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2918 tmp->flush_color = wq->work_color;
2919
2920 wq->work_color = work_next_color(wq->work_color);
2921
2922 list_splice_tail_init(&wq->flusher_overflow,
2923 &wq->flusher_queue);
2924 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2925 }
2926
2927 if (list_empty(&wq->flusher_queue)) {
2928 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2929 break;
2930 }
2931
2932
2933
2934
2935
2936 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2937 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2938
2939 list_del_init(&next->list);
2940 wq->first_flusher = next;
2941
2942 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2943 break;
2944
2945
2946
2947
2948
2949 wq->first_flusher = NULL;
2950 }
2951
2952out_unlock:
2953 mutex_unlock(&wq->mutex);
2954}
2955EXPORT_SYMBOL(flush_workqueue);
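
/*
 * Typical use (hypothetical caller, not part of this file): a driver that
 * queues work on its own workqueue flushes it on suspend so no work item
 * is still running when the device is powered down.
 *
 *	flush_workqueue(my_dev->wq);
 *
 * destroy_workqueue() drains the workqueue itself, so an explicit flush
 * immediately before destruction is usually redundant.
 */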
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968void drain_workqueue(struct workqueue_struct *wq)
2969{
2970 unsigned int flush_cnt = 0;
2971 struct pool_workqueue *pwq;
2972
2973
2974
2975
2976
2977
2978 mutex_lock(&wq->mutex);
2979 if (!wq->nr_drainers++)
2980 wq->flags |= __WQ_DRAINING;
2981 mutex_unlock(&wq->mutex);
2982reflush:
2983 flush_workqueue(wq);
2984
2985 mutex_lock(&wq->mutex);
2986
2987 for_each_pwq(pwq, wq) {
2988 bool drained;
2989
2990 raw_spin_lock_irq(&pwq->pool->lock);
2991 drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
2992 raw_spin_unlock_irq(&pwq->pool->lock);
2993
2994 if (drained)
2995 continue;
2996
2997 if (++flush_cnt == 10 ||
2998 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2999 pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
3000 wq->name, __func__, flush_cnt);
3001
3002 mutex_unlock(&wq->mutex);
3003 goto reflush;
3004 }
3005
3006 if (!--wq->nr_drainers)
3007 wq->flags &= ~__WQ_DRAINING;
3008 mutex_unlock(&wq->mutex);
3009}
3010EXPORT_SYMBOL_GPL(drain_workqueue);
3011
3012static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
3013 bool from_cancel)
3014{
3015 struct worker *worker = NULL;
3016 struct worker_pool *pool;
3017 struct pool_workqueue *pwq;
3018
3019 might_sleep();
3020
3021 rcu_read_lock();
3022 pool = get_work_pool(work);
3023 if (!pool) {
3024 rcu_read_unlock();
3025 return false;
3026 }
3027
3028 raw_spin_lock_irq(&pool->lock);
3029
3030 pwq = get_work_pwq(work);
3031 if (pwq) {
3032 if (unlikely(pwq->pool != pool))
3033 goto already_gone;
3034 } else {
3035 worker = find_worker_executing_work(pool, work);
3036 if (!worker)
3037 goto already_gone;
3038 pwq = worker->current_pwq;
3039 }
3040
3041 check_flush_dependency(pwq->wq, work);
3042
3043 insert_wq_barrier(pwq, barr, work, worker);
3044 raw_spin_unlock_irq(&pool->lock);
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055 if (!from_cancel &&
3056 (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
3057 lock_map_acquire(&pwq->wq->lockdep_map);
3058 lock_map_release(&pwq->wq->lockdep_map);
3059 }
3060 rcu_read_unlock();
3061 return true;
3062already_gone:
3063 raw_spin_unlock_irq(&pool->lock);
3064 rcu_read_unlock();
3065 return false;
3066}
3067
3068static bool __flush_work(struct work_struct *work, bool from_cancel)
3069{
3070 struct wq_barrier barr;
3071
3072 if (WARN_ON(!wq_online))
3073 return false;
3074
3075 if (WARN_ON(!work->func))
3076 return false;
3077
3078 if (!from_cancel) {
3079 lock_map_acquire(&work->lockdep_map);
3080 lock_map_release(&work->lockdep_map);
3081 }
3082
3083 if (start_flush_work(work, &barr, from_cancel)) {
3084 wait_for_completion(&barr.done);
3085 destroy_work_on_stack(&barr.work);
3086 return true;
3087 } else {
3088 return false;
3089 }
3090}

/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
3103bool flush_work(struct work_struct *work)
3104{
3105 return __flush_work(work, false);
3106}
3107EXPORT_SYMBOL_GPL(flush_work);
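
/*
 * Usage sketch (hypothetical caller, not part of this file): make sure a
 * previously queued item has completed before tearing down the data it
 * uses.
 *
 *	queue_work(system_wq, &my_dev->reset_work);
 *	...
 *	flush_work(&my_dev->reset_work);
 *	kfree(my_dev->reset_buf);
 *
 * If the work might be requeued concurrently, cancel_work_sync() below is
 * usually the safer teardown primitive.
 */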
3108
3109struct cwt_wait {
3110 wait_queue_entry_t wait;
3111 struct work_struct *work;
3112};
3113
3114static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
3115{
3116 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3117
3118 if (cwait->work != key)
3119 return 0;
3120 return autoremove_wake_function(wait, mode, sync, key);
3121}
3122
3123static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
3124{
3125 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
3126 unsigned long flags;
3127 int ret;
3128
3129 do {
3130 ret = try_to_grab_pending(work, is_dwork, &flags);
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147 if (unlikely(ret == -ENOENT)) {
3148 struct cwt_wait cwait;
3149
3150 init_wait(&cwait.wait);
3151 cwait.wait.func = cwt_wakefn;
3152 cwait.work = work;
3153
3154 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
3155 TASK_UNINTERRUPTIBLE);
3156 if (work_is_canceling(work))
3157 schedule();
3158 finish_wait(&cancel_waitq, &cwait.wait);
3159 }
3160 } while (unlikely(ret < 0));
3161
3162
3163 mark_work_canceling(work);
3164 local_irq_restore(flags);
3165
3166
3167
3168
3169
3170 if (wq_online)
3171 __flush_work(work, true);
3172
3173 clear_work_data(work);
3174
3175
3176
3177
3178
3179
3180 smp_mb();
3181 if (waitqueue_active(&cancel_waitq))
3182 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
3183
3184 return ret;
3185}
3186
3187/**
3188 * cancel_work_sync - cancel a work and wait for it to finish
3189 * @work: the work to cancel
3190 *
3191 * Cancel @work and wait for its execution to finish.  This function
3192 * can be used even if the work re-queues itself or migrates to
3193 * another workqueue.  On return from this function, @work is
3194 * guaranteed to be not pending or executing on any CPU.
3195 *
3196 * cancel_work_sync(&delayed_work->work) must not be used for
3197 * delayed_work's.  Use cancel_delayed_work_sync() instead.
3198 *
3199 * The caller must ensure that the workqueue on which @work was last
3200 * queued can't be destroyed before this function returns.
3201 *
3202 * Return:
3203 * %true if @work was pending, %false otherwise.
3204 */
3205bool cancel_work_sync(struct work_struct *work)
3206{
3207 return __cancel_work_timer(work, false);
3208}
3209EXPORT_SYMBOL_GPL(cancel_work_sync);
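/*
 * Example (illustrative sketch, not original kernel text; struct foo_dev and
 * foo_dev_remove() are made-up names): typical teardown pattern.
 *
 *	struct foo_dev {
 *		struct work_struct reset_work;
 *	};
 *
 *	static void foo_dev_remove(struct foo_dev *foo)
 *	{
 *		// afterwards reset_work is neither pending nor running
 *		// anywhere, so it is safe to free the containing object
 *		cancel_work_sync(&foo->reset_work);
 *		kfree(foo);
 *	}
 */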
3210
3211/**
3212 * flush_delayed_work - wait for a dwork to finish executing the last queueing
3213 * @dwork: the delayed work to flush
3214 *
3215 * Delayed timer is cancelled and the pending work is queued for
3216 * immediate execution.  Like flush_work(), this function only
3217 * considers the last queueing instance of @dwork.
3218 *
3219 * Return:
3220 * %true if flush_work() waited for the work to finish execution,
3221 * %false if it was already idle.
3222 */
3223bool flush_delayed_work(struct delayed_work *dwork)
3224{
3225 local_irq_disable();
3226 if (del_timer_sync(&dwork->timer))
3227 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3228 local_irq_enable();
3229 return flush_work(&dwork->work);
3230}
3231EXPORT_SYMBOL(flush_delayed_work);
3232
3233/**
3234 * flush_rcu_work - wait for a rwork to finish executing the last queueing
3235 * @rwork: the rcu work to flush
3236 *
3237 * Return:
3238 * %true if flush_rcu_work() waited for the work to finish execution,
3239 * %false if it was already idle.
3240 */
3241bool flush_rcu_work(struct rcu_work *rwork)
3242{
3243 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3244 rcu_barrier();
3245 flush_work(&rwork->work);
3246 return true;
3247 } else {
3248 return flush_work(&rwork->work);
3249 }
3250}
3251EXPORT_SYMBOL(flush_rcu_work);
3252
3253static bool __cancel_work(struct work_struct *work, bool is_dwork)
3254{
3255 unsigned long flags;
3256 int ret;
3257
3258 do {
3259 ret = try_to_grab_pending(work, is_dwork, &flags);
3260 } while (unlikely(ret == -EAGAIN));
3261
3262 if (unlikely(ret < 0))
3263 return false;
3264
3265 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3266 local_irq_restore(flags);
3267 return ret;
3268}
3269
3270/**
3271 * cancel_delayed_work - cancel a delayed work
3272 * @dwork: delayed_work to cancel
3273 *
3274 * Kill off a pending delayed_work.
3275 *
3276 * Return: %true if @dwork was pending and canceled; %false if it wasn't
3277 * pending.
3278 *
3279 * Note:
3280 * The work callback function may still be running on return, unless
3281 * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
3282 * use cancel_delayed_work_sync() to wait on it.
3283 *
3284 * This function is safe to call from any context including IRQ handler.
3285 */
3286bool cancel_delayed_work(struct delayed_work *dwork)
3287{
3288 return __cancel_work(&dwork->work, true);
3289}
3290EXPORT_SYMBOL(cancel_delayed_work);
3291
3292/**
3293 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
3294 * @dwork: the delayed work to cancel
3295 *
3296 * This is cancel_work_sync() for delayed works.
3297 *
3298 * Return:
3299 * %true if @dwork was pending, %false otherwise.
3300 */
3301bool cancel_delayed_work_sync(struct delayed_work *dwork)
3302{
3303 return __cancel_work_timer(&dwork->work, true);
3304}
3305EXPORT_SYMBOL(cancel_delayed_work_sync);
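/*
 * Example (illustrative sketch, not original kernel text; the poll_work
 * names are made up): a self-rearming delayed work stopped at teardown.
 * cancel_delayed_work_sync() copes with the re-queueing in poll_fn().
 *
 *	static void poll_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *
 *		// ... poll the hardware ...
 *		schedule_delayed_work(dwork, HZ);
 *	}
 *	static DECLARE_DELAYED_WORK(poll_work, poll_fn);
 *
 *	static void poll_stop(void)
 *	{
 *		cancel_delayed_work_sync(&poll_work);
 *	}
 */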
3306
3307/**
3308 * schedule_on_each_cpu - execute a function synchronously on each online CPU
3309 * @func: the function to call
3310 *
3311 * schedule_on_each_cpu() executes @func on each online CPU using the
3312 * system workqueue and blocks until all CPUs have completed.
3313 * schedule_on_each_cpu() is very slow.
3314 *
3315 * Return:
3316 * 0 on success, -errno on failure.
3317 */
3318int schedule_on_each_cpu(work_func_t func)
3319{
3320 int cpu;
3321 struct work_struct __percpu *works;
3322
3323 works = alloc_percpu(struct work_struct);
3324 if (!works)
3325 return -ENOMEM;
3326
3327 cpus_read_lock();
3328
3329 for_each_online_cpu(cpu) {
3330 struct work_struct *work = per_cpu_ptr(works, cpu);
3331
3332 INIT_WORK(work, func);
3333 schedule_work_on(cpu, work);
3334 }
3335
3336 for_each_online_cpu(cpu)
3337 flush_work(per_cpu_ptr(works, cpu));
3338
3339 cpus_read_unlock();
3340 free_percpu(works);
3341 return 0;
3342}
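/*
 * Example (illustrative sketch, not original kernel text; the drain helper
 * names are made up): run a callback once on every online CPU and wait for
 * all of them to finish.
 *
 *	static void foo_drain_local(struct work_struct *unused)
 *	{
 *		// runs in process context on each online CPU in turn
 *	}
 *
 *	static int foo_drain_all(void)
 *	{
 *		return schedule_on_each_cpu(foo_drain_local);
 *	}
 */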
3343
3344/**
3345 * execute_in_process_context - reliably execute the routine with user context
3346 * @fn:	the function to execute
3347 * @ew:	guaranteed storage for the execute work structure (must
3348 *	be available when the work executes)
3349 *
3350 * Executes the function immediately if process context is available,
3351 * otherwise schedules the function for delayed execution.
3352 *
3353 * Return:	0 - function was executed
3354 *		1 - function was scheduled for execution
3355 */
3356int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3357{
3358 if (!in_interrupt()) {
3359 fn(&ew->work);
3360 return 0;
3361 }
3362
3363 INIT_WORK(&ew->work, fn);
3364 schedule_work(&ew->work);
3365
3366 return 1;
3367}
3368EXPORT_SYMBOL_GPL(execute_in_process_context);
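/*
 * Example (illustrative sketch, not original kernel text; struct foo_obj and
 * its helpers are made up): release an object immediately when process
 * context is available, otherwise defer to the system workqueue.
 *
 *	struct foo_obj {
 *		struct execute_work ew;
 *	};
 *
 *	static void foo_obj_release(struct work_struct *work)
 *	{
 *		kfree(container_of(work, struct foo_obj, ew.work));
 *	}
 *
 *	static void foo_obj_free(struct foo_obj *obj)
 *	{
 *		execute_in_process_context(foo_obj_release, &obj->ew);
 *	}
 */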
3369
3370/**
3371 * free_workqueue_attrs - free a workqueue_attrs
3372 * @attrs: workqueue_attrs to free
3373 *
3374 * Undo alloc_workqueue_attrs().
3375 */
3376void free_workqueue_attrs(struct workqueue_attrs *attrs)
3377{
3378 if (attrs) {
3379 free_cpumask_var(attrs->cpumask);
3380 kfree(attrs);
3381 }
3382}
3383
3384/**
3385 * alloc_workqueue_attrs - allocate a workqueue_attrs
3386 *
3387 * Allocate a new workqueue_attrs, initialize with default settings and
3388 * return it.
3389 *
3390 * Return: The allocated new workqueue_attrs on success. %NULL on failure.
3391 */
3392struct workqueue_attrs *alloc_workqueue_attrs(void)
3393{
3394 struct workqueue_attrs *attrs;
3395
3396 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
3397 if (!attrs)
3398 goto fail;
3399 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
3400 goto fail;
3401
3402 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3403 return attrs;
3404fail:
3405 free_workqueue_attrs(attrs);
3406 return NULL;
3407}
3408
3409static void copy_workqueue_attrs(struct workqueue_attrs *to,
3410 const struct workqueue_attrs *from)
3411{
3412 to->nice = from->nice;
3413 cpumask_copy(to->cpumask, from->cpumask);
3414
3415
3416
3417
3418
3419 to->no_numa = from->no_numa;
3420}
3421
3422
3423static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3424{
3425 u32 hash = 0;
3426
3427 hash = jhash_1word(attrs->nice, hash);
3428 hash = jhash(cpumask_bits(attrs->cpumask),
3429 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3430 return hash;
3431}
3432
3433
3434static bool wqattrs_equal(const struct workqueue_attrs *a,
3435 const struct workqueue_attrs *b)
3436{
3437 if (a->nice != b->nice)
3438 return false;
3439 if (!cpumask_equal(a->cpumask, b->cpumask))
3440 return false;
3441 return true;
3442}
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454static int init_worker_pool(struct worker_pool *pool)
3455{
3456 raw_spin_lock_init(&pool->lock);
3457 pool->id = -1;
3458 pool->cpu = -1;
3459 pool->node = NUMA_NO_NODE;
3460 pool->flags |= POOL_DISASSOCIATED;
3461 pool->watchdog_ts = jiffies;
3462 INIT_LIST_HEAD(&pool->worklist);
3463 INIT_LIST_HEAD(&pool->idle_list);
3464 hash_init(pool->busy_hash);
3465
3466 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
3467
3468 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
3469
3470 INIT_LIST_HEAD(&pool->workers);
3471
3472 ida_init(&pool->worker_ida);
3473 INIT_HLIST_NODE(&pool->hash_node);
3474 pool->refcnt = 1;
3475
3476
3477 pool->attrs = alloc_workqueue_attrs();
3478 if (!pool->attrs)
3479 return -ENOMEM;
3480 return 0;
3481}
3482
3483#ifdef CONFIG_LOCKDEP
3484static void wq_init_lockdep(struct workqueue_struct *wq)
3485{
3486 char *lock_name;
3487
3488 lockdep_register_key(&wq->key);
3489 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3490 if (!lock_name)
3491 lock_name = wq->name;
3492
3493 wq->lock_name = lock_name;
3494 lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3495}
3496
3497static void wq_unregister_lockdep(struct workqueue_struct *wq)
3498{
3499 lockdep_unregister_key(&wq->key);
3500}
3501
3502static void wq_free_lockdep(struct workqueue_struct *wq)
3503{
3504 if (wq->lock_name != wq->name)
3505 kfree(wq->lock_name);
3506}
3507#else
3508static void wq_init_lockdep(struct workqueue_struct *wq)
3509{
3510}
3511
3512static void wq_unregister_lockdep(struct workqueue_struct *wq)
3513{
3514}
3515
3516static void wq_free_lockdep(struct workqueue_struct *wq)
3517{
3518}
3519#endif
3520
3521static void rcu_free_wq(struct rcu_head *rcu)
3522{
3523 struct workqueue_struct *wq =
3524 container_of(rcu, struct workqueue_struct, rcu);
3525
3526 wq_free_lockdep(wq);
3527
3528 if (!(wq->flags & WQ_UNBOUND))
3529 free_percpu(wq->cpu_pwqs);
3530 else
3531 free_workqueue_attrs(wq->unbound_attrs);
3532
3533 kfree(wq);
3534}
3535
3536static void rcu_free_pool(struct rcu_head *rcu)
3537{
3538 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3539
3540 ida_destroy(&pool->worker_ida);
3541 free_workqueue_attrs(pool->attrs);
3542 kfree(pool);
3543}
3544
3545/* This returns with the lock held on success (pool manager is inactive). */
3546static bool wq_manager_inactive(struct worker_pool *pool)
3547{
3548 raw_spin_lock_irq(&pool->lock);
3549
3550 if (pool->flags & POOL_MANAGER_ACTIVE) {
3551 raw_spin_unlock_irq(&pool->lock);
3552 return false;
3553 }
3554 return true;
3555}
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568static void put_unbound_pool(struct worker_pool *pool)
3569{
3570 DECLARE_COMPLETION_ONSTACK(detach_completion);
3571 struct worker *worker;
3572
3573 lockdep_assert_held(&wq_pool_mutex);
3574
3575 if (--pool->refcnt)
3576 return;
3577
3578
3579 if (WARN_ON(!(pool->cpu < 0)) ||
3580 WARN_ON(!list_empty(&pool->worklist)))
3581 return;
3582
3583
3584 if (pool->id >= 0)
3585 idr_remove(&worker_pool_idr, pool->id);
3586 hash_del(&pool->hash_node);
3587
3588
3589
3590
3591
3592
3593
3594
3595 rcuwait_wait_event(&manager_wait, wq_manager_inactive(pool),
3596 TASK_UNINTERRUPTIBLE);
3597 pool->flags |= POOL_MANAGER_ACTIVE;
3598
3599 while ((worker = first_idle_worker(pool)))
3600 destroy_worker(worker);
3601 WARN_ON(pool->nr_workers || pool->nr_idle);
3602 raw_spin_unlock_irq(&pool->lock);
3603
3604 mutex_lock(&wq_pool_attach_mutex);
3605 if (!list_empty(&pool->workers))
3606 pool->detach_completion = &detach_completion;
3607 mutex_unlock(&wq_pool_attach_mutex);
3608
3609 if (pool->detach_completion)
3610 wait_for_completion(pool->detach_completion);
3611
3612
3613 del_timer_sync(&pool->idle_timer);
3614 del_timer_sync(&pool->mayday_timer);
3615
3616
3617 call_rcu(&pool->rcu, rcu_free_pool);
3618}
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3635{
3636 u32 hash = wqattrs_hash(attrs);
3637 struct worker_pool *pool;
3638 int node;
3639 int target_node = NUMA_NO_NODE;
3640
3641 lockdep_assert_held(&wq_pool_mutex);
3642
3643
3644 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3645 if (wqattrs_equal(pool->attrs, attrs)) {
3646 pool->refcnt++;
3647 return pool;
3648 }
3649 }
3650
3651
3652 if (wq_numa_enabled) {
3653 for_each_node(node) {
3654 if (cpumask_subset(attrs->cpumask,
3655 wq_numa_possible_cpumask[node])) {
3656 target_node = node;
3657 break;
3658 }
3659 }
3660 }
3661
3662
3663 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3664 if (!pool || init_worker_pool(pool) < 0)
3665 goto fail;
3666
3667 lockdep_set_subclass(&pool->lock, 1);
3668 copy_workqueue_attrs(pool->attrs, attrs);
3669 pool->node = target_node;
3670
3671
3672
3673
3674
3675 pool->attrs->no_numa = false;
3676
3677 if (worker_pool_assign_id(pool) < 0)
3678 goto fail;
3679
3680
3681 if (wq_online && !create_worker(pool))
3682 goto fail;
3683
3684
3685 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3686
3687 return pool;
3688fail:
3689 if (pool)
3690 put_unbound_pool(pool);
3691 return NULL;
3692}
3693
3694static void rcu_free_pwq(struct rcu_head *rcu)
3695{
3696 kmem_cache_free(pwq_cache,
3697 container_of(rcu, struct pool_workqueue, rcu));
3698}
3699
3700
3701
3702
3703
3704static void pwq_unbound_release_workfn(struct work_struct *work)
3705{
3706 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3707 unbound_release_work);
3708 struct workqueue_struct *wq = pwq->wq;
3709 struct worker_pool *pool = pwq->pool;
3710 bool is_last = false;
3711
3712
3713
3714
3715
3716 if (!list_empty(&pwq->pwqs_node)) {
3717 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3718 return;
3719
3720 mutex_lock(&wq->mutex);
3721 list_del_rcu(&pwq->pwqs_node);
3722 is_last = list_empty(&wq->pwqs);
3723 mutex_unlock(&wq->mutex);
3724 }
3725
3726 mutex_lock(&wq_pool_mutex);
3727 put_unbound_pool(pool);
3728 mutex_unlock(&wq_pool_mutex);
3729
3730 call_rcu(&pwq->rcu, rcu_free_pwq);
3731
3732
3733
3734
3735
3736 if (is_last) {
3737 wq_unregister_lockdep(wq);
3738 call_rcu(&wq->rcu, rcu_free_wq);
3739 }
3740}
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3751{
3752 struct workqueue_struct *wq = pwq->wq;
3753 bool freezable = wq->flags & WQ_FREEZABLE;
3754 unsigned long flags;
3755
3756
3757 lockdep_assert_held(&wq->mutex);
3758
3759
3760 if (!freezable && pwq->max_active == wq->saved_max_active)
3761 return;
3762
3763
3764 raw_spin_lock_irqsave(&pwq->pool->lock, flags);
3765
3766
3767
3768
3769
3770
3771 if (!freezable || !workqueue_freezing) {
3772 bool kick = false;
3773
3774 pwq->max_active = wq->saved_max_active;
3775
3776 while (!list_empty(&pwq->inactive_works) &&
3777 pwq->nr_active < pwq->max_active) {
3778 pwq_activate_first_inactive(pwq);
3779 kick = true;
3780 }
3781
3782
3783
3784
3785
3786
3787
3788 if (kick)
3789 wake_up_worker(pwq->pool);
3790 } else {
3791 pwq->max_active = 0;
3792 }
3793
3794 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
3795}
3796
3797
3798static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3799 struct worker_pool *pool)
3800{
3801 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3802
3803 memset(pwq, 0, sizeof(*pwq));
3804
3805 pwq->pool = pool;
3806 pwq->wq = wq;
3807 pwq->flush_color = -1;
3808 pwq->refcnt = 1;
3809 INIT_LIST_HEAD(&pwq->inactive_works);
3810 INIT_LIST_HEAD(&pwq->pwqs_node);
3811 INIT_LIST_HEAD(&pwq->mayday_node);
3812 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3813}
3814
3815
3816static void link_pwq(struct pool_workqueue *pwq)
3817{
3818 struct workqueue_struct *wq = pwq->wq;
3819
3820 lockdep_assert_held(&wq->mutex);
3821
3822
3823 if (!list_empty(&pwq->pwqs_node))
3824 return;
3825
3826
3827 pwq->work_color = wq->work_color;
3828
3829
3830 pwq_adjust_max_active(pwq);
3831
3832
3833 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3834}
3835
3836
3837static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3838 const struct workqueue_attrs *attrs)
3839{
3840 struct worker_pool *pool;
3841 struct pool_workqueue *pwq;
3842
3843 lockdep_assert_held(&wq_pool_mutex);
3844
3845 pool = get_unbound_pool(attrs);
3846 if (!pool)
3847 return NULL;
3848
3849 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3850 if (!pwq) {
3851 put_unbound_pool(pool);
3852 return NULL;
3853 }
3854
3855 init_pwq(pwq, wq, pool);
3856 return pwq;
3857}
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3882 int cpu_going_down, cpumask_t *cpumask)
3883{
3884 if (!wq_numa_enabled || attrs->no_numa)
3885 goto use_dfl;
3886
3887
3888 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3889 if (cpu_going_down >= 0)
3890 cpumask_clear_cpu(cpu_going_down, cpumask);
3891
3892 if (cpumask_empty(cpumask))
3893 goto use_dfl;
3894
3895
3896 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3897
3898 if (cpumask_empty(cpumask)) {
3899 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3900 "possible intersect\n");
3901 return false;
3902 }
3903
3904 return !cpumask_equal(cpumask, attrs->cpumask);
3905
3906use_dfl:
3907 cpumask_copy(cpumask, attrs->cpumask);
3908 return false;
3909}
3910
3911
3912static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3913 int node,
3914 struct pool_workqueue *pwq)
3915{
3916 struct pool_workqueue *old_pwq;
3917
3918 lockdep_assert_held(&wq_pool_mutex);
3919 lockdep_assert_held(&wq->mutex);
3920
3921
3922 link_pwq(pwq);
3923
3924 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3925 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3926 return old_pwq;
3927}
3928
3929
3930struct apply_wqattrs_ctx {
3931 struct workqueue_struct *wq;
3932 struct workqueue_attrs *attrs;
3933 struct list_head list;
3934 struct pool_workqueue *dfl_pwq;
3935 struct pool_workqueue *pwq_tbl[];
3936};
3937
3938
3939static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3940{
3941 if (ctx) {
3942 int node;
3943
3944 for_each_node(node)
3945 put_pwq_unlocked(ctx->pwq_tbl[node]);
3946 put_pwq_unlocked(ctx->dfl_pwq);
3947
3948 free_workqueue_attrs(ctx->attrs);
3949
3950 kfree(ctx);
3951 }
3952}
3953
3954
3955static struct apply_wqattrs_ctx *
3956apply_wqattrs_prepare(struct workqueue_struct *wq,
3957 const struct workqueue_attrs *attrs)
3958{
3959 struct apply_wqattrs_ctx *ctx;
3960 struct workqueue_attrs *new_attrs, *tmp_attrs;
3961 int node;
3962
3963 lockdep_assert_held(&wq_pool_mutex);
3964
3965 ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
3966
3967 new_attrs = alloc_workqueue_attrs();
3968 tmp_attrs = alloc_workqueue_attrs();
3969 if (!ctx || !new_attrs || !tmp_attrs)
3970 goto out_free;
3971
3972
3973
3974
3975
3976
3977 copy_workqueue_attrs(new_attrs, attrs);
3978 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3979 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3980 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3981
3982
3983
3984
3985
3986
3987 copy_workqueue_attrs(tmp_attrs, new_attrs);
3988
3989
3990
3991
3992
3993
3994 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3995 if (!ctx->dfl_pwq)
3996 goto out_free;
3997
3998 for_each_node(node) {
3999 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
4000 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
4001 if (!ctx->pwq_tbl[node])
4002 goto out_free;
4003 } else {
4004 ctx->dfl_pwq->refcnt++;
4005 ctx->pwq_tbl[node] = ctx->dfl_pwq;
4006 }
4007 }
4008
4009
4010 copy_workqueue_attrs(new_attrs, attrs);
4011 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
4012 ctx->attrs = new_attrs;
4013
4014 ctx->wq = wq;
4015 free_workqueue_attrs(tmp_attrs);
4016 return ctx;
4017
4018out_free:
4019 free_workqueue_attrs(tmp_attrs);
4020 free_workqueue_attrs(new_attrs);
4021 apply_wqattrs_cleanup(ctx);
4022 return NULL;
4023}
4024
4025
4026static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
4027{
4028 int node;
4029
4030
4031 mutex_lock(&ctx->wq->mutex);
4032
4033 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
4034
4035
4036 for_each_node(node)
4037 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
4038 ctx->pwq_tbl[node]);
4039
4040
4041 link_pwq(ctx->dfl_pwq);
4042 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
4043
4044 mutex_unlock(&ctx->wq->mutex);
4045}
4046
4047static void apply_wqattrs_lock(void)
4048{
4049
4050 cpus_read_lock();
4051 mutex_lock(&wq_pool_mutex);
4052}
4053
4054static void apply_wqattrs_unlock(void)
4055{
4056 mutex_unlock(&wq_pool_mutex);
4057 cpus_read_unlock();
4058}
4059
4060static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
4061 const struct workqueue_attrs *attrs)
4062{
4063 struct apply_wqattrs_ctx *ctx;
4064
4065
4066 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4067 return -EINVAL;
4068
4069
4070 if (!list_empty(&wq->pwqs)) {
4071 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4072 return -EINVAL;
4073
4074 wq->flags &= ~__WQ_ORDERED;
4075 }
4076
4077 ctx = apply_wqattrs_prepare(wq, attrs);
4078 if (!ctx)
4079 return -ENOMEM;
4080
4081
4082 apply_wqattrs_commit(ctx);
4083 apply_wqattrs_cleanup(ctx);
4084
4085 return 0;
4086}
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106int apply_workqueue_attrs(struct workqueue_struct *wq,
4107 const struct workqueue_attrs *attrs)
4108{
4109 int ret;
4110
4111 lockdep_assert_cpus_held();
4112
4113 mutex_lock(&wq_pool_mutex);
4114 ret = apply_workqueue_attrs_locked(wq, attrs);
4115 mutex_unlock(&wq_pool_mutex);
4116
4117 return ret;
4118}
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4143 bool online)
4144{
4145 int node = cpu_to_node(cpu);
4146 int cpu_off = online ? -1 : cpu;
4147 struct pool_workqueue *old_pwq = NULL, *pwq;
4148 struct workqueue_attrs *target_attrs;
4149 cpumask_t *cpumask;
4150
4151 lockdep_assert_held(&wq_pool_mutex);
4152
4153 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
4154 wq->unbound_attrs->no_numa)
4155 return;
4156
4157
4158
4159
4160
4161
4162 target_attrs = wq_update_unbound_numa_attrs_buf;
4163 cpumask = target_attrs->cpumask;
4164
4165 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4166 pwq = unbound_pwq_by_node(wq, node);
4167
4168
4169
4170
4171
4172
4173
4174 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
4175 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4176 return;
4177 } else {
4178 goto use_dfl_pwq;
4179 }
4180
4181
4182 pwq = alloc_unbound_pwq(wq, target_attrs);
4183 if (!pwq) {
4184 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4185 wq->name);
4186 goto use_dfl_pwq;
4187 }
4188
4189
4190 mutex_lock(&wq->mutex);
4191 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4192 goto out_unlock;
4193
4194use_dfl_pwq:
4195 mutex_lock(&wq->mutex);
4196 raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
4197 get_pwq(wq->dfl_pwq);
4198 raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4199 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4200out_unlock:
4201 mutex_unlock(&wq->mutex);
4202 put_pwq_unlocked(old_pwq);
4203}
4204
4205static int alloc_and_link_pwqs(struct workqueue_struct *wq)
4206{
4207 bool highpri = wq->flags & WQ_HIGHPRI;
4208 int cpu, ret;
4209
4210 if (!(wq->flags & WQ_UNBOUND)) {
4211 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4212 if (!wq->cpu_pwqs)
4213 return -ENOMEM;
4214
4215 for_each_possible_cpu(cpu) {
4216 struct pool_workqueue *pwq =
4217 per_cpu_ptr(wq->cpu_pwqs, cpu);
4218 struct worker_pool *cpu_pools =
4219 per_cpu(cpu_worker_pools, cpu);
4220
4221 init_pwq(pwq, wq, &cpu_pools[highpri]);
4222
4223 mutex_lock(&wq->mutex);
4224 link_pwq(pwq);
4225 mutex_unlock(&wq->mutex);
4226 }
4227 return 0;
4228 }
4229
4230 cpus_read_lock();
4231 if (wq->flags & __WQ_ORDERED) {
4232 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4233
4234 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4235 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4236 "ordering guarantee broken for workqueue %s\n", wq->name);
4237 } else {
4238 ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4239 }
4240 cpus_read_unlock();
4241
4242 return ret;
4243}
4244
4245static int wq_clamp_max_active(int max_active, unsigned int flags,
4246 const char *name)
4247{
4248 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4249
4250 if (max_active < 1 || max_active > lim)
4251 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4252 max_active, name, 1, lim);
4253
4254 return clamp_val(max_active, 1, lim);
4255}
4256
4257
4258
4259
4260
4261static int init_rescuer(struct workqueue_struct *wq)
4262{
4263 struct worker *rescuer;
4264 int ret;
4265
4266 if (!(wq->flags & WQ_MEM_RECLAIM))
4267 return 0;
4268
4269 rescuer = alloc_worker(NUMA_NO_NODE);
4270 if (!rescuer)
4271 return -ENOMEM;
4272
4273 rescuer->rescue_wq = wq;
4274 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4275 if (IS_ERR(rescuer->task)) {
4276 ret = PTR_ERR(rescuer->task);
4277 kfree(rescuer);
4278 return ret;
4279 }
4280
4281 wq->rescuer = rescuer;
4282 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4283 wake_up_process(rescuer->task);
4284
4285 return 0;
4286}
4287
4288__printf(1, 4)
4289struct workqueue_struct *alloc_workqueue(const char *fmt,
4290 unsigned int flags,
4291 int max_active, ...)
4292{
4293 size_t tbl_size = 0;
4294 va_list args;
4295 struct workqueue_struct *wq;
4296 struct pool_workqueue *pwq;
4297
4298
4299
4300
4301
4302
4303
4304
4305 if ((flags & WQ_UNBOUND) && max_active == 1)
4306 flags |= __WQ_ORDERED;
4307
4308
4309 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4310 flags |= WQ_UNBOUND;
4311
4312
4313 if (flags & WQ_UNBOUND)
4314 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4315
4316 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4317 if (!wq)
4318 return NULL;
4319
4320 if (flags & WQ_UNBOUND) {
4321 wq->unbound_attrs = alloc_workqueue_attrs();
4322 if (!wq->unbound_attrs)
4323 goto err_free_wq;
4324 }
4325
4326 va_start(args, max_active);
4327 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4328 va_end(args);
4329
4330 max_active = max_active ?: WQ_DFL_ACTIVE;
4331 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4332
4333
4334 wq->flags = flags;
4335 wq->saved_max_active = max_active;
4336 mutex_init(&wq->mutex);
4337 atomic_set(&wq->nr_pwqs_to_flush, 0);
4338 INIT_LIST_HEAD(&wq->pwqs);
4339 INIT_LIST_HEAD(&wq->flusher_queue);
4340 INIT_LIST_HEAD(&wq->flusher_overflow);
4341 INIT_LIST_HEAD(&wq->maydays);
4342
4343 wq_init_lockdep(wq);
4344 INIT_LIST_HEAD(&wq->list);
4345
4346 if (alloc_and_link_pwqs(wq) < 0)
4347 goto err_unreg_lockdep;
4348
4349 if (wq_online && init_rescuer(wq) < 0)
4350 goto err_destroy;
4351
4352 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4353 goto err_destroy;
4354
4355
4356
4357
4358
4359
4360 mutex_lock(&wq_pool_mutex);
4361
4362 mutex_lock(&wq->mutex);
4363 for_each_pwq(pwq, wq)
4364 pwq_adjust_max_active(pwq);
4365 mutex_unlock(&wq->mutex);
4366
4367 list_add_tail_rcu(&wq->list, &workqueues);
4368
4369 mutex_unlock(&wq_pool_mutex);
4370
4371 return wq;
4372
4373err_unreg_lockdep:
4374 wq_unregister_lockdep(wq);
4375 wq_free_lockdep(wq);
4376err_free_wq:
4377 free_workqueue_attrs(wq->unbound_attrs);
4378 kfree(wq);
4379 return NULL;
4380err_destroy:
4381 destroy_workqueue(wq);
4382 return NULL;
4383}
4384EXPORT_SYMBOL_GPL(alloc_workqueue);
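/*
 * Example (illustrative sketch, not original kernel text; "foo" and foo_wq
 * are made-up names): a driver allocating its own workqueue that
 * participates in memory reclaim.  A max_active of 0 selects the default
 * (WQ_DFL_ACTIVE).
 *
 *	static struct workqueue_struct *foo_wq;
 *
 *	static int __init foo_init(void)
 *	{
 *		foo_wq = alloc_workqueue("foo", WQ_MEM_RECLAIM, 0);
 *		if (!foo_wq)
 *			return -ENOMEM;
 *		return 0;
 *	}
 */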
4385
4386static bool pwq_busy(struct pool_workqueue *pwq)
4387{
4388 int i;
4389
4390 for (i = 0; i < WORK_NR_COLORS; i++)
4391 if (pwq->nr_in_flight[i])
4392 return true;
4393
4394 if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4395 return true;
4396 if (pwq->nr_active || !list_empty(&pwq->inactive_works))
4397 return true;
4398
4399 return false;
4400}
4401
4402/**
4403 * destroy_workqueue - safely terminate a workqueue
4404 * @wq: target workqueue
4405 *
4406 * Safely destroy a workqueue. All work currently pending will be done first.
4407 */
4408void destroy_workqueue(struct workqueue_struct *wq)
4409{
4410 struct pool_workqueue *pwq;
4411 int node;
4412
4413
4414
4415
4416
4417 workqueue_sysfs_unregister(wq);
4418
4419
4420 drain_workqueue(wq);
4421
4422
4423 if (wq->rescuer) {
4424 struct worker *rescuer = wq->rescuer;
4425
4426
4427 raw_spin_lock_irq(&wq_mayday_lock);
4428 wq->rescuer = NULL;
4429 raw_spin_unlock_irq(&wq_mayday_lock);
4430
4431
4432 kthread_stop(rescuer->task);
4433 kfree(rescuer);
4434 }
4435
4436
4437
4438
4439
4440 mutex_lock(&wq_pool_mutex);
4441 mutex_lock(&wq->mutex);
4442 for_each_pwq(pwq, wq) {
4443 raw_spin_lock_irq(&pwq->pool->lock);
4444 if (WARN_ON(pwq_busy(pwq))) {
4445 pr_warn("%s: %s has the following busy pwq\n",
4446 __func__, wq->name);
4447 show_pwq(pwq);
4448 raw_spin_unlock_irq(&pwq->pool->lock);
4449 mutex_unlock(&wq->mutex);
4450 mutex_unlock(&wq_pool_mutex);
4451 show_one_workqueue(wq);
4452 return;
4453 }
4454 raw_spin_unlock_irq(&pwq->pool->lock);
4455 }
4456 mutex_unlock(&wq->mutex);
4457
4458
4459
4460
4461
4462 list_del_rcu(&wq->list);
4463 mutex_unlock(&wq_pool_mutex);
4464
4465 if (!(wq->flags & WQ_UNBOUND)) {
4466 wq_unregister_lockdep(wq);
4467
4468
4469
4470
4471 call_rcu(&wq->rcu, rcu_free_wq);
4472 } else {
4473
4474
4475
4476
4477
4478 for_each_node(node) {
4479 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4480 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4481 put_pwq_unlocked(pwq);
4482 }
4483
4484
4485
4486
4487
4488 pwq = wq->dfl_pwq;
4489 wq->dfl_pwq = NULL;
4490 put_pwq_unlocked(pwq);
4491 }
4492}
4493EXPORT_SYMBOL_GPL(destroy_workqueue);
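/*
 * Example (illustrative sketch, not original kernel text; continues the
 * made-up foo_wq above): callers must stop queueing new work before
 * destruction; destroy_workqueue() drains whatever is already queued.
 *
 *	static void __exit foo_exit(void)
 *	{
 *		// no new work may be queued on foo_wq past this point
 *		destroy_workqueue(foo_wq);
 *	}
 */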
4494
4495/**
4496 * workqueue_set_max_active - adjust max_active of a workqueue
4497 * @wq: target workqueue
4498 * @max_active: new max_active value.
4499 *
4500 * Set max_active of @wq to @max_active.
4501 *
4502 * CONTEXT:
4503 * Don't call from IRQ context.
4504 */
4505void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4506{
4507 struct pool_workqueue *pwq;
4508
4509
4510 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4511 return;
4512
4513 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4514
4515 mutex_lock(&wq->mutex);
4516
4517 wq->flags &= ~__WQ_ORDERED;
4518 wq->saved_max_active = max_active;
4519
4520 for_each_pwq(pwq, wq)
4521 pwq_adjust_max_active(pwq);
4522
4523 mutex_unlock(&wq->mutex);
4524}
4525EXPORT_SYMBOL_GPL(workqueue_set_max_active);
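/*
 * Example (illustrative sketch, not original kernel text; foo_wq and the
 * helper name are made up): adjust concurrency at runtime, e.g. from a
 * driver-specific tunable.
 *
 *	static void foo_set_concurrency(int level)
 *	{
 *		workqueue_set_max_active(foo_wq, level);
 *	}
 */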
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535struct work_struct *current_work(void)
4536{
4537 struct worker *worker = current_wq_worker();
4538
4539 return worker ? worker->current_work : NULL;
4540}
4541EXPORT_SYMBOL(current_work);
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551bool current_is_workqueue_rescuer(void)
4552{
4553 struct worker *worker = current_wq_worker();
4554
4555 return worker && worker->rescue_wq;
4556}
4557
4558/**
4559 * workqueue_congested - test whether a workqueue is congested
4560 * @cpu: CPU in question
4561 * @wq: target workqueue
4562 *
4563 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
4564 * no synchronization around this function and the test result is
4565 * unreliable and only useful as advisory hints or for debugging.
4566 *
4567 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
4568 *
4569 * With the exception of ordered workqueues, all workqueues have per-cpu
4570 * pool_workqueues, each with its own congested state.  A workqueue being
4571 * congested on one CPU doesn't mean that it is congested on any other
4572 * CPU.
4573 *
4574 * Return: %true if congested, %false otherwise.
4575 */
4576bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4577{
4578 struct pool_workqueue *pwq;
4579 bool ret;
4580
4581 rcu_read_lock();
4582 preempt_disable();
4583
4584 if (cpu == WORK_CPU_UNBOUND)
4585 cpu = smp_processor_id();
4586
4587 if (!(wq->flags & WQ_UNBOUND))
4588 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4589 else
4590 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4591
4592 ret = !list_empty(&pwq->inactive_works);
4593 preempt_enable();
4594 rcu_read_unlock();
4595
4596 return ret;
4597}
4598EXPORT_SYMBOL_GPL(workqueue_congested);
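/*
 * Example (illustrative sketch, not original kernel text; the foo names are
 * made up): use the congestion hint to skip opportunistic, droppable work.
 * The result is advisory only and may be stale by the time it is used.
 *
 *	static void foo_maybe_queue_gc(struct foo_dev *foo)
 *	{
 *		if (workqueue_congested(WORK_CPU_UNBOUND, system_unbound_wq))
 *			return;
 *		queue_work(system_unbound_wq, &foo->gc_work);
 *	}
 */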
4599
4600/**
4601 * work_busy - test whether a work is currently pending or running
4602 * @work: the work to be tested
4603 *
4604 * Test whether @work is currently pending or running.  There is no
4605 * synchronization around this function and the test result is
4606 * unreliable and only useful as advisory hints or for debugging.
4607 *
4608 * Return:
4609 * OR'd bitmask of WORK_BUSY_* bits.
4610 */
4611unsigned int work_busy(struct work_struct *work)
4612{
4613 struct worker_pool *pool;
4614 unsigned long flags;
4615 unsigned int ret = 0;
4616
4617 if (work_pending(work))
4618 ret |= WORK_BUSY_PENDING;
4619
4620 rcu_read_lock();
4621 pool = get_work_pool(work);
4622 if (pool) {
4623 raw_spin_lock_irqsave(&pool->lock, flags);
4624 if (find_worker_executing_work(pool, work))
4625 ret |= WORK_BUSY_RUNNING;
4626 raw_spin_unlock_irqrestore(&pool->lock, flags);
4627 }
4628 rcu_read_unlock();
4629
4630 return ret;
4631}
4632EXPORT_SYMBOL_GPL(work_busy);
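/*
 * Example (illustrative sketch, not original kernel text; the foo names are
 * made up): the returned bitmask is advisory and suited to debug output.
 *
 *	static void foo_debug_dump(struct foo_dev *foo)
 *	{
 *		unsigned int busy = work_busy(&foo->reset_work);
 *
 *		pr_debug("reset_work:%s%s\n",
 *			 (busy & WORK_BUSY_PENDING) ? " pending" : "",
 *			 (busy & WORK_BUSY_RUNNING) ? " running" : "");
 *	}
 */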
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644void set_worker_desc(const char *fmt, ...)
4645{
4646 struct worker *worker = current_wq_worker();
4647 va_list args;
4648
4649 if (worker) {
4650 va_start(args, fmt);
4651 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4652 va_end(args);
4653 }
4654}
4655EXPORT_SYMBOL_GPL(set_worker_desc);
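/*
 * Example (illustrative sketch, not original kernel text; struct foo_req and
 * its members are made up): a work function describing what it is operating
 * on so that task dumps of the executing kworker are more informative.
 *
 *	static void foo_io_workfn(struct work_struct *work)
 *	{
 *		struct foo_req *req = container_of(work, struct foo_req, work);
 *
 *		set_worker_desc("foo-io req %d", req->id);
 *		// ... perform the request ...
 *	}
 */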
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670void print_worker_info(const char *log_lvl, struct task_struct *task)
4671{
4672 work_func_t *fn = NULL;
4673 char name[WQ_NAME_LEN] = { };
4674 char desc[WORKER_DESC_LEN] = { };
4675 struct pool_workqueue *pwq = NULL;
4676 struct workqueue_struct *wq = NULL;
4677 struct worker *worker;
4678
4679 if (!(task->flags & PF_WQ_WORKER))
4680 return;
4681
4682
4683
4684
4685
4686 worker = kthread_probe_data(task);
4687
4688
4689
4690
4691
4692 copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
4693 copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
4694 copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
4695 copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
4696 copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
4697
4698 if (fn || name[0] || desc[0]) {
4699 printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
4700 if (strcmp(name, desc))
4701 pr_cont(" (%s)", desc);
4702 pr_cont("\n");
4703 }
4704}
4705
4706static void pr_cont_pool_info(struct worker_pool *pool)
4707{
4708 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4709 if (pool->node != NUMA_NO_NODE)
4710 pr_cont(" node=%d", pool->node);
4711 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4712}
4713
4714static void pr_cont_work(bool comma, struct work_struct *work)
4715{
4716 if (work->func == wq_barrier_func) {
4717 struct wq_barrier *barr;
4718
4719 barr = container_of(work, struct wq_barrier, work);
4720
4721 pr_cont("%s BAR(%d)", comma ? "," : "",
4722 task_pid_nr(barr->task));
4723 } else {
4724 pr_cont("%s %ps", comma ? "," : "", work->func);
4725 }
4726}
4727
4728static void show_pwq(struct pool_workqueue *pwq)
4729{
4730 struct worker_pool *pool = pwq->pool;
4731 struct work_struct *work;
4732 struct worker *worker;
4733 bool has_in_flight = false, has_pending = false;
4734 int bkt;
4735
4736 pr_info(" pwq %d:", pool->id);
4737 pr_cont_pool_info(pool);
4738
4739 pr_cont(" active=%d/%d refcnt=%d%s\n",
4740 pwq->nr_active, pwq->max_active, pwq->refcnt,
4741 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4742
4743 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4744 if (worker->current_pwq == pwq) {
4745 has_in_flight = true;
4746 break;
4747 }
4748 }
4749 if (has_in_flight) {
4750 bool comma = false;
4751
4752 pr_info(" in-flight:");
4753 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4754 if (worker->current_pwq != pwq)
4755 continue;
4756
4757 pr_cont("%s %d%s:%ps", comma ? "," : "",
4758 task_pid_nr(worker->task),
4759 worker->rescue_wq ? "(RESCUER)" : "",
4760 worker->current_func);
4761 list_for_each_entry(work, &worker->scheduled, entry)
4762 pr_cont_work(false, work);
4763 comma = true;
4764 }
4765 pr_cont("\n");
4766 }
4767
4768 list_for_each_entry(work, &pool->worklist, entry) {
4769 if (get_work_pwq(work) == pwq) {
4770 has_pending = true;
4771 break;
4772 }
4773 }
4774 if (has_pending) {
4775 bool comma = false;
4776
4777 pr_info(" pending:");
4778 list_for_each_entry(work, &pool->worklist, entry) {
4779 if (get_work_pwq(work) != pwq)
4780 continue;
4781
4782 pr_cont_work(comma, work);
4783 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4784 }
4785 pr_cont("\n");
4786 }
4787
4788 if (!list_empty(&pwq->inactive_works)) {
4789 bool comma = false;
4790
4791 pr_info(" inactive:");
4792 list_for_each_entry(work, &pwq->inactive_works, entry) {
4793 pr_cont_work(comma, work);
4794 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4795 }
4796 pr_cont("\n");
4797 }
4798}
4799
4800
4801
4802
4803
4804void show_one_workqueue(struct workqueue_struct *wq)
4805{
4806 struct pool_workqueue *pwq;
4807 bool idle = true;
4808 unsigned long flags;
4809
4810 for_each_pwq(pwq, wq) {
4811 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4812 idle = false;
4813 break;
4814 }
4815 }
4816 if (idle)
4817 return;
4818
4819 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4820
4821 for_each_pwq(pwq, wq) {
4822 raw_spin_lock_irqsave(&pwq->pool->lock, flags);
4823 if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
4824
4825
4826
4827
4828
4829 printk_deferred_enter();
4830 show_pwq(pwq);
4831 printk_deferred_exit();
4832 }
4833 raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
4834
4835
4836
4837
4838
4839 touch_nmi_watchdog();
4840 }
4841
4842}
4843
4844
4845
4846
4847
4848static void show_one_worker_pool(struct worker_pool *pool)
4849{
4850 struct worker *worker;
4851 bool first = true;
4852 unsigned long flags;
4853
4854 raw_spin_lock_irqsave(&pool->lock, flags);
4855 if (pool->nr_workers == pool->nr_idle)
4856 goto next_pool;
4857
4858
4859
4860
4861
4862 printk_deferred_enter();
4863 pr_info("pool %d:", pool->id);
4864 pr_cont_pool_info(pool);
4865 pr_cont(" hung=%us workers=%d",
4866 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4867 pool->nr_workers);
4868 if (pool->manager)
4869 pr_cont(" manager: %d",
4870 task_pid_nr(pool->manager->task));
4871 list_for_each_entry(worker, &pool->idle_list, entry) {
4872 pr_cont(" %s%d", first ? "idle: " : "",
4873 task_pid_nr(worker->task));
4874 first = false;
4875 }
4876 pr_cont("\n");
4877 printk_deferred_exit();
4878next_pool:
4879 raw_spin_unlock_irqrestore(&pool->lock, flags);
4880
4881
4882
4883
4884
4885 touch_nmi_watchdog();
4886
4887}
4888
4889
4890
4891
4892
4893
4894
4895void show_all_workqueues(void)
4896{
4897 struct workqueue_struct *wq;
4898 struct worker_pool *pool;
4899 int pi;
4900
4901 rcu_read_lock();
4902
4903 pr_info("Showing busy workqueues and worker pools:\n");
4904
4905 list_for_each_entry_rcu(wq, &workqueues, list)
4906 show_one_workqueue(wq);
4907
4908 for_each_pool(pool, pi)
4909 show_one_worker_pool(pool);
4910
4911 rcu_read_unlock();
4912}
4913
4914
4915void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
4916{
4917 int off;
4918
4919
4920 off = strscpy(buf, task->comm, size);
4921 if (off < 0)
4922 return;
4923
4924
4925 mutex_lock(&wq_pool_attach_mutex);
4926
4927 if (task->flags & PF_WQ_WORKER) {
4928 struct worker *worker = kthread_data(task);
4929 struct worker_pool *pool = worker->pool;
4930
4931 if (pool) {
4932 raw_spin_lock_irq(&pool->lock);
4933
4934
4935
4936
4937
4938 if (worker->desc[0] != '\0') {
4939 if (worker->current_work)
4940 scnprintf(buf + off, size - off, "+%s",
4941 worker->desc);
4942 else
4943 scnprintf(buf + off, size - off, "-%s",
4944 worker->desc);
4945 }
4946 raw_spin_unlock_irq(&pool->lock);
4947 }
4948 }
4949
4950 mutex_unlock(&wq_pool_attach_mutex);
4951}
4952
4953#ifdef CONFIG_SMP
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970static void unbind_workers(int cpu)
4971{
4972 struct worker_pool *pool;
4973 struct worker *worker;
4974
4975 for_each_cpu_worker_pool(pool, cpu) {
4976 mutex_lock(&wq_pool_attach_mutex);
4977 raw_spin_lock_irq(&pool->lock);
4978
4979
4980
4981
4982
4983
4984
4985
4986 for_each_pool_worker(worker, pool)
4987 worker->flags |= WORKER_UNBOUND;
4988
4989 pool->flags |= POOL_DISASSOCIATED;
4990
4991 raw_spin_unlock_irq(&pool->lock);
4992
4993 for_each_pool_worker(worker, pool) {
4994 kthread_set_per_cpu(worker->task, -1);
4995 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
4996 }
4997
4998 mutex_unlock(&wq_pool_attach_mutex);
4999
5000
5001
5002
5003
5004
5005
5006 schedule();
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016 atomic_set(&pool->nr_running, 0);
5017
5018
5019
5020
5021
5022
5023 raw_spin_lock_irq(&pool->lock);
5024 wake_up_worker(pool);
5025 raw_spin_unlock_irq(&pool->lock);
5026 }
5027}
5028
5029
5030
5031
5032
5033
5034
5035static void rebind_workers(struct worker_pool *pool)
5036{
5037 struct worker *worker;
5038
5039 lockdep_assert_held(&wq_pool_attach_mutex);
5040
5041
5042
5043
5044
5045
5046
5047
5048 for_each_pool_worker(worker, pool) {
5049 kthread_set_per_cpu(worker->task, pool->cpu);
5050 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
5051 pool->attrs->cpumask) < 0);
5052 }
5053
5054 raw_spin_lock_irq(&pool->lock);
5055
5056 pool->flags &= ~POOL_DISASSOCIATED;
5057
5058 for_each_pool_worker(worker, pool) {
5059 unsigned int worker_flags = worker->flags;
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069 if (worker_flags & WORKER_IDLE)
5070 wake_up_process(worker->task);
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
5088 worker_flags |= WORKER_REBOUND;
5089 worker_flags &= ~WORKER_UNBOUND;
5090 WRITE_ONCE(worker->flags, worker_flags);
5091 }
5092
5093 raw_spin_unlock_irq(&pool->lock);
5094}
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
5107{
5108 static cpumask_t cpumask;
5109 struct worker *worker;
5110
5111 lockdep_assert_held(&wq_pool_attach_mutex);
5112
5113
5114 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
5115 return;
5116
5117 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
5118
5119
5120 for_each_pool_worker(worker, pool)
5121 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
5122}
5123
5124int workqueue_prepare_cpu(unsigned int cpu)
5125{
5126 struct worker_pool *pool;
5127
5128 for_each_cpu_worker_pool(pool, cpu) {
5129 if (pool->nr_workers)
5130 continue;
5131 if (!create_worker(pool))
5132 return -ENOMEM;
5133 }
5134 return 0;
5135}
5136
5137int workqueue_online_cpu(unsigned int cpu)
5138{
5139 struct worker_pool *pool;
5140 struct workqueue_struct *wq;
5141 int pi;
5142
5143 mutex_lock(&wq_pool_mutex);
5144
5145 for_each_pool(pool, pi) {
5146 mutex_lock(&wq_pool_attach_mutex);
5147
5148 if (pool->cpu == cpu)
5149 rebind_workers(pool);
5150 else if (pool->cpu < 0)
5151 restore_unbound_workers_cpumask(pool, cpu);
5152
5153 mutex_unlock(&wq_pool_attach_mutex);
5154 }
5155
5156
5157 list_for_each_entry(wq, &workqueues, list)
5158 wq_update_unbound_numa(wq, cpu, true);
5159
5160 mutex_unlock(&wq_pool_mutex);
5161 return 0;
5162}
5163
5164int workqueue_offline_cpu(unsigned int cpu)
5165{
5166 struct workqueue_struct *wq;
5167
5168
5169 if (WARN_ON(cpu != smp_processor_id()))
5170 return -1;
5171
5172 unbind_workers(cpu);
5173
5174
5175 mutex_lock(&wq_pool_mutex);
5176 list_for_each_entry(wq, &workqueues, list)
5177 wq_update_unbound_numa(wq, cpu, false);
5178 mutex_unlock(&wq_pool_mutex);
5179
5180 return 0;
5181}
5182
5183struct work_for_cpu {
5184 struct work_struct work;
5185 long (*fn)(void *);
5186 void *arg;
5187 long ret;
5188};
5189
5190static void work_for_cpu_fn(struct work_struct *work)
5191{
5192 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5193
5194 wfc->ret = wfc->fn(wfc->arg);
5195}
5196
5197/**
5198 * work_on_cpu - run a function in thread context on a particular cpu
5199 * @cpu: the cpu to run on
5200 * @fn: the function to run
5201 * @arg: the function arg
5202 *
5203 * It is up to the caller to ensure that the cpu doesn't go offline.
5204 * The caller must not hold any locks which would prevent @fn from completing.
5205 *
5206 * Return: The value @fn returns.
5207 */
5208long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
5209{
5210 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
5211
5212 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
5213 schedule_work_on(cpu, &wfc.work);
5214 flush_work(&wfc.work);
5215 destroy_work_on_stack(&wfc.work);
5216 return wfc.ret;
5217}
5218EXPORT_SYMBOL_GPL(work_on_cpu);
5219
5220/**
5221 * work_on_cpu_safe - run a function in thread context on a particular cpu
5222 * @cpu: the cpu to run on
5223 * @fn:  the function to run
5224 * @arg: the function argument
5225 *
5226 * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
5227 * any locks which would prevent @fn from completing.
5228 *
5229 * Return: The value @fn returns.
5230 */
5231long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
5232{
5233 long ret = -ENODEV;
5234
5235 cpus_read_lock();
5236 if (cpu_online(cpu))
5237 ret = work_on_cpu(cpu, fn, arg);
5238 cpus_read_unlock();
5239 return ret;
5240}
5241EXPORT_SYMBOL_GPL(work_on_cpu_safe);
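/*
 * Example (illustrative sketch, not original kernel text; the foo names are
 * made up): run a short function on a specific CPU and collect its return
 * value, with CPU hotplug excluded for the duration.
 *
 *	static long foo_read_counter_fn(void *arg)
 *	{
 *		// executes in a kworker bound to the requested CPU
 *		return foo_read_local_counter(arg);
 *	}
 *
 *	static long foo_read_counter_on(int cpu, void *arg)
 *	{
 *		return work_on_cpu_safe(cpu, foo_read_counter_fn, arg);
 *	}
 */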
5242#endif
5243
5244#ifdef CONFIG_FREEZER
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256void freeze_workqueues_begin(void)
5257{
5258 struct workqueue_struct *wq;
5259 struct pool_workqueue *pwq;
5260
5261 mutex_lock(&wq_pool_mutex);
5262
5263 WARN_ON_ONCE(workqueue_freezing);
5264 workqueue_freezing = true;
5265
5266 list_for_each_entry(wq, &workqueues, list) {
5267 mutex_lock(&wq->mutex);
5268 for_each_pwq(pwq, wq)
5269 pwq_adjust_max_active(pwq);
5270 mutex_unlock(&wq->mutex);
5271 }
5272
5273 mutex_unlock(&wq_pool_mutex);
5274}
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289bool freeze_workqueues_busy(void)
5290{
5291 bool busy = false;
5292 struct workqueue_struct *wq;
5293 struct pool_workqueue *pwq;
5294
5295 mutex_lock(&wq_pool_mutex);
5296
5297 WARN_ON_ONCE(!workqueue_freezing);
5298
5299 list_for_each_entry(wq, &workqueues, list) {
5300 if (!(wq->flags & WQ_FREEZABLE))
5301 continue;
5302
5303
5304
5305
5306 rcu_read_lock();
5307 for_each_pwq(pwq, wq) {
5308 WARN_ON_ONCE(pwq->nr_active < 0);
5309 if (pwq->nr_active) {
5310 busy = true;
5311 rcu_read_unlock();
5312 goto out_unlock;
5313 }
5314 }
5315 rcu_read_unlock();
5316 }
5317out_unlock:
5318 mutex_unlock(&wq_pool_mutex);
5319 return busy;
5320}
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331void thaw_workqueues(void)
5332{
5333 struct workqueue_struct *wq;
5334 struct pool_workqueue *pwq;
5335
5336 mutex_lock(&wq_pool_mutex);
5337
5338 if (!workqueue_freezing)
5339 goto out_unlock;
5340
5341 workqueue_freezing = false;
5342
5343
5344 list_for_each_entry(wq, &workqueues, list) {
5345 mutex_lock(&wq->mutex);
5346 for_each_pwq(pwq, wq)
5347 pwq_adjust_max_active(pwq);
5348 mutex_unlock(&wq->mutex);
5349 }
5350
5351out_unlock:
5352 mutex_unlock(&wq_pool_mutex);
5353}
5354#endif
5355
5356static int workqueue_apply_unbound_cpumask(void)
5357{
5358 LIST_HEAD(ctxs);
5359 int ret = 0;
5360 struct workqueue_struct *wq;
5361 struct apply_wqattrs_ctx *ctx, *n;
5362
5363 lockdep_assert_held(&wq_pool_mutex);
5364
5365 list_for_each_entry(wq, &workqueues, list) {
5366 if (!(wq->flags & WQ_UNBOUND))
5367 continue;
5368
5369 if (wq->flags & __WQ_ORDERED)
5370 continue;
5371
5372 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
5373 if (!ctx) {
5374 ret = -ENOMEM;
5375 break;
5376 }
5377
5378 list_add_tail(&ctx->list, &ctxs);
5379 }
5380
5381 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5382 if (!ret)
5383 apply_wqattrs_commit(ctx);
5384 apply_wqattrs_cleanup(ctx);
5385 }
5386
5387 return ret;
5388}
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5403{
5404 int ret = -EINVAL;
5405 cpumask_var_t saved_cpumask;
5406
5407
5408
5409
5410
5411 cpumask_and(cpumask, cpumask, cpu_possible_mask);
5412 if (!cpumask_empty(cpumask)) {
5413 apply_wqattrs_lock();
5414 if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
5415 ret = 0;
5416 goto out_unlock;
5417 }
5418
5419 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) {
5420 ret = -ENOMEM;
5421 goto out_unlock;
5422 }
5423
5424
5425 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5426
5427
5428 cpumask_copy(wq_unbound_cpumask, cpumask);
5429 ret = workqueue_apply_unbound_cpumask();
5430
5431
5432 if (ret < 0)
5433 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5434
5435 free_cpumask_var(saved_cpumask);
5436out_unlock:
5437 apply_wqattrs_unlock();
5438 }
5439
5440 return ret;
5441}
5442
5443#ifdef CONFIG_SYSFS
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459struct wq_device {
5460 struct workqueue_struct *wq;
5461 struct device dev;
5462};
5463
5464static struct workqueue_struct *dev_to_wq(struct device *dev)
5465{
5466 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5467
5468 return wq_dev->wq;
5469}
5470
5471static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5472 char *buf)
5473{
5474 struct workqueue_struct *wq = dev_to_wq(dev);
5475
5476 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5477}
5478static DEVICE_ATTR_RO(per_cpu);
5479
5480static ssize_t max_active_show(struct device *dev,
5481 struct device_attribute *attr, char *buf)
5482{
5483 struct workqueue_struct *wq = dev_to_wq(dev);
5484
5485 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5486}
5487
5488static ssize_t max_active_store(struct device *dev,
5489 struct device_attribute *attr, const char *buf,
5490 size_t count)
5491{
5492 struct workqueue_struct *wq = dev_to_wq(dev);
5493 int val;
5494
5495 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5496 return -EINVAL;
5497
5498 workqueue_set_max_active(wq, val);
5499 return count;
5500}
5501static DEVICE_ATTR_RW(max_active);
5502
5503static struct attribute *wq_sysfs_attrs[] = {
5504 &dev_attr_per_cpu.attr,
5505 &dev_attr_max_active.attr,
5506 NULL,
5507};
5508ATTRIBUTE_GROUPS(wq_sysfs);
5509
5510static ssize_t wq_pool_ids_show(struct device *dev,
5511 struct device_attribute *attr, char *buf)
5512{
5513 struct workqueue_struct *wq = dev_to_wq(dev);
5514 const char *delim = "";
5515 int node, written = 0;
5516
5517 cpus_read_lock();
5518 rcu_read_lock();
5519 for_each_node(node) {
5520 written += scnprintf(buf + written, PAGE_SIZE - written,
5521 "%s%d:%d", delim, node,
5522 unbound_pwq_by_node(wq, node)->pool->id);
5523 delim = " ";
5524 }
5525 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5526 rcu_read_unlock();
5527 cpus_read_unlock();
5528
5529 return written;
5530}
5531
5532static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5533 char *buf)
5534{
5535 struct workqueue_struct *wq = dev_to_wq(dev);
5536 int written;
5537
5538 mutex_lock(&wq->mutex);
5539 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5540 mutex_unlock(&wq->mutex);
5541
5542 return written;
5543}
5544
5545
5546static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5547{
5548 struct workqueue_attrs *attrs;
5549
5550 lockdep_assert_held(&wq_pool_mutex);
5551
5552 attrs = alloc_workqueue_attrs();
5553 if (!attrs)
5554 return NULL;
5555
5556 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5557 return attrs;
5558}
5559
5560static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5561 const char *buf, size_t count)
5562{
5563 struct workqueue_struct *wq = dev_to_wq(dev);
5564 struct workqueue_attrs *attrs;
5565 int ret = -ENOMEM;
5566
5567 apply_wqattrs_lock();
5568
5569 attrs = wq_sysfs_prep_attrs(wq);
5570 if (!attrs)
5571 goto out_unlock;
5572
5573 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5574 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5575 ret = apply_workqueue_attrs_locked(wq, attrs);
5576 else
5577 ret = -EINVAL;
5578
5579out_unlock:
5580 apply_wqattrs_unlock();
5581 free_workqueue_attrs(attrs);
5582 return ret ?: count;
5583}
5584
5585static ssize_t wq_cpumask_show(struct device *dev,
5586 struct device_attribute *attr, char *buf)
5587{
5588 struct workqueue_struct *wq = dev_to_wq(dev);
5589 int written;
5590
5591 mutex_lock(&wq->mutex);
5592 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5593 cpumask_pr_args(wq->unbound_attrs->cpumask));
5594 mutex_unlock(&wq->mutex);
5595 return written;
5596}
5597
5598static ssize_t wq_cpumask_store(struct device *dev,
5599 struct device_attribute *attr,
5600 const char *buf, size_t count)
5601{
5602 struct workqueue_struct *wq = dev_to_wq(dev);
5603 struct workqueue_attrs *attrs;
5604 int ret = -ENOMEM;
5605
5606 apply_wqattrs_lock();
5607
5608 attrs = wq_sysfs_prep_attrs(wq);
5609 if (!attrs)
5610 goto out_unlock;
5611
5612 ret = cpumask_parse(buf, attrs->cpumask);
5613 if (!ret)
5614 ret = apply_workqueue_attrs_locked(wq, attrs);
5615
5616out_unlock:
5617 apply_wqattrs_unlock();
5618 free_workqueue_attrs(attrs);
5619 return ret ?: count;
5620}
5621
5622static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5623 char *buf)
5624{
5625 struct workqueue_struct *wq = dev_to_wq(dev);
5626 int written;
5627
5628 mutex_lock(&wq->mutex);
5629 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5630 !wq->unbound_attrs->no_numa);
5631 mutex_unlock(&wq->mutex);
5632
5633 return written;
5634}
5635
5636static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5637 const char *buf, size_t count)
5638{
5639 struct workqueue_struct *wq = dev_to_wq(dev);
5640 struct workqueue_attrs *attrs;
5641 int v, ret = -ENOMEM;
5642
5643 apply_wqattrs_lock();
5644
5645 attrs = wq_sysfs_prep_attrs(wq);
5646 if (!attrs)
5647 goto out_unlock;
5648
5649 ret = -EINVAL;
5650 if (sscanf(buf, "%d", &v) == 1) {
5651 attrs->no_numa = !v;
5652 ret = apply_workqueue_attrs_locked(wq, attrs);
5653 }
5654
5655out_unlock:
5656 apply_wqattrs_unlock();
5657 free_workqueue_attrs(attrs);
5658 return ret ?: count;
5659}
5660
5661static struct device_attribute wq_sysfs_unbound_attrs[] = {
5662 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5663 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5664 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5665 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5666 __ATTR_NULL,
5667};
5668
5669static struct bus_type wq_subsys = {
5670 .name = "workqueue",
5671 .dev_groups = wq_sysfs_groups,
5672};
5673
5674static ssize_t wq_unbound_cpumask_show(struct device *dev,
5675 struct device_attribute *attr, char *buf)
5676{
5677 int written;
5678
5679 mutex_lock(&wq_pool_mutex);
5680 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5681 cpumask_pr_args(wq_unbound_cpumask));
5682 mutex_unlock(&wq_pool_mutex);
5683
5684 return written;
5685}
5686
5687static ssize_t wq_unbound_cpumask_store(struct device *dev,
5688 struct device_attribute *attr, const char *buf, size_t count)
5689{
5690 cpumask_var_t cpumask;
5691 int ret;
5692
5693 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5694 return -ENOMEM;
5695
5696 ret = cpumask_parse(buf, cpumask);
5697 if (!ret)
5698 ret = workqueue_set_unbound_cpumask(cpumask);
5699
5700 free_cpumask_var(cpumask);
5701 return ret ? ret : count;
5702}
5703
5704static struct device_attribute wq_sysfs_cpumask_attr =
5705 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5706 wq_unbound_cpumask_store);
5707
5708static int __init wq_sysfs_init(void)
5709{
5710 int err;
5711
5712 err = subsys_virtual_register(&wq_subsys, NULL);
5713 if (err)
5714 return err;
5715
5716 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5717}
5718core_initcall(wq_sysfs_init);
5719
5720static void wq_device_release(struct device *dev)
5721{
5722 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5723
5724 kfree(wq_dev);
5725}
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742int workqueue_sysfs_register(struct workqueue_struct *wq)
5743{
5744 struct wq_device *wq_dev;
5745 int ret;
5746
5747
5748
5749
5750
5751
5752 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5753 return -EINVAL;
5754
5755 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5756 if (!wq_dev)
5757 return -ENOMEM;
5758
5759 wq_dev->wq = wq;
5760 wq_dev->dev.bus = &wq_subsys;
5761 wq_dev->dev.release = wq_device_release;
5762 dev_set_name(&wq_dev->dev, "%s", wq->name);
5763
5764
5765
5766
5767
5768 dev_set_uevent_suppress(&wq_dev->dev, true);
5769
5770 ret = device_register(&wq_dev->dev);
5771 if (ret) {
5772 put_device(&wq_dev->dev);
5773 wq->wq_dev = NULL;
5774 return ret;
5775 }
5776
5777 if (wq->flags & WQ_UNBOUND) {
5778 struct device_attribute *attr;
5779
5780 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5781 ret = device_create_file(&wq_dev->dev, attr);
5782 if (ret) {
5783 device_unregister(&wq_dev->dev);
5784 wq->wq_dev = NULL;
5785 return ret;
5786 }
5787 }
5788 }
5789
5790 dev_set_uevent_suppress(&wq_dev->dev, false);
5791 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5792 return 0;
5793}
5794
5795
5796
5797
5798
5799
5800
5801static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5802{
5803 struct wq_device *wq_dev = wq->wq_dev;
5804
5805 if (!wq->wq_dev)
5806 return;
5807
5808 wq->wq_dev = NULL;
5809 device_unregister(&wq_dev->dev);
5810}
5811#else
5812static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5813#endif
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831
5832#ifdef CONFIG_WQ_WATCHDOG
5833
5834static unsigned long wq_watchdog_thresh = 30;
5835static struct timer_list wq_watchdog_timer;
5836
5837static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5838static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5839
5840static void wq_watchdog_reset_touched(void)
5841{
5842 int cpu;
5843
5844 wq_watchdog_touched = jiffies;
5845 for_each_possible_cpu(cpu)
5846 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5847}
5848
5849static void wq_watchdog_timer_fn(struct timer_list *unused)
5850{
5851 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5852 bool lockup_detected = false;
5853 unsigned long now = jiffies;
5854 struct worker_pool *pool;
5855 int pi;
5856
5857 if (!thresh)
5858 return;
5859
5860 rcu_read_lock();
5861
5862 for_each_pool(pool, pi) {
5863 unsigned long pool_ts, touched, ts;
5864
5865 if (list_empty(&pool->worklist))
5866 continue;
5867
5868
5869
5870
5871
5872 kvm_check_and_clear_guest_paused();
5873
5874
5875 if (pool->cpu >= 0)
5876 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
5877 else
5878 touched = READ_ONCE(wq_watchdog_touched);
5879 pool_ts = READ_ONCE(pool->watchdog_ts);
5880
5881 if (time_after(pool_ts, touched))
5882 ts = pool_ts;
5883 else
5884 ts = touched;
5885
5886
5887 if (time_after(now, ts + thresh)) {
5888 lockup_detected = true;
5889 pr_emerg("BUG: workqueue lockup - pool");
5890 pr_cont_pool_info(pool);
5891 pr_cont(" stuck for %us!\n",
5892 jiffies_to_msecs(now - pool_ts) / 1000);
5893 }
5894 }
5895
5896 rcu_read_unlock();
5897
5898 if (lockup_detected)
5899 show_all_workqueues();
5900
5901 wq_watchdog_reset_touched();
5902 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5903}
5904
5905notrace void wq_watchdog_touch(int cpu)
5906{
5907 if (cpu >= 0)
5908 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5909
5910 wq_watchdog_touched = jiffies;
5911}
5912
5913static void wq_watchdog_set_thresh(unsigned long thresh)
5914{
5915 wq_watchdog_thresh = 0;
5916 del_timer_sync(&wq_watchdog_timer);
5917
5918 if (thresh) {
5919 wq_watchdog_thresh = thresh;
5920 wq_watchdog_reset_touched();
5921 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5922 }
5923}
5924
5925static int wq_watchdog_param_set_thresh(const char *val,
5926 const struct kernel_param *kp)
5927{
5928 unsigned long thresh;
5929 int ret;
5930
5931 ret = kstrtoul(val, 0, &thresh);
5932 if (ret)
5933 return ret;
5934
5935 if (system_wq)
5936 wq_watchdog_set_thresh(thresh);
5937 else
5938 wq_watchdog_thresh = thresh;
5939
5940 return 0;
5941}
5942
5943static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5944 .set = wq_watchdog_param_set_thresh,
5945 .get = param_get_ulong,
5946};
5947
5948module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5949 0644);
5950
5951static void wq_watchdog_init(void)
5952{
5953 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
5954 wq_watchdog_set_thresh(wq_watchdog_thresh);
5955}
5956
5957#else
5958
5959static inline void wq_watchdog_init(void) { }
5960
5961#endif
5962
5963static void __init wq_numa_init(void)
5964{
5965 cpumask_var_t *tbl;
5966 int node, cpu;
5967
5968 if (num_possible_nodes() <= 1)
5969 return;
5970
5971 if (wq_disable_numa) {
5972 pr_info("workqueue: NUMA affinity support disabled\n");
5973 return;
5974 }
5975
5976 for_each_possible_cpu(cpu) {
5977 if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
5978 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5979 return;
5980 }
5981 }
5982
5983 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
5984 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5985
	/*
	 * We want masks of possible CPUs of each node which isn't readily
	 * available.  Build one from cpu_to_node() which should have been
	 * initialized already.
	 */
5991 tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
5992 BUG_ON(!tbl);
5993
5994 for_each_node(node)
5995 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5996 node_online(node) ? node : NUMA_NO_NODE));
5997
5998 for_each_possible_cpu(cpu) {
5999 node = cpu_to_node(cpu);
6000 cpumask_set_cpu(cpu, tbl[node]);
6001 }
6002
6003 wq_numa_possible_cpumask = tbl;
6004 wq_numa_enabled = true;
6005}
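
/*
 * Illustrative result (hypothetical topology): on a two-node machine with
 * CPUs 0-3 on node 0 and CPUs 4-7 on node 1, the loop above produces
 *
 *	wq_numa_possible_cpumask[0] = mask of CPUs 0-3
 *	wq_numa_possible_cpumask[1] = mask of CPUs 4-7
 *
 * which unbound workqueues later use to give each node its own
 * pool_workqueue restricted to that node's CPUs.
 */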
6006
/**
 * workqueue_init_early - early init for workqueue subsystem
 *
 * This is the first half of two-staged workqueue subsystem initialization
 * and invoked as soon as the bare basics - memory allocation, cpumasks and
 * idr are up.  It sets up all the data structures and system workqueues
 * and allows early boot code to create workqueues and queue/cancel work
 * items.  Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
6017void __init workqueue_init_early(void)
6018{
6019 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
6020 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
6021 int i, cpu;
6022
6023 BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
6024
6025 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
6026 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
6027
6028 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
6029
	/* initialize CPU pools */
6031 for_each_possible_cpu(cpu) {
6032 struct worker_pool *pool;
6033
6034 i = 0;
6035 for_each_cpu_worker_pool(pool, cpu) {
6036 BUG_ON(init_worker_pool(pool));
6037 pool->cpu = cpu;
6038 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
6039 pool->attrs->nice = std_nice[i++];
6040 pool->node = cpu_to_node(cpu);
6041
			/* alloc pool ID */
6043 mutex_lock(&wq_pool_mutex);
6044 BUG_ON(worker_pool_assign_id(pool));
6045 mutex_unlock(&wq_pool_mutex);
6046 }
6047 }
6048
	/* create default unbound and ordered wq attrs */
6050 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
6051 struct workqueue_attrs *attrs;
6052
6053 BUG_ON(!(attrs = alloc_workqueue_attrs()));
6054 attrs->nice = std_nice[i];
6055 unbound_std_wq_attrs[i] = attrs;
6056
		/*
		 * An ordered wq should have only one pwq as ordering is
		 * guaranteed by max_active which is enforced by pwqs.
		 * Turn off NUMA so that dfl_pwq is used for all nodes.
		 */
6062 BUG_ON(!(attrs = alloc_workqueue_attrs()));
6063 attrs->nice = std_nice[i];
6064 attrs->no_numa = true;
6065 ordered_wq_attrs[i] = attrs;
6066 }
6067
6068 system_wq = alloc_workqueue("events", 0, 0);
6069 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
6070 system_long_wq = alloc_workqueue("events_long", 0, 0);
6071 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
6072 WQ_UNBOUND_MAX_ACTIVE);
6073 system_freezable_wq = alloc_workqueue("events_freezable",
6074 WQ_FREEZABLE, 0);
6075 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
6076 WQ_POWER_EFFICIENT, 0);
6077 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
6078 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
6079 0);
6080 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
6081 !system_unbound_wq || !system_freezable_wq ||
6082 !system_power_efficient_wq ||
6083 !system_freezable_power_efficient_wq);
6084}
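
/*
 * Sketch (illustrative, hypothetical names): once workqueue_init_early()
 * has run, boot code may already create workqueues and queue work items,
 * but nothing executes until workqueue_init() creates the first kworkers.
 *
 *	static void early_setup_fn(struct work_struct *work)
 *	{
 *		pr_info("runs only after workqueue_init()\n");
 *	}
 *	static DECLARE_WORK(early_setup_work, early_setup_fn);
 *
 *	// somewhere in early boot, after workqueue_init_early():
 *	queue_work(system_unbound_wq, &early_setup_work);
 */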
6085
/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the latter half of two-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled.
 * Workqueues have been created and work items queued on them, but there
 * are no kworkers executing the work items yet.  Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
6095void __init workqueue_init(void)
6096{
6097 struct workqueue_struct *wq;
6098 struct worker_pool *pool;
6099 int cpu, bkt;
6100
	/*
	 * It'd be simpler to initialize NUMA in workqueue_init_early() but
	 * CPU to node mapping may not be available that early on some
	 * archs such as power and arm64.  As per-cpu pools created
	 * previously could be missing node hints and unbound pools don't
	 * have their NUMA affinity set up yet, fix them up now that the
	 * mapping is available.
	 *
	 * Also, while iterating workqueues, create rescuers if requested.
	 */
6110 wq_numa_init();
6111
6112 mutex_lock(&wq_pool_mutex);
6113
6114 for_each_possible_cpu(cpu) {
6115 for_each_cpu_worker_pool(pool, cpu) {
6116 pool->node = cpu_to_node(cpu);
6117 }
6118 }
6119
6120 list_for_each_entry(wq, &workqueues, list) {
6121 wq_update_unbound_numa(wq, smp_processor_id(), true);
6122 WARN(init_rescuer(wq),
6123 "workqueue: failed to create early rescuer for %s",
6124 wq->name);
6125 }
6126
6127 mutex_unlock(&wq_pool_mutex);
6128
	/* create the initial workers */
6130 for_each_online_cpu(cpu) {
6131 for_each_cpu_worker_pool(pool, cpu) {
6132 pool->flags &= ~POOL_DISASSOCIATED;
6133 BUG_ON(!create_worker(pool));
6134 }
6135 }
6136
6137 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
6138 BUG_ON(!create_worker(pool));
6139
6140 wq_online = true;
6141 wq_watchdog_init();
6142}
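
/*
 * With both init stages complete the driver-facing API is fully usable.
 * Minimal usage sketch (hypothetical names, not part of this file):
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		// runs in process context on a kworker
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	static struct workqueue_struct *my_wq;
 *
 *	static int __init my_driver_init(void)
 *	{
 *		my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *		if (!my_wq)
 *			return -ENOMEM;
 *		queue_work(my_wq, &my_work);
 *		return 0;
 *	}
 *
 *	static void __exit my_driver_exit(void)
 *	{
 *		destroy_workqueue(my_wq);	// drains queued work first
 *	}
 */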
6143