/*
 * kernel/workqueue.c - generic async execution with shared worker pools
 *
 * Work items are queued on workqueues and executed by kernel worker
 * threads ("kworkers") which are organized into per-CPU and unbound
 * worker pools shared by all workqueues.
 */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>

#include "workqueue_internal.h"

enum {
	/* worker_pool flags */
	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give them MIN_NICE.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

/*
 * worker_pool - the unit that actually executes work items.
 *
 * Each possible CPU has NR_STD_WORKER_POOLS pools (normal and highpri),
 * and unbound workqueues share dynamically created pools hashed by their
 * attributes.  Most run-time state below is protected by pool->lock; the
 * workers list and attach/detach are protected by attach_mutex, and
 * unbound pool lifetime (refcnt, hash_node) by wq_pool_mutex.
 */
struct worker_pool {
	spinlock_t		lock;		/* the pool lock */
	int			cpu;		/* the associated cpu, -1 if unbound */
	int			node;		/* the associated NUMA node ID */
	int			id;		/* pool ID */
	unsigned int		flags;		/* POOL_* flags */

	unsigned long		watchdog_ts;	/* watchdog timestamp */

	struct list_head	worklist;	/* list of pending works */
	int			nr_workers;	/* total number of workers */

	/* nr_idle includes the ones off idle_list for rebinding */
	int			nr_idle;	/* currently idle workers */

	struct list_head	idle_list;	/* list of idle workers */
	struct timer_list	idle_timer;	/* worker idle timeout */
	struct timer_list	mayday_timer;	/* SOS timer for workers */

	/* a worker is either on busy_hash or idle_list, or the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
						/* hash of busy workers */

	struct worker		*manager;	/* purely informational */
	struct mutex		attach_mutex;	/* attach/detach exclusion */
	struct list_head	workers;	/* attached workers */
	struct completion	*detach_completion; /* all workers detached */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* worker attributes */
	struct hlist_node	hash_node;	/* unbound_pool_hash node */
	int			refcnt;		/* refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t		nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is sched-RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool_workqueue link between a workqueue and its backing pool.
 * It carries the per-workqueue concurrency limit (max_active), the flush
 * coloring state and the reference count that keeps the pwq alive while
 * work items or flushers still point at it.
 */
struct pool_workqueue {
	struct worker_pool	*pool;		/* the associated pool */
	struct workqueue_struct	*wq;		/* the owning workqueue */
	int			work_color;	/* current color */
	int			flush_color;	/* flushing color */
	int			refcnt;		/* reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* nr of in-flight works */
	int			nr_active;	/* nr of active works */
	int			max_active;	/* max active works */
	struct list_head	delayed_works;	/* delayed works */
	struct list_head	pwqs_node;	/* node on wq->pwqs */
	struct list_head	mayday_node;	/* node on wq->maydays */

	/*
	 * Release of an unbound pwq is punted to system_wq via
	 * unbound_release_work; see put_pwq().
	 */
	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;
	int			flush_color;
	struct completion	done;
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pools through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head	pwqs;		/* all pwqs of this wq */
	struct list_head	list;		/* list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* current work color */
	int			flush_color;	/* current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* first flusher */
	struct list_head	flusher_queue;	/* flush waiters */
	struct list_head	flusher_overflow; /* flush overflow list */

	struct list_head	maydays;	/* pwqs requesting rescue */
	struct worker		*rescuer;	/* rescue worker */

	int			nr_drainers;	/* drain in progress */
	int			saved_max_active; /* saved pwq max_active */

	struct workqueue_attrs	*unbound_attrs;	/* only for unbound wqs */
	struct pool_workqueue	*dfl_pwq;	/* only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* workqueue name */

	/*
	 * Destruction of workqueue_struct is sched-RCU protected to allow
	 * walking the workqueues list without grabbing wq_pool_mutex.
	 */
	struct rcu_head		rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
					/* possible CPUs of each node */

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;			/* can kworkers be created yet? */

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* buffer used when updating unbound NUMA attributes */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */

static LIST_HEAD(workqueues);		/* list of all workqueues */
static bool workqueue_freezing;		/* the workqueues are freezing */

/* allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on local execution.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* worker_pool IDs */

/* hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex),			\
			 "sched RCU or wq->mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * Must be called with wq_pool_mutex held or inside a sched-RCU read-side
 * critical section; the conditional inside the loop enforces that via
 * assert_rcu_or_pool_mutex().
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * Must be called with @pool->attach_mutex held.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * Must be called with wq->mutex held or inside a sched-RCU read-side
 * critical section.
 */
#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

430#ifdef CONFIG_DEBUG_OBJECTS_WORK
431
432static struct debug_obj_descr work_debug_descr;
433
434static void *work_debug_hint(void *addr)
435{
436 return ((struct work_struct *) addr)->func;
437}
438
439static bool work_is_static_object(void *addr)
440{
441 struct work_struct *work = addr;
442
443 return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
444}
445
446
447
448
449
450static bool work_fixup_init(void *addr, enum debug_obj_state state)
451{
452 struct work_struct *work = addr;
453
454 switch (state) {
455 case ODEBUG_STATE_ACTIVE:
456 cancel_work_sync(work);
457 debug_object_init(work, &work_debug_descr);
458 return true;
459 default:
460 return false;
461 }
462}
463
464
465
466
467
468static bool work_fixup_free(void *addr, enum debug_obj_state state)
469{
470 struct work_struct *work = addr;
471
472 switch (state) {
473 case ODEBUG_STATE_ACTIVE:
474 cancel_work_sync(work);
475 debug_object_free(work, &work_debug_descr);
476 return true;
477 default:
478 return false;
479 }
480}
481
482static struct debug_obj_descr work_debug_descr = {
483 .name = "work_struct",
484 .debug_hint = work_debug_hint,
485 .is_static_object = work_is_static_object,
486 .fixup_init = work_fixup_init,
487 .fixup_free = work_fixup_free,
488};
489
490static inline void debug_work_activate(struct work_struct *work)
491{
492 debug_object_activate(work, &work_debug_descr);
493}
494
495static inline void debug_work_deactivate(struct work_struct *work)
496{
497 debug_object_deactivate(work, &work_debug_descr);
498}
499
500void __init_work(struct work_struct *work, int onstack)
501{
502 if (onstack)
503 debug_object_init_on_stack(work, &work_debug_descr);
504 else
505 debug_object_init(work, &work_debug_descr);
506}
507EXPORT_SYMBOL_GPL(__init_work);
508
509void destroy_work_on_stack(struct work_struct *work)
510{
511 debug_object_free(work, &work_debug_descr);
512}
513EXPORT_SYMBOL_GPL(destroy_work_on_stack);
514
515void destroy_delayed_work_on_stack(struct delayed_work *work)
516{
517 destroy_timer_on_stack(&work->timer);
518 debug_object_free(&work->work, &work_debug_descr);
519}
520EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
521
522#else
523static inline void debug_work_activate(struct work_struct *work) { }
524static inline void debug_work_deactivate(struct work_struct *work) { }
525#endif
526
527
528
529
530
531
532
533
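/*
 * worker_pool_assign_id - allocate an ID for @pool from worker_pool_idr.
 * Called with wq_pool_mutex held.  IDs are kept below WORK_OFFQ_POOL_NONE
 * so they fit into the off-queue bits of work->data.  Returns 0 on
 * success, negative error code on failure.
 */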
534static int worker_pool_assign_id(struct worker_pool *pool)
535{
536 int ret;
537
538 lockdep_assert_held(&wq_pool_mutex);
539
540 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
541 GFP_KERNEL);
542 if (ret >= 0) {
543 pool->id = ret;
544 return 0;
545 }
546 return ret;
547}
548
549
550
551
552
553
554
555
556
557
558
559
560
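/*
 * unbound_pwq_by_node - return the unbound pool_workqueue for a NUMA node.
 * Requires sched RCU, wq->mutex or wq_pool_mutex (see the assertion
 * below).  NUMA_NO_NODE falls back to the workqueue's default pwq.
 */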
561static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
562 int node)
563{
564 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
565
566
567
568
569
570
571
572 if (unlikely(node == NUMA_NO_NODE))
573 return wq->dfl_pwq;
574
575 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
576}
577
578static unsigned int work_color_to_flags(int color)
579{
580 return color << WORK_STRUCT_COLOR_SHIFT;
581}
582
583static int get_work_color(struct work_struct *work)
584{
585 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
586 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
587}
588
589static int work_next_color(int color)
590{
591 return (color + 1) % WORK_NR_COLORS;
592}
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
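/*
 * work->data encodes either the pool_workqueue the work was last queued
 * on (when WORK_STRUCT_PWQ is set) or the last pool ID shifted by
 * WORK_OFFQ_POOL_SHIFT, plus WORK_STRUCT_* flag bits.  The setters below
 * may only be called while the caller owns the PENDING bit.
 */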
614static inline void set_work_data(struct work_struct *work, unsigned long data,
615 unsigned long flags)
616{
617 WARN_ON_ONCE(!work_pending(work));
618 atomic_long_set(&work->data, data | flags | work_static(work));
619}
620
621static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
622 unsigned long extra_flags)
623{
624 set_work_data(work, (unsigned long)pwq,
625 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
626}
627
628static void set_work_pool_and_keep_pending(struct work_struct *work,
629 int pool_id)
630{
631 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
632 WORK_STRUCT_PENDING);
633}
634
635static void set_work_pool_and_clear_pending(struct work_struct *work,
636 int pool_id)
637{
638
639
640
641
642
643
644 smp_wmb();
645 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674 smp_mb();
675}
676
677static void clear_work_data(struct work_struct *work)
678{
679 smp_wmb();
680 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
681}
682
683static struct pool_workqueue *get_work_pwq(struct work_struct *work)
684{
685 unsigned long data = atomic_long_read(&work->data);
686
687 if (data & WORK_STRUCT_PWQ)
688 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
689 else
690 return NULL;
691}
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
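/*
 * get_work_pool - return the worker_pool @work was last associated with.
 * Pools are created and destroyed under wq_pool_mutex and readable under
 * sched-RCU, so this must be called with wq_pool_mutex held or inside a
 * sched-RCU read-side critical section.  Returns NULL if @work has no
 * associated pool.
 */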
708static struct worker_pool *get_work_pool(struct work_struct *work)
709{
710 unsigned long data = atomic_long_read(&work->data);
711 int pool_id;
712
713 assert_rcu_or_pool_mutex();
714
715 if (data & WORK_STRUCT_PWQ)
716 return ((struct pool_workqueue *)
717 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
718
719 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
720 if (pool_id == WORK_OFFQ_POOL_NONE)
721 return NULL;
722
723 return idr_find(&worker_pool_idr, pool_id);
724}
725
726
727
728
729
730
731
732
733static int get_work_pool_id(struct work_struct *work)
734{
735 unsigned long data = atomic_long_read(&work->data);
736
737 if (data & WORK_STRUCT_PWQ)
738 return ((struct pool_workqueue *)
739 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
740
741 return data >> WORK_OFFQ_POOL_SHIFT;
742}
743
744static void mark_work_canceling(struct work_struct *work)
745{
746 unsigned long pool_id = get_work_pool_id(work);
747
748 pool_id <<= WORK_OFFQ_POOL_SHIFT;
749 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
750}
751
752static bool work_is_canceling(struct work_struct *work)
753{
754 unsigned long data = atomic_long_read(&work->data);
755
756 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
757}
758
759
760
761
762
763
764
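/*
 * Policy helpers.  "Running" workers are those accounted in
 * pool->nr_running, i.e. workers on a CPU and not blocked; the pool wants
 * exactly one running worker as long as there is pending work.
 */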
765static bool __need_more_worker(struct worker_pool *pool)
766{
767 return !atomic_read(&pool->nr_running);
768}
769
770
771
772
773
774
775
776
777
778static bool need_more_worker(struct worker_pool *pool)
779{
780 return !list_empty(&pool->worklist) && __need_more_worker(pool);
781}
782
783
784static bool may_start_working(struct worker_pool *pool)
785{
786 return pool->nr_idle;
787}
788
789
790static bool keep_working(struct worker_pool *pool)
791{
792 return !list_empty(&pool->worklist) &&
793 atomic_read(&pool->nr_running) <= 1;
794}
795
796
797static bool need_to_create_worker(struct worker_pool *pool)
798{
799 return need_more_worker(pool) && !may_start_working(pool);
800}
801
802
803static bool too_many_workers(struct worker_pool *pool)
804{
805 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
806 int nr_idle = pool->nr_idle + managing;
807 int nr_busy = pool->nr_workers - nr_idle;
808
809 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
810}
811
812
813
814
815
816
817static struct worker *first_idle_worker(struct worker_pool *pool)
818{
819 if (unlikely(list_empty(&pool->idle_list)))
820 return NULL;
821
822 return list_first_entry(&pool->idle_list, struct worker, entry);
823}
824
825
826
827
828
829
830
831
832
833
834static void wake_up_worker(struct worker_pool *pool)
835{
836 struct worker *worker = first_idle_worker(pool);
837
838 if (likely(worker))
839 wake_up_process(worker->task);
840}
841
842
843
844
845
846
847
848
849
850
851
852
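/**
 * wq_worker_waking_up - a worker is waking up
 * @task: task waking up
 * @cpu: CPU @task is waking up to
 *
 * Called from the scheduler when a busy worker is woken up; bumps the
 * pool's nr_running count if the worker was in a running state.
 */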
853void wq_worker_waking_up(struct task_struct *task, int cpu)
854{
855 struct worker *worker = kthread_data(task);
856
857 if (!(worker->flags & WORKER_NOT_RUNNING)) {
858 WARN_ON_ONCE(worker->pool->cpu != cpu);
859 atomic_inc(&worker->pool->nr_running);
860 }
861}
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
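/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * Called from the scheduler when a busy worker goes to sleep.  Decrements
 * nr_running and, if that leaves no running worker while work is pending,
 * returns an idle worker for the scheduler to wake up so the pool keeps
 * executing.  Returns NULL otherwise.
 */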
877struct task_struct *wq_worker_sleeping(struct task_struct *task)
878{
879 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
880 struct worker_pool *pool;
881
882
883
884
885
886
887 if (worker->flags & WORKER_NOT_RUNNING)
888 return NULL;
889
890 pool = worker->pool;
891
892
893 if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
894 return NULL;
895
896
897
898
899
900
901
902
903
904
905
906
907 if (atomic_dec_and_test(&pool->nr_running) &&
908 !list_empty(&pool->worklist))
909 to_wakeup = first_idle_worker(pool);
910 return to_wakeup ? to_wakeup->task : NULL;
911}
912
913
914
915
916
917
918
919
920
921
922
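/*
 * worker_set_flags - set worker flags and adjust nr_running accordingly.
 * Called from the worker itself with pool->lock held.  Setting a
 * NOT_RUNNING flag on a previously running worker decrements nr_running.
 */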
923static inline void worker_set_flags(struct worker *worker, unsigned int flags)
924{
925 struct worker_pool *pool = worker->pool;
926
927 WARN_ON_ONCE(worker->task != current);
928
929
930 if ((flags & WORKER_NOT_RUNNING) &&
931 !(worker->flags & WORKER_NOT_RUNNING)) {
932 atomic_dec(&pool->nr_running);
933 }
934
935 worker->flags |= flags;
936}
937
938
939
940
941
942
943
944
945
946
947
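/*
 * worker_clr_flags - clear worker flags and adjust nr_running accordingly.
 * Called from the worker itself with pool->lock held.  Clearing the last
 * NOT_RUNNING flag increments nr_running again.
 */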
948static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
949{
950 struct worker_pool *pool = worker->pool;
951 unsigned int oflags = worker->flags;
952
953 WARN_ON_ONCE(worker->task != current);
954
955 worker->flags &= ~flags;
956
957
958
959
960
961
962 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
963 if (!(worker->flags & WORKER_NOT_RUNNING))
964 atomic_inc(&pool->nr_running);
965}
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
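/*
 * find_worker_executing_work - find the worker which is executing @work.
 * Looks up @work in @pool's busy_hash.  Matching on both the work pointer
 * and the work function catches the case where a work item was freed and
 * its address recycled for a different work.  Called with pool->lock
 * held; returns NULL if nobody is executing @work.
 */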
1000static struct worker *find_worker_executing_work(struct worker_pool *pool,
1001 struct work_struct *work)
1002{
1003 struct worker *worker;
1004
1005 hash_for_each_possible(pool->busy_hash, worker, hentry,
1006 (unsigned long)work)
1007 if (worker->current_work == work &&
1008 worker->current_func == work->func)
1009 return worker;
1010
1011 return NULL;
1012}
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
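/*
 * move_linked_works - move @work and the works linked to it to @head.
 * Moves @work and all following entries marked WORK_STRUCT_LINKED.  If
 * @nextp is given it is updated to the next untouched entry so callers
 * iterating with list_for_each_entry_safe() can continue safely.  Called
 * with pool->lock held.
 */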
1031static void move_linked_works(struct work_struct *work, struct list_head *head,
1032 struct work_struct **nextp)
1033{
1034 struct work_struct *n;
1035
1036
1037
1038
1039
1040 list_for_each_entry_safe_from(work, n, NULL, entry) {
1041 list_move_tail(&work->entry, head);
1042 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1043 break;
1044 }
1045
1046
1047
1048
1049
1050
1051 if (nextp)
1052 *nextp = n;
1053}
1054
1055
1056
1057
1058
1059
1060
1061
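/*
 * get_pwq/put_pwq - reference counting for pool_workqueues; both must be
 * called with pwq->pool->lock held.  Dropping the last reference of an
 * unbound pwq schedules its unbound_release_work.
 */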
1062static void get_pwq(struct pool_workqueue *pwq)
1063{
1064 lockdep_assert_held(&pwq->pool->lock);
1065 WARN_ON_ONCE(pwq->refcnt <= 0);
1066 pwq->refcnt++;
1067}
1068
1069
1070
1071
1072
1073
1074
1075
1076static void put_pwq(struct pool_workqueue *pwq)
1077{
1078 lockdep_assert_held(&pwq->pool->lock);
1079 if (likely(--pwq->refcnt))
1080 return;
1081 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1082 return;
1083
1084
1085
1086
1087
1088
1089
1090
1091 schedule_work(&pwq->unbound_release_work);
1092}
1093
1094
1095
1096
1097
1098
1099
1100static void put_pwq_unlocked(struct pool_workqueue *pwq)
1101{
1102 if (pwq) {
1103
1104
1105
1106
1107 spin_lock_irq(&pwq->pool->lock);
1108 put_pwq(pwq);
1109 spin_unlock_irq(&pwq->pool->lock);
1110 }
1111}
1112
1113static void pwq_activate_delayed_work(struct work_struct *work)
1114{
1115 struct pool_workqueue *pwq = get_work_pwq(work);
1116
1117 trace_workqueue_activate_work(work);
1118 if (list_empty(&pwq->pool->worklist))
1119 pwq->pool->watchdog_ts = jiffies;
1120 move_linked_works(work, &pwq->pool->worklist, NULL);
1121 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1122 pwq->nr_active++;
1123}
1124
1125static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
1126{
1127 struct work_struct *work = list_first_entry(&pwq->delayed_works,
1128 struct work_struct, entry);
1129
1130 pwq_activate_delayed_work(work);
1131}
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
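/**
 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
 * @pwq: pwq of interest
 * @color: color of the work which left the queue
 *
 * A work has completed or been removed from its queue: decrement
 * nr_in_flight, activate the first delayed work if max_active allows, and
 * kick workqueue flushing if this was the last in-flight work of the
 * flush color.  Called with pool->lock held.
 */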
1144static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1145{
1146
1147 if (color == WORK_NO_COLOR)
1148 goto out_put;
1149
1150 pwq->nr_in_flight[color]--;
1151
1152 pwq->nr_active--;
1153 if (!list_empty(&pwq->delayed_works)) {
1154
1155 if (pwq->nr_active < pwq->max_active)
1156 pwq_activate_first_delayed(pwq);
1157 }
1158
1159
1160 if (likely(pwq->flush_color != color))
1161 goto out_put;
1162
1163
1164 if (pwq->nr_in_flight[color])
1165 goto out_put;
1166
1167
1168 pwq->flush_color = -1;
1169
1170
1171
1172
1173
1174 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1175 complete(&pwq->wq->first_flusher->done);
1176out_put:
1177 put_pwq(pwq);
1178}
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
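/**
 * try_to_grab_pending - steal a work item from its worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store the saved irq state
 *
 * Try to grab PENDING of @work.  Returns 1 if a pending work was stolen
 * off its queue or timer, 0 if PENDING was grabbed but the work wasn't
 * queued, -EAGAIN if the caller should retry, and -ENOENT if someone else
 * is canceling the work.  On return >= 0 irqs are left disabled and the
 * caller must restore them with local_irq_restore(*@flags).
 */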
1207static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1208 unsigned long *flags)
1209{
1210 struct worker_pool *pool;
1211 struct pool_workqueue *pwq;
1212
1213 local_irq_save(*flags);
1214
1215
1216 if (is_dwork) {
1217 struct delayed_work *dwork = to_delayed_work(work);
1218
1219
1220
1221
1222
1223
1224 if (likely(del_timer(&dwork->timer)))
1225 return 1;
1226 }
1227
1228
1229 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1230 return 0;
1231
1232
1233
1234
1235
1236 pool = get_work_pool(work);
1237 if (!pool)
1238 goto fail;
1239
1240 spin_lock(&pool->lock);
1241
1242
1243
1244
1245
1246
1247
1248
1249 pwq = get_work_pwq(work);
1250 if (pwq && pwq->pool == pool) {
1251 debug_work_deactivate(work);
1252
1253
1254
1255
1256
1257
1258
1259
1260 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1261 pwq_activate_delayed_work(work);
1262
1263 list_del_init(&work->entry);
1264 pwq_dec_nr_in_flight(pwq, get_work_color(work));
1265
1266
1267 set_work_pool_and_keep_pending(work, pool->id);
1268
1269 spin_unlock(&pool->lock);
1270 return 1;
1271 }
1272 spin_unlock(&pool->lock);
1273fail:
1274 local_irq_restore(*flags);
1275 if (work_is_canceling(work))
1276 return -ENOENT;
1277 cpu_relax();
1278 return -EAGAIN;
1279}
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
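/**
 * insert_work - insert a work into a pool
 *
 * Record @pwq in @work's data, queue @work at @head, take a pwq reference
 * and wake up an idle worker if the pool needs one.  Called with
 * pool->lock held.
 */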
1294static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1295 struct list_head *head, unsigned int extra_flags)
1296{
1297 struct worker_pool *pool = pwq->pool;
1298
1299
1300 set_work_pwq(work, pwq, extra_flags);
1301 list_add_tail(&work->entry, head);
1302 get_pwq(pwq);
1303
1304
1305
1306
1307
1308
1309 smp_mb();
1310
1311 if (__need_more_worker(pool))
1312 wake_up_worker(pool);
1313}
1314
1315
1316
1317
1318
1319static bool is_chained_work(struct workqueue_struct *wq)
1320{
1321 struct worker *worker;
1322
1323 worker = current_wq_worker();
1324
1325
1326
1327
1328 return worker && worker->current_pwq->wq == wq;
1329}
1330
1331
1332
1333
1334
1335
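/*
 * wq_select_unbound_cpu - pick a CPU for an unbound work item.  When the
 * requested CPU isn't in wq_unbound_cpumask (or round-robin is forced for
 * debugging), pick the next allowed online CPU in a per-CPU round-robin
 * fashion.
 */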
1336static int wq_select_unbound_cpu(int cpu)
1337{
1338 static bool printed_dbg_warning;
1339 int new_cpu;
1340
1341 if (likely(!wq_debug_force_rr_cpu)) {
1342 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1343 return cpu;
1344 } else if (!printed_dbg_warning) {
1345 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1346 printed_dbg_warning = true;
1347 }
1348
1349 if (cpumask_empty(wq_unbound_cpumask))
1350 return cpu;
1351
1352 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1353 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1354 if (unlikely(new_cpu >= nr_cpu_ids)) {
1355 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1356 if (unlikely(new_cpu >= nr_cpu_ids))
1357 return cpu;
1358 }
1359 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1360
1361 return new_cpu;
1362}
1363
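/*
 * __queue_work - queue @work on @wq.  Selects the target pool_workqueue,
 * rechecks the pool the work last ran on so that a still-running instance
 * keeps the work on the same pool (preserving non-reentrancy), honours
 * max_active by deferring to delayed_works when the limit is hit, and
 * finally inserts the work.  Must be called with irqs disabled.
 */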
1364static void __queue_work(int cpu, struct workqueue_struct *wq,
1365 struct work_struct *work)
1366{
1367 struct pool_workqueue *pwq;
1368 struct worker_pool *last_pool;
1369 struct list_head *worklist;
1370 unsigned int work_flags;
1371 unsigned int req_cpu = cpu;
1372
1373
1374
1375
1376
1377
1378
1379 WARN_ON_ONCE(!irqs_disabled());
1380
1381 debug_work_activate(work);
1382
1383
1384 if (unlikely(wq->flags & __WQ_DRAINING) &&
1385 WARN_ON_ONCE(!is_chained_work(wq)))
1386 return;
1387retry:
1388 if (req_cpu == WORK_CPU_UNBOUND)
1389 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1390
1391
1392 if (!(wq->flags & WQ_UNBOUND))
1393 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1394 else
1395 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1396
1397
1398
1399
1400
1401
1402 last_pool = get_work_pool(work);
1403 if (last_pool && last_pool != pwq->pool) {
1404 struct worker *worker;
1405
1406 spin_lock(&last_pool->lock);
1407
1408 worker = find_worker_executing_work(last_pool, work);
1409
1410 if (worker && worker->current_pwq->wq == wq) {
1411 pwq = worker->current_pwq;
1412 } else {
1413
1414 spin_unlock(&last_pool->lock);
1415 spin_lock(&pwq->pool->lock);
1416 }
1417 } else {
1418 spin_lock(&pwq->pool->lock);
1419 }
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429 if (unlikely(!pwq->refcnt)) {
1430 if (wq->flags & WQ_UNBOUND) {
1431 spin_unlock(&pwq->pool->lock);
1432 cpu_relax();
1433 goto retry;
1434 }
1435
1436 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1437 wq->name, cpu);
1438 }
1439
1440
1441 trace_workqueue_queue_work(req_cpu, pwq, work);
1442
1443 if (WARN_ON(!list_empty(&work->entry))) {
1444 spin_unlock(&pwq->pool->lock);
1445 return;
1446 }
1447
1448 pwq->nr_in_flight[pwq->work_color]++;
1449 work_flags = work_color_to_flags(pwq->work_color);
1450
1451 if (likely(pwq->nr_active < pwq->max_active)) {
1452 trace_workqueue_activate_work(work);
1453 pwq->nr_active++;
1454 worklist = &pwq->pool->worklist;
1455 if (list_empty(worklist))
1456 pwq->pool->watchdog_ts = jiffies;
1457 } else {
1458 work_flags |= WORK_STRUCT_DELAYED;
1459 worklist = &pwq->delayed_works;
1460 }
1461
1462 insert_work(pwq, work, worklist, work_flags);
1463
1464 spin_unlock(&pwq->pool->lock);
1465}
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
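/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it can't
 * go away.  Return: %false if @work was already on a queue, %true
 * otherwise.
 */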
1478bool queue_work_on(int cpu, struct workqueue_struct *wq,
1479 struct work_struct *work)
1480{
1481 bool ret = false;
1482 unsigned long flags;
1483
1484 local_irq_save(flags);
1485
1486 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1487 __queue_work(cpu, wq, work);
1488 ret = true;
1489 }
1490
1491 local_irq_restore(flags);
1492 return ret;
1493}
1494EXPORT_SYMBOL(queue_work_on);
1495
1496void delayed_work_timer_fn(unsigned long __data)
1497{
1498 struct delayed_work *dwork = (struct delayed_work *)__data;
1499
1500
1501 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1502}
1503EXPORT_SYMBOL(delayed_work_timer_fn);
1504
1505static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1506 struct delayed_work *dwork, unsigned long delay)
1507{
1508 struct timer_list *timer = &dwork->timer;
1509 struct work_struct *work = &dwork->work;
1510
1511 WARN_ON_ONCE(!wq);
1512 WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
1513 timer->data != (unsigned long)dwork);
1514 WARN_ON_ONCE(timer_pending(timer));
1515 WARN_ON_ONCE(!list_empty(&work->entry));
1516
1517
1518
1519
1520
1521
1522
1523 if (!delay) {
1524 __queue_work(cpu, wq, &dwork->work);
1525 return;
1526 }
1527
1528 dwork->wq = wq;
1529 dwork->cpu = cpu;
1530 timer->expires = jiffies + delay;
1531
1532 if (unlikely(cpu != WORK_CPU_UNBOUND))
1533 add_timer_on(timer, cpu);
1534 else
1535 add_timer(timer);
1536}
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
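/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  If
 * @delay is zero the work is queued for immediate execution.
 */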
1549bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1550 struct delayed_work *dwork, unsigned long delay)
1551{
1552 struct work_struct *work = &dwork->work;
1553 bool ret = false;
1554 unsigned long flags;
1555
1556
1557 local_irq_save(flags);
1558
1559 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1560 __queue_delayed_work(cpu, wq, dwork, delay);
1561 ret = true;
1562 }
1563
1564 local_irq_restore(flags);
1565 return ret;
1566}
1567EXPORT_SYMBOL(queue_delayed_work_on);
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
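/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 *
 * If @dwork is idle this is equivalent to queue_delayed_work_on();
 * otherwise its timer is restarted with @delay.  Safe against concurrent
 * queueing and irq-safe.  Return: %false if @dwork was idle and queued,
 * %true if it was pending and its timer was modified.
 */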
1587bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1588 struct delayed_work *dwork, unsigned long delay)
1589{
1590 unsigned long flags;
1591 int ret;
1592
1593 do {
1594 ret = try_to_grab_pending(&dwork->work, true, &flags);
1595 } while (unlikely(ret == -EAGAIN));
1596
1597 if (likely(ret >= 0)) {
1598 __queue_delayed_work(cpu, wq, dwork, delay);
1599 local_irq_restore(flags);
1600 }
1601
1602
1603 return ret;
1604}
1605EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
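/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and arm the idle timer if
 * necessary.  Called with pool->lock held.
 */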
1617static void worker_enter_idle(struct worker *worker)
1618{
1619 struct worker_pool *pool = worker->pool;
1620
1621 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1622 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1623 (worker->hentry.next || worker->hentry.pprev)))
1624 return;
1625
1626
1627 worker->flags |= WORKER_IDLE;
1628 pool->nr_idle++;
1629 worker->last_active = jiffies;
1630
1631
1632 list_add(&worker->entry, &pool->idle_list);
1633
1634 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1635 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1636
1637
1638
1639
1640
1641
1642
1643 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1644 pool->nr_workers == pool->nr_idle &&
1645 atomic_read(&pool->nr_running));
1646}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657static void worker_leave_idle(struct worker *worker)
1658{
1659 struct worker_pool *pool = worker->pool;
1660
1661 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1662 return;
1663 worker_clr_flags(worker, WORKER_IDLE);
1664 pool->nr_idle--;
1665 list_del_init(&worker->entry);
1666}
1667
1668static struct worker *alloc_worker(int node)
1669{
1670 struct worker *worker;
1671
1672 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1673 if (worker) {
1674 INIT_LIST_HEAD(&worker->entry);
1675 INIT_LIST_HEAD(&worker->scheduled);
1676 INIT_LIST_HEAD(&worker->node);
1677
1678 worker->flags = WORKER_PREP;
1679 }
1680 return worker;
1681}
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
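/**
 * worker_attach_to_pool() - attach a worker to a pool
 * @worker: worker to be attached
 * @pool: the target pool
 *
 * Attach @worker to @pool.  Once attached, the %WORKER_UNBOUND flag and
 * cpu binding of @worker are kept coordinated with the pool across
 * CPU [un]hotplugs.
 */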
1692static void worker_attach_to_pool(struct worker *worker,
1693 struct worker_pool *pool)
1694{
1695 mutex_lock(&pool->attach_mutex);
1696
1697
1698
1699
1700
1701 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1702
1703
1704
1705
1706
1707
1708 if (pool->flags & POOL_DISASSOCIATED)
1709 worker->flags |= WORKER_UNBOUND;
1710
1711 list_add_tail(&worker->node, &pool->workers);
1712
1713 mutex_unlock(&pool->attach_mutex);
1714}
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
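/**
 * worker_detach_from_pool() - detach a worker from its pool
 * @worker: worker which is attached to its pool
 * @pool: the pool @worker is attached to
 *
 * Undo the attaching done in worker_attach_to_pool().  The caller worker
 * shouldn't access the pool after detaching except by obtaining a
 * reference to it.
 */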
1725static void worker_detach_from_pool(struct worker *worker,
1726 struct worker_pool *pool)
1727{
1728 struct completion *detach_completion = NULL;
1729
1730 mutex_lock(&pool->attach_mutex);
1731 list_del(&worker->node);
1732 if (list_empty(&pool->workers))
1733 detach_completion = pool->detach_completion;
1734 mutex_unlock(&pool->attach_mutex);
1735
1736
1737 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1738
1739 if (detach_completion)
1740 complete(detach_completion);
1741}
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
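/**
 * create_worker - create a new workqueue worker
 * @pool: pool the new worker will belong to
 *
 * Create and start a new worker which is attached to @pool.
 * Return: pointer to the newly created worker, or NULL on failure.
 */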
1755static struct worker *create_worker(struct worker_pool *pool)
1756{
1757 struct worker *worker = NULL;
1758 int id = -1;
1759 char id_buf[16];
1760
1761
1762 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1763 if (id < 0)
1764 goto fail;
1765
1766 worker = alloc_worker(pool->node);
1767 if (!worker)
1768 goto fail;
1769
1770 worker->pool = pool;
1771 worker->id = id;
1772
1773 if (pool->cpu >= 0)
1774 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1775 pool->attrs->nice < 0 ? "H" : "");
1776 else
1777 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1778
1779 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1780 "kworker/%s", id_buf);
1781 if (IS_ERR(worker->task))
1782 goto fail;
1783
1784 set_user_nice(worker->task, pool->attrs->nice);
1785 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1786
1787
1788 worker_attach_to_pool(worker, pool);
1789
1790
1791 spin_lock_irq(&pool->lock);
1792 worker->pool->nr_workers++;
1793 worker_enter_idle(worker);
1794 wake_up_process(worker->task);
1795 spin_unlock_irq(&pool->lock);
1796
1797 return worker;
1798
1799fail:
1800 if (id >= 0)
1801 ida_simple_remove(&pool->worker_ida, id);
1802 kfree(worker);
1803 return NULL;
1804}
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
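/**
 * destroy_worker - destroy a workqueue worker
 * @worker: worker to be destroyed
 *
 * Destroy @worker and adjust @pool stats accordingly.  The worker should
 * be idle.  Called with pool->lock held.
 */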
1816static void destroy_worker(struct worker *worker)
1817{
1818 struct worker_pool *pool = worker->pool;
1819
1820 lockdep_assert_held(&pool->lock);
1821
1822
1823 if (WARN_ON(worker->current_work) ||
1824 WARN_ON(!list_empty(&worker->scheduled)) ||
1825 WARN_ON(!(worker->flags & WORKER_IDLE)))
1826 return;
1827
1828 pool->nr_workers--;
1829 pool->nr_idle--;
1830
1831 list_del_init(&worker->entry);
1832 worker->flags |= WORKER_DIE;
1833 wake_up_process(worker->task);
1834}
1835
1836static void idle_worker_timeout(unsigned long __pool)
1837{
1838 struct worker_pool *pool = (void *)__pool;
1839
1840 spin_lock_irq(&pool->lock);
1841
1842 while (too_many_workers(pool)) {
1843 struct worker *worker;
1844 unsigned long expires;
1845
1846
1847 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1848 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1849
1850 if (time_before(jiffies, expires)) {
1851 mod_timer(&pool->idle_timer, expires);
1852 break;
1853 }
1854
1855 destroy_worker(worker);
1856 }
1857
1858 spin_unlock_irq(&pool->lock);
1859}
1860
1861static void send_mayday(struct work_struct *work)
1862{
1863 struct pool_workqueue *pwq = get_work_pwq(work);
1864 struct workqueue_struct *wq = pwq->wq;
1865
1866 lockdep_assert_held(&wq_mayday_lock);
1867
1868 if (!wq->rescuer)
1869 return;
1870
1871
1872 if (list_empty(&pwq->mayday_node)) {
1873
1874
1875
1876
1877
1878 get_pwq(pwq);
1879 list_add_tail(&pwq->mayday_node, &wq->maydays);
1880 wake_up_process(wq->rescuer->task);
1881 }
1882}
1883
1884static void pool_mayday_timeout(unsigned long __pool)
1885{
1886 struct worker_pool *pool = (void *)__pool;
1887 struct work_struct *work;
1888
1889 spin_lock_irq(&pool->lock);
1890 spin_lock(&wq_mayday_lock);
1891
1892 if (need_to_create_worker(pool)) {
1893
1894
1895
1896
1897
1898
1899 list_for_each_entry(work, &pool->worklist, entry)
1900 send_mayday(work);
1901 }
1902
1903 spin_unlock(&wq_mayday_lock);
1904 spin_unlock_irq(&pool->lock);
1905
1906 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1907}
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
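/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create a new worker for @pool if necessary.  On failure keep retrying
 * with the mayday timer armed so the rescuer can be summoned while worker
 * creation is blocked.  Called with pool->lock held, which may be
 * released and regrabbed.
 */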
1927static void maybe_create_worker(struct worker_pool *pool)
1928__releases(&pool->lock)
1929__acquires(&pool->lock)
1930{
1931restart:
1932 spin_unlock_irq(&pool->lock);
1933
1934
1935 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1936
1937 while (true) {
1938 if (create_worker(pool) || !need_to_create_worker(pool))
1939 break;
1940
1941 schedule_timeout_interruptible(CREATE_COOLDOWN);
1942
1943 if (!need_to_create_worker(pool))
1944 break;
1945 }
1946
1947 del_timer_sync(&pool->mayday_timer);
1948 spin_lock_irq(&pool->lock);
1949
1950
1951
1952
1953
1954 if (need_to_create_worker(pool))
1955 goto restart;
1956}
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
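/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and take care of worker creation for the pool.
 * Only one worker can be the manager at a time; contenders simply bail
 * out.  Returns %false if no management was necessary, %true otherwise.
 * Called with pool->lock held, which may be temporarily released.
 */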
1980static bool manage_workers(struct worker *worker)
1981{
1982 struct worker_pool *pool = worker->pool;
1983
1984 if (pool->flags & POOL_MANAGER_ACTIVE)
1985 return false;
1986
1987 pool->flags |= POOL_MANAGER_ACTIVE;
1988 pool->manager = worker;
1989
1990 maybe_create_worker(pool);
1991
1992 pool->manager = NULL;
1993 pool->flags &= ~POOL_MANAGER_ACTIVE;
1994 wake_up(&wq_manager_wait);
1995 return true;
1996}
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
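/**
 * process_one_work - process a single work item
 * @worker: self
 * @work: work to process
 *
 * Synchronize against other workers executing the same item, mark the
 * worker busy, drop pool->lock, call the work function, then reacquire
 * the lock and clear the busy state.  Lockdep annotations around the call
 * detect flush/execution deadlocks.
 */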
2012static void process_one_work(struct worker *worker, struct work_struct *work)
2013__releases(&pool->lock)
2014__acquires(&pool->lock)
2015{
2016 struct pool_workqueue *pwq = get_work_pwq(work);
2017 struct worker_pool *pool = worker->pool;
2018 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2019 int work_color;
2020 struct worker *collision;
2021#ifdef CONFIG_LOCKDEP
2022
2023
2024
2025
2026
2027
2028
2029 struct lockdep_map lockdep_map;
2030
2031 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2032#endif
2033
2034 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2035 raw_smp_processor_id() != pool->cpu);
2036
2037
2038
2039
2040
2041
2042
2043 collision = find_worker_executing_work(pool, work);
2044 if (unlikely(collision)) {
2045 move_linked_works(work, &collision->scheduled, NULL);
2046 return;
2047 }
2048
2049
2050 debug_work_deactivate(work);
2051 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2052 worker->current_work = work;
2053 worker->current_func = work->func;
2054 worker->current_pwq = pwq;
2055 work_color = get_work_color(work);
2056
2057 list_del_init(&work->entry);
2058
2059
2060
2061
2062
2063
2064
2065 if (unlikely(cpu_intensive))
2066 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2067
2068
2069
2070
2071
2072
2073
2074
2075 if (need_more_worker(pool))
2076 wake_up_worker(pool);
2077
2078
2079
2080
2081
2082
2083
2084 set_work_pool_and_clear_pending(work, pool->id);
2085
2086 spin_unlock_irq(&pool->lock);
2087
2088 lock_map_acquire(&pwq->wq->lockdep_map);
2089 lock_map_acquire(&lockdep_map);
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111 lockdep_invariant_state(true);
2112 trace_workqueue_execute_start(work);
2113 worker->current_func(work);
2114
2115
2116
2117
2118 trace_workqueue_execute_end(work);
2119 lock_map_release(&lockdep_map);
2120 lock_map_release(&pwq->wq->lockdep_map);
2121
2122 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2123 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2124 " last function: %pf\n",
2125 current->comm, preempt_count(), task_pid_nr(current),
2126 worker->current_func);
2127 debug_show_held_locks(current);
2128 dump_stack();
2129 }
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139 cond_resched_rcu_qs();
2140
2141 spin_lock_irq(&pool->lock);
2142
2143
2144 if (unlikely(cpu_intensive))
2145 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2146
2147
2148 hash_del(&worker->hentry);
2149 worker->current_work = NULL;
2150 worker->current_func = NULL;
2151 worker->current_pwq = NULL;
2152 worker->desc_valid = false;
2153 pwq_dec_nr_in_flight(pwq, work_color);
2154}
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168static void process_scheduled_works(struct worker *worker)
2169{
2170 while (!list_empty(&worker->scheduled)) {
2171 struct work_struct *work = list_first_entry(&worker->scheduled,
2172 struct work_struct, entry);
2173 process_one_work(worker, work);
2174 }
2175}
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
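/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * Main loop of a worker kthread: leave idle, optionally assume the
 * manager role, process work items while the pool needs a running worker,
 * then go back to idle and sleep.
 */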
2189static int worker_thread(void *__worker)
2190{
2191 struct worker *worker = __worker;
2192 struct worker_pool *pool = worker->pool;
2193
2194
2195 worker->task->flags |= PF_WQ_WORKER;
2196woke_up:
2197 spin_lock_irq(&pool->lock);
2198
2199
2200 if (unlikely(worker->flags & WORKER_DIE)) {
2201 spin_unlock_irq(&pool->lock);
2202 WARN_ON_ONCE(!list_empty(&worker->entry));
2203 worker->task->flags &= ~PF_WQ_WORKER;
2204
2205 set_task_comm(worker->task, "kworker/dying");
2206 ida_simple_remove(&pool->worker_ida, worker->id);
2207 worker_detach_from_pool(worker, pool);
2208 kfree(worker);
2209 return 0;
2210 }
2211
2212 worker_leave_idle(worker);
2213recheck:
2214
2215 if (!need_more_worker(pool))
2216 goto sleep;
2217
2218
2219 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2220 goto recheck;
2221
2222
2223
2224
2225
2226
2227 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2228
2229
2230
2231
2232
2233
2234
2235
2236 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2237
2238 do {
2239 struct work_struct *work =
2240 list_first_entry(&pool->worklist,
2241 struct work_struct, entry);
2242
2243 pool->watchdog_ts = jiffies;
2244
2245 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2246
2247 process_one_work(worker, work);
2248 if (unlikely(!list_empty(&worker->scheduled)))
2249 process_scheduled_works(worker);
2250 } else {
2251 move_linked_works(work, &worker->scheduled, NULL);
2252 process_scheduled_works(worker);
2253 }
2254 } while (keep_working(pool));
2255
2256 worker_set_flags(worker, WORKER_PREP);
2257sleep:
2258
2259
2260
2261
2262
2263
2264
2265 worker_enter_idle(worker);
2266 __set_current_state(TASK_IDLE);
2267 spin_unlock_irq(&pool->lock);
2268 schedule();
2269 goto woke_up;
2270}
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
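/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueues with WQ_MEM_RECLAIM have one rescuer.  When a pool under
 * memory pressure cannot create a new worker, mayday is signalled and the
 * rescuer attaches to the pool and processes that workqueue's pending
 * items so forward progress is guaranteed.
 */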
2293static int rescuer_thread(void *__rescuer)
2294{
2295 struct worker *rescuer = __rescuer;
2296 struct workqueue_struct *wq = rescuer->rescue_wq;
2297 struct list_head *scheduled = &rescuer->scheduled;
2298 bool should_stop;
2299
2300 set_user_nice(current, RESCUER_NICE_LEVEL);
2301
2302
2303
2304
2305
2306 rescuer->task->flags |= PF_WQ_WORKER;
2307repeat:
2308 set_current_state(TASK_IDLE);
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318 should_stop = kthread_should_stop();
2319
2320
2321 spin_lock_irq(&wq_mayday_lock);
2322
2323 while (!list_empty(&wq->maydays)) {
2324 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2325 struct pool_workqueue, mayday_node);
2326 struct worker_pool *pool = pwq->pool;
2327 struct work_struct *work, *n;
2328 bool first = true;
2329
2330 __set_current_state(TASK_RUNNING);
2331 list_del_init(&pwq->mayday_node);
2332
2333 spin_unlock_irq(&wq_mayday_lock);
2334
2335 worker_attach_to_pool(rescuer, pool);
2336
2337 spin_lock_irq(&pool->lock);
2338 rescuer->pool = pool;
2339
2340
2341
2342
2343
2344 WARN_ON_ONCE(!list_empty(scheduled));
2345 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2346 if (get_work_pwq(work) == pwq) {
2347 if (first)
2348 pool->watchdog_ts = jiffies;
2349 move_linked_works(work, scheduled, &n);
2350 }
2351 first = false;
2352 }
2353
2354 if (!list_empty(scheduled)) {
2355 process_scheduled_works(rescuer);
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366 if (need_to_create_worker(pool)) {
2367 spin_lock(&wq_mayday_lock);
2368 get_pwq(pwq);
2369 list_move_tail(&pwq->mayday_node, &wq->maydays);
2370 spin_unlock(&wq_mayday_lock);
2371 }
2372 }
2373
2374
2375
2376
2377
2378 put_pwq(pwq);
2379
2380
2381
2382
2383
2384
2385 if (need_more_worker(pool))
2386 wake_up_worker(pool);
2387
2388 rescuer->pool = NULL;
2389 spin_unlock_irq(&pool->lock);
2390
2391 worker_detach_from_pool(rescuer, pool);
2392
2393 spin_lock_irq(&wq_mayday_lock);
2394 }
2395
2396 spin_unlock_irq(&wq_mayday_lock);
2397
2398 if (should_stop) {
2399 __set_current_state(TASK_RUNNING);
2400 rescuer->task->flags &= ~PF_WQ_WORKER;
2401 return 0;
2402 }
2403
2404
2405 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2406 schedule();
2407 goto repeat;
2408}
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
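/**
 * check_flush_dependency - check for flush dependency sanity
 * @target_wq: workqueue being flushed
 * @target_work: work item being flushed (NULL for workqueue flushes)
 *
 * A task or work item participating in memory reclaim that flushes a
 * !WQ_MEM_RECLAIM workqueue can deadlock; warn about such cases.
 */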
2421static void check_flush_dependency(struct workqueue_struct *target_wq,
2422 struct work_struct *target_work)
2423{
2424 work_func_t target_func = target_work ? target_work->func : NULL;
2425 struct worker *worker;
2426
2427 if (target_wq->flags & WQ_MEM_RECLAIM)
2428 return;
2429
2430 worker = current_wq_worker();
2431
2432 WARN_ONCE(current->flags & PF_MEMALLOC,
2433 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2434 current->pid, current->comm, target_wq->name, target_func);
2435 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2436 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2437 "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2438 worker->current_pwq->wq->name, worker->current_func,
2439 target_wq->name, target_func);
2440}
2441
2442struct wq_barrier {
2443 struct work_struct work;
2444 struct completion done;
2445 struct task_struct *task;
2446};
2447
2448static void wq_barrier_func(struct work_struct *work)
2449{
2450 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2451 complete(&barr->done);
2452}
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
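/**
 * insert_wq_barrier - insert a barrier work
 *
 * Queue a wq_barrier right after @target (or at the head of @worker's
 * scheduled list when @target is currently executing) so that the barrier
 * completes only after @target has finished.  It runs with WORK_NO_COLOR
 * and doesn't participate in flush coloring.  Called with pool->lock
 * held.
 */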
2478static void insert_wq_barrier(struct pool_workqueue *pwq,
2479 struct wq_barrier *barr,
2480 struct work_struct *target, struct worker *worker)
2481{
2482 struct list_head *head;
2483 unsigned int linked = 0;
2484
2485
2486
2487
2488
2489
2490
2491 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2492 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2493
2494
2495
2496
2497
2498
2499 lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map,
2500 "(complete)wq_barr::done",
2501 target->lockdep_map.key, 1);
2502 __init_completion(&barr->done);
2503 barr->task = current;
2504
2505
2506
2507
2508
2509 if (worker)
2510 head = worker->scheduled.next;
2511 else {
2512 unsigned long *bits = work_data_bits(target);
2513
2514 head = target->entry.next;
2515
2516 linked = *bits & WORK_STRUCT_LINKED;
2517 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2518 }
2519
2520 debug_work_activate(&barr->work);
2521 insert_work(pwq, &barr->work, head,
2522 work_color_to_flags(WORK_NO_COLOR) | linked);
2523}
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
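/**
 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
 * @wq: workqueue being flushed
 * @flush_color: new flush color, < 0 for no-op
 * @work_color: new work color, < 0 for no-op
 *
 * Returns %true if some pwqs have in-flight work of @flush_color and the
 * caller needs to wait.  Called with wq->mutex held.
 */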
2556static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2557 int flush_color, int work_color)
2558{
2559 bool wait = false;
2560 struct pool_workqueue *pwq;
2561
2562 if (flush_color >= 0) {
2563 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2564 atomic_set(&wq->nr_pwqs_to_flush, 1);
2565 }
2566
2567 for_each_pwq(pwq, wq) {
2568 struct worker_pool *pool = pwq->pool;
2569
2570 spin_lock_irq(&pool->lock);
2571
2572 if (flush_color >= 0) {
2573 WARN_ON_ONCE(pwq->flush_color != -1);
2574
2575 if (pwq->nr_in_flight[flush_color]) {
2576 pwq->flush_color = flush_color;
2577 atomic_inc(&wq->nr_pwqs_to_flush);
2578 wait = true;
2579 }
2580 }
2581
2582 if (work_color >= 0) {
2583 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2584 pwq->work_color = work_color;
2585 }
2586
2587 spin_unlock_irq(&pool->lock);
2588 }
2589
2590 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2591 complete(&wq->first_flusher->done);
2592
2593 return wait;
2594}
2595
2596
2597
2598
2599
2600
2601
2602
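/**
 * flush_workqueue - ensure that any scheduled work has run to completion
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */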
2603void flush_workqueue(struct workqueue_struct *wq)
2604{
2605 struct wq_flusher this_flusher = {
2606 .list = LIST_HEAD_INIT(this_flusher.list),
2607 .flush_color = -1,
2608 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2609 };
2610 int next_color;
2611
2612 if (WARN_ON(!wq_online))
2613 return;
2614
2615 lock_map_acquire(&wq->lockdep_map);
2616 lock_map_release(&wq->lockdep_map);
2617
2618 mutex_lock(&wq->mutex);
2619
2620
2621
2622
2623 next_color = work_next_color(wq->work_color);
2624
2625 if (next_color != wq->flush_color) {
2626
2627
2628
2629
2630
2631 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2632 this_flusher.flush_color = wq->work_color;
2633 wq->work_color = next_color;
2634
2635 if (!wq->first_flusher) {
2636
2637 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2638
2639 wq->first_flusher = &this_flusher;
2640
2641 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2642 wq->work_color)) {
2643
2644 wq->flush_color = next_color;
2645 wq->first_flusher = NULL;
2646 goto out_unlock;
2647 }
2648 } else {
2649
2650 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2651 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2652 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2653 }
2654 } else {
2655
2656
2657
2658
2659
2660 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2661 }
2662
2663 check_flush_dependency(wq, NULL);
2664
2665 mutex_unlock(&wq->mutex);
2666
2667 wait_for_completion(&this_flusher.done);
2668
2669
2670
2671
2672
2673
2674
2675 if (wq->first_flusher != &this_flusher)
2676 return;
2677
2678 mutex_lock(&wq->mutex);
2679
2680
2681 if (wq->first_flusher != &this_flusher)
2682 goto out_unlock;
2683
2684 wq->first_flusher = NULL;
2685
2686 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2687 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2688
2689 while (true) {
2690 struct wq_flusher *next, *tmp;
2691
2692
2693 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2694 if (next->flush_color != wq->flush_color)
2695 break;
2696 list_del_init(&next->list);
2697 complete(&next->done);
2698 }
2699
2700 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2701 wq->flush_color != work_next_color(wq->work_color));
2702
2703
2704 wq->flush_color = work_next_color(wq->flush_color);
2705
2706
2707 if (!list_empty(&wq->flusher_overflow)) {
2708
2709
2710
2711
2712
2713
2714 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2715 tmp->flush_color = wq->work_color;
2716
2717 wq->work_color = work_next_color(wq->work_color);
2718
2719 list_splice_tail_init(&wq->flusher_overflow,
2720 &wq->flusher_queue);
2721 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2722 }
2723
2724 if (list_empty(&wq->flusher_queue)) {
2725 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2726 break;
2727 }
2728
2729
2730
2731
2732
2733 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2734 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2735
2736 list_del_init(&next->list);
2737 wq->first_flusher = next;
2738
2739 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2740 break;
2741
2742
2743
2744
2745
2746 wq->first_flusher = NULL;
2747 }
2748
2749out_unlock:
2750 mutex_unlock(&wq->mutex);
2751}
2752EXPORT_SYMBOL(flush_workqueue);
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
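/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty.  While draining is in progress,
 * only chain queueing is allowed; repeated flushes are performed until no
 * pwq has active or delayed work left.
 */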
2765void drain_workqueue(struct workqueue_struct *wq)
2766{
2767 unsigned int flush_cnt = 0;
2768 struct pool_workqueue *pwq;
2769
2770
2771
2772
2773
2774
2775 mutex_lock(&wq->mutex);
2776 if (!wq->nr_drainers++)
2777 wq->flags |= __WQ_DRAINING;
2778 mutex_unlock(&wq->mutex);
2779reflush:
2780 flush_workqueue(wq);
2781
2782 mutex_lock(&wq->mutex);
2783
2784 for_each_pwq(pwq, wq) {
2785 bool drained;
2786
2787 spin_lock_irq(&pwq->pool->lock);
2788 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2789 spin_unlock_irq(&pwq->pool->lock);
2790
2791 if (drained)
2792 continue;
2793
2794 if (++flush_cnt == 10 ||
2795 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2796 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
2797 wq->name, flush_cnt);
2798
2799 mutex_unlock(&wq->mutex);
2800 goto reflush;
2801 }
2802
2803 if (!--wq->nr_drainers)
2804 wq->flags &= ~__WQ_DRAINING;
2805 mutex_unlock(&wq->mutex);
2806}
2807EXPORT_SYMBOL_GPL(drain_workqueue);
2808
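/*
 * start_flush_work - prepare to wait for @work to finish.  If @work is
 * queued or currently executing, insert a barrier after it and return
 * %true so the caller can wait on barr->done; otherwise return %false.
 */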
2809static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2810{
2811 struct worker *worker = NULL;
2812 struct worker_pool *pool;
2813 struct pool_workqueue *pwq;
2814
2815 might_sleep();
2816
2817 local_irq_disable();
2818 pool = get_work_pool(work);
2819 if (!pool) {
2820 local_irq_enable();
2821 return false;
2822 }
2823
2824 spin_lock(&pool->lock);
2825
2826 pwq = get_work_pwq(work);
2827 if (pwq) {
2828 if (unlikely(pwq->pool != pool))
2829 goto already_gone;
2830 } else {
2831 worker = find_worker_executing_work(pool, work);
2832 if (!worker)
2833 goto already_gone;
2834 pwq = worker->current_pwq;
2835 }
2836
2837 check_flush_dependency(pwq->wq, work);
2838
2839 insert_wq_barrier(pwq, barr, work, worker);
2840 spin_unlock_irq(&pool->lock);
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
2852 lock_map_acquire(&pwq->wq->lockdep_map);
2853 lock_map_release(&pwq->wq->lockdep_map);
2854 }
2855
2856 return true;
2857already_gone:
2858 spin_unlock_irq(&pool->lock);
2859 return false;
2860}
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
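/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Return: %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */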
2873bool flush_work(struct work_struct *work)
2874{
2875 struct wq_barrier barr;
2876
2877 if (WARN_ON(!wq_online))
2878 return false;
2879
2880 lock_map_acquire(&work->lockdep_map);
2881 lock_map_release(&work->lockdep_map);
2882
2883 if (start_flush_work(work, &barr)) {
2884 wait_for_completion(&barr.done);
2885 destroy_work_on_stack(&barr.work);
2886 return true;
2887 } else {
2888 return false;
2889 }
2890}
2891EXPORT_SYMBOL_GPL(flush_work);
2892
2893struct cwt_wait {
2894 wait_queue_entry_t wait;
2895 struct work_struct *work;
2896};
2897
2898static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
2899{
2900 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
2901
2902 if (cwait->work != key)
2903 return 0;
2904 return autoremove_wake_function(wait, mode, sync, key);
2905}
2906
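/*
 * __cancel_work_timer - cancel @work and wait for in-flight execution.
 * Grab PENDING (waiting out a concurrent canceler if necessary), mark the
 * work as canceling, flush any in-flight execution and clear the work
 * data.  Returns %true if @work was pending.
 */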
2907static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
2908{
2909 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
2910 unsigned long flags;
2911 int ret;
2912
2913 do {
2914 ret = try_to_grab_pending(work, is_dwork, &flags);
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931 if (unlikely(ret == -ENOENT)) {
2932 struct cwt_wait cwait;
2933
2934 init_wait(&cwait.wait);
2935 cwait.wait.func = cwt_wakefn;
2936 cwait.work = work;
2937
2938 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
2939 TASK_UNINTERRUPTIBLE);
2940 if (work_is_canceling(work))
2941 schedule();
2942 finish_wait(&cancel_waitq, &cwait.wait);
2943 }
2944 } while (unlikely(ret < 0));
2945
2946
2947 mark_work_canceling(work);
2948 local_irq_restore(flags);
2949
2950
2951
2952
2953
2954 if (wq_online)
2955 flush_work(work);
2956
2957 clear_work_data(work);
2958
2959
2960
2961
2962
2963
2964 smp_mb();
2965 if (waitqueue_active(&cancel_waitq))
2966 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
2967
2968 return ret;
2969}
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
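/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  Can be used even if
 * the work re-queues itself or migrates to another workqueue.  Must not
 * be used on the work member of a delayed_work; use
 * cancel_delayed_work_sync() for those.  Return: %true if @work was
 * pending.
 */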
2989bool cancel_work_sync(struct work_struct *work)
2990{
2991 return __cancel_work_timer(work, false);
2992}
2993EXPORT_SYMBOL_GPL(cancel_work_sync);
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
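/**
 * flush_delayed_work - wait for a dwork to finish executing the last queueing
 * @dwork: the delayed work to flush
 *
 * The delayed timer is cancelled and the pending work is queued for
 * immediate execution.  Like flush_work(), this only considers the last
 * queueing instance of @dwork.
 */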
3007bool flush_delayed_work(struct delayed_work *dwork)
3008{
3009 local_irq_disable();
3010 if (del_timer_sync(&dwork->timer))
3011 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3012 local_irq_enable();
3013 return flush_work(&dwork->work);
3014}
3015EXPORT_SYMBOL(flush_delayed_work);
3016
3017static bool __cancel_work(struct work_struct *work, bool is_dwork)
3018{
3019 unsigned long flags;
3020 int ret;
3021
3022 do {
3023 ret = try_to_grab_pending(work, is_dwork, &flags);
3024 } while (unlikely(ret == -EAGAIN));
3025
3026 if (unlikely(ret < 0))
3027 return false;
3028
3029 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3030 local_irq_restore(flags);
3031 return ret;
3032}
3033
3034
3035
3036
3037bool cancel_work(struct work_struct *work)
3038{
3039 return __cancel_work(work, false);
3040}
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
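/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.  Return: %true if @dwork was pending
 * and canceled, %false if it wasn't pending.  Does not wait for an
 * already-running callback; use cancel_delayed_work_sync() for that.
 */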
3058bool cancel_delayed_work(struct delayed_work *dwork)
3059{
3060 return __cancel_work(&dwork->work, true);
3061}
3062EXPORT_SYMBOL(cancel_delayed_work);
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073bool cancel_delayed_work_sync(struct delayed_work *dwork)
3074{
3075 return __cancel_work_timer(&dwork->work, true);
3076}
3077EXPORT_SYMBOL(cancel_delayed_work_sync);
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
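/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * Executes @func on each online CPU using the system workqueue and blocks
 * until all CPUs have completed.  Return: 0 on success, -ENOMEM if the
 * per-cpu works could not be allocated.
 */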
3090int schedule_on_each_cpu(work_func_t func)
3091{
3092 int cpu;
3093 struct work_struct __percpu *works;
3094
3095 works = alloc_percpu(struct work_struct);
3096 if (!works)
3097 return -ENOMEM;
3098
3099 get_online_cpus();
3100
3101 for_each_online_cpu(cpu) {
3102 struct work_struct *work = per_cpu_ptr(works, cpu);
3103
3104 INIT_WORK(work, func);
3105 schedule_work_on(cpu, work);
3106 }
3107
3108 for_each_online_cpu(cpu)
3109 flush_work(per_cpu_ptr(works, cpu));
3110
3111 put_online_cpus();
3112 free_percpu(works);
3113 return 0;
3114}
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
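/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn: the function to execute
 * @ew: guaranteed storage for the execute work structure
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules it for deferred execution.  Return: 0 if executed
 * immediately, 1 if deferred.
 */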
3128int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3129{
3130 if (!in_interrupt()) {
3131 fn(&ew->work);
3132 return 0;
3133 }
3134
3135 INIT_WORK(&ew->work, fn);
3136 schedule_work(&ew->work);
3137
3138 return 1;
3139}
3140EXPORT_SYMBOL_GPL(execute_in_process_context);
3141
3142
3143
3144
3145
3146
3147
3148void free_workqueue_attrs(struct workqueue_attrs *attrs)
3149{
3150 if (attrs) {
3151 free_cpumask_var(attrs->cpumask);
3152 kfree(attrs);
3153 }
3154}
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
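/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new workqueue_attrs and initialize it with default settings.
 * Return: the allocated attrs on success, %NULL on failure.
 */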
3165struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3166{
3167 struct workqueue_attrs *attrs;
3168
3169 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3170 if (!attrs)
3171 goto fail;
3172 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3173 goto fail;
3174
3175 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3176 return attrs;
3177fail:
3178 free_workqueue_attrs(attrs);
3179 return NULL;
3180}
3181
3182static void copy_workqueue_attrs(struct workqueue_attrs *to,
3183 const struct workqueue_attrs *from)
3184{
3185 to->nice = from->nice;
3186 cpumask_copy(to->cpumask, from->cpumask);
3187
3188
3189
3190
3191
3192 to->no_numa = from->no_numa;
3193}
3194
3195
3196static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3197{
3198 u32 hash = 0;
3199
3200 hash = jhash_1word(attrs->nice, hash);
3201 hash = jhash(cpumask_bits(attrs->cpumask),
3202 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3203 return hash;
3204}
3205
3206
3207static bool wqattrs_equal(const struct workqueue_attrs *a,
3208 const struct workqueue_attrs *b)
3209{
3210 if (a->nice != b->nice)
3211 return false;
3212 if (!cpumask_equal(a->cpumask, b->cpumask))
3213 return false;
3214 return true;
3215}
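/*
 * wqattrs_hash() and wqattrs_equal() together act as the hash table
 * interface for unbound_pool_hash: get_unbound_pool() below hashes the
 * requested attributes and reuses an existing worker_pool whenever an
 * entry with equal nice level and cpumask is found, so unbound workqueues
 * with identical attributes share their backing pools.  Note that
 * ->no_numa is intentionally left out of both the hash and the equality
 * test.
 */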
3216
3217
3218
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
3227static int init_worker_pool(struct worker_pool *pool)
3228{
3229 spin_lock_init(&pool->lock);
3230 pool->id = -1;
3231 pool->cpu = -1;
3232 pool->node = NUMA_NO_NODE;
3233 pool->flags |= POOL_DISASSOCIATED;
3234 pool->watchdog_ts = jiffies;
3235 INIT_LIST_HEAD(&pool->worklist);
3236 INIT_LIST_HEAD(&pool->idle_list);
3237 hash_init(pool->busy_hash);
3238
3239 setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout,
3240 (unsigned long)pool);
3241
3242 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3243 (unsigned long)pool);
3244
3245 mutex_init(&pool->attach_mutex);
3246 INIT_LIST_HEAD(&pool->workers);
3247
3248 ida_init(&pool->worker_ida);
3249 INIT_HLIST_NODE(&pool->hash_node);
3250 pool->refcnt = 1;
3251
3252
3253 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3254 if (!pool->attrs)
3255 return -ENOMEM;
3256 return 0;
3257}
3258
3259static void rcu_free_wq(struct rcu_head *rcu)
3260{
3261 struct workqueue_struct *wq =
3262 container_of(rcu, struct workqueue_struct, rcu);
3263
3264 if (!(wq->flags & WQ_UNBOUND))
3265 free_percpu(wq->cpu_pwqs);
3266 else
3267 free_workqueue_attrs(wq->unbound_attrs);
3268
3269 kfree(wq->rescuer);
3270 kfree(wq);
3271}
3272
3273static void rcu_free_pool(struct rcu_head *rcu)
3274{
3275 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3276
3277 ida_destroy(&pool->worker_ida);
3278 free_workqueue_attrs(pool->attrs);
3279 kfree(pool);
3280}
3281
3282
3283
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * successfully or not, init_worker_pool().
 *
 * Should be called with wq_pool_mutex held.
 */
3293static void put_unbound_pool(struct worker_pool *pool)
3294{
3295 DECLARE_COMPLETION_ONSTACK(detach_completion);
3296 struct worker *worker;
3297
3298 lockdep_assert_held(&wq_pool_mutex);
3299
3300 if (--pool->refcnt)
3301 return;
3302
3303
3304 if (WARN_ON(!(pool->cpu < 0)) ||
3305 WARN_ON(!list_empty(&pool->worklist)))
3306 return;
3307
3308
3309 if (pool->id >= 0)
3310 idr_remove(&worker_pool_idr, pool->id);
3311 hash_del(&pool->hash_node);
3312
3313
3314
3315
3316
3317
3318 spin_lock_irq(&pool->lock);
3319 wait_event_lock_irq(wq_manager_wait,
3320 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3321 pool->flags |= POOL_MANAGER_ACTIVE;
3322
3323 while ((worker = first_idle_worker(pool)))
3324 destroy_worker(worker);
3325 WARN_ON(pool->nr_workers || pool->nr_idle);
3326 spin_unlock_irq(&pool->lock);
3327
3328 mutex_lock(&pool->attach_mutex);
3329 if (!list_empty(&pool->workers))
3330 pool->detach_completion = &detach_completion;
3331 mutex_unlock(&pool->attach_mutex);
3332
3333 if (pool->detach_completion)
3334 wait_for_completion(pool->detach_completion);
3335
3336
3337 del_timer_sync(&pool->idle_timer);
3338 del_timer_sync(&pool->mayday_timer);
3339
3340
3341 call_rcu_sched(&pool->rcu, rcu_free_pool);
3342}
3343
3344
3345
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
3358static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3359{
3360 u32 hash = wqattrs_hash(attrs);
3361 struct worker_pool *pool;
3362 int node;
3363 int target_node = NUMA_NO_NODE;
3364
3365 lockdep_assert_held(&wq_pool_mutex);
3366
3367
3368 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3369 if (wqattrs_equal(pool->attrs, attrs)) {
3370 pool->refcnt++;
3371 return pool;
3372 }
3373 }
3374
3375
3376 if (wq_numa_enabled) {
3377 for_each_node(node) {
3378 if (cpumask_subset(attrs->cpumask,
3379 wq_numa_possible_cpumask[node])) {
3380 target_node = node;
3381 break;
3382 }
3383 }
3384 }
3385
3386
3387 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3388 if (!pool || init_worker_pool(pool) < 0)
3389 goto fail;
3390
3391 lockdep_set_subclass(&pool->lock, 1);
3392 copy_workqueue_attrs(pool->attrs, attrs);
3393 pool->node = target_node;
3394
3395
3396
3397
3398
3399 pool->attrs->no_numa = false;
3400
3401 if (worker_pool_assign_id(pool) < 0)
3402 goto fail;
3403
3404
3405 if (wq_online && !create_worker(pool))
3406 goto fail;
3407
3408
3409 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3410
3411 return pool;
3412fail:
3413 if (pool)
3414 put_unbound_pool(pool);
3415 return NULL;
3416}
3417
3418static void rcu_free_pwq(struct rcu_head *rcu)
3419{
3420 kmem_cache_free(pwq_cache,
3421 container_of(rcu, struct pool_workqueue, rcu));
3422}
3423
3424
3425
3426
3427
3428static void pwq_unbound_release_workfn(struct work_struct *work)
3429{
3430 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3431 unbound_release_work);
3432 struct workqueue_struct *wq = pwq->wq;
3433 struct worker_pool *pool = pwq->pool;
3434 bool is_last;
3435
3436 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3437 return;
3438
3439 mutex_lock(&wq->mutex);
3440 list_del_rcu(&pwq->pwqs_node);
3441 is_last = list_empty(&wq->pwqs);
3442 mutex_unlock(&wq->mutex);
3443
3444 mutex_lock(&wq_pool_mutex);
3445 put_unbound_pool(pool);
3446 mutex_unlock(&wq_pool_mutex);
3447
3448 call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3449
3450
3451
3452
3453
3454 if (is_last)
3455 call_rcu_sched(&wq->rcu, rcu_free_wq);
3456}
3457
3458
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
3466static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3467{
3468 struct workqueue_struct *wq = pwq->wq;
3469 bool freezable = wq->flags & WQ_FREEZABLE;
3470 unsigned long flags;
3471
3472
3473 lockdep_assert_held(&wq->mutex);
3474
3475
3476 if (!freezable && pwq->max_active == wq->saved_max_active)
3477 return;
3478
3479
3480 spin_lock_irqsave(&pwq->pool->lock, flags);
3481
3482
3483
3484
3485
3486
3487 if (!freezable || !workqueue_freezing) {
3488 pwq->max_active = wq->saved_max_active;
3489
3490 while (!list_empty(&pwq->delayed_works) &&
3491 pwq->nr_active < pwq->max_active)
3492 pwq_activate_first_delayed(pwq);
3493
3494
3495
3496
3497
3498 wake_up_worker(pwq->pool);
3499 } else {
3500 pwq->max_active = 0;
3501 }
3502
3503 spin_unlock_irqrestore(&pwq->pool->lock, flags);
3504}
3505
3506
3507static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3508 struct worker_pool *pool)
3509{
3510 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3511
3512 memset(pwq, 0, sizeof(*pwq));
3513
3514 pwq->pool = pool;
3515 pwq->wq = wq;
3516 pwq->flush_color = -1;
3517 pwq->refcnt = 1;
3518 INIT_LIST_HEAD(&pwq->delayed_works);
3519 INIT_LIST_HEAD(&pwq->pwqs_node);
3520 INIT_LIST_HEAD(&pwq->mayday_node);
3521 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3522}
3523
3524
3525static void link_pwq(struct pool_workqueue *pwq)
3526{
3527 struct workqueue_struct *wq = pwq->wq;
3528
3529 lockdep_assert_held(&wq->mutex);
3530
3531
3532 if (!list_empty(&pwq->pwqs_node))
3533 return;
3534
3535
3536 pwq->work_color = wq->work_color;
3537
3538
3539 pwq_adjust_max_active(pwq);
3540
3541
3542 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3543}
3544
3545
3546static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3547 const struct workqueue_attrs *attrs)
3548{
3549 struct worker_pool *pool;
3550 struct pool_workqueue *pwq;
3551
3552 lockdep_assert_held(&wq_pool_mutex);
3553
3554 pool = get_unbound_pool(attrs);
3555 if (!pool)
3556 return NULL;
3557
3558 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3559 if (!pwq) {
3560 put_unbound_pool(pool);
3561 return NULL;
3562 }
3563
3564 init_pwq(pwq, wq, pool);
3565 return pwq;
3566}
3567
3568
3569
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during the
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
3590static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3591 int cpu_going_down, cpumask_t *cpumask)
3592{
3593 if (!wq_numa_enabled || attrs->no_numa)
3594 goto use_dfl;
3595
3596
3597 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3598 if (cpu_going_down >= 0)
3599 cpumask_clear_cpu(cpu_going_down, cpumask);
3600
3601 if (cpumask_empty(cpumask))
3602 goto use_dfl;
3603
3604
3605 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3606
3607 if (cpumask_empty(cpumask)) {
3608 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3609 "possible intersect\n");
3610 return false;
3611 }
3612
3613 return !cpumask_equal(cpumask, attrs->cpumask);
3614
3615use_dfl:
3616 cpumask_copy(cpumask, attrs->cpumask);
3617 return false;
3618}
3619
3620
3621static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3622 int node,
3623 struct pool_workqueue *pwq)
3624{
3625 struct pool_workqueue *old_pwq;
3626
3627 lockdep_assert_held(&wq_pool_mutex);
3628 lockdep_assert_held(&wq->mutex);
3629
3630
3631 link_pwq(pwq);
3632
3633 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3634 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3635 return old_pwq;
3636}
3637
3638
3639struct apply_wqattrs_ctx {
3640 struct workqueue_struct *wq;
3641 struct workqueue_attrs *attrs;
3642 struct list_head list;
3643 struct pool_workqueue *dfl_pwq;
3644 struct pool_workqueue *pwq_tbl[];
3645};
3646
3647
3648static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3649{
3650 if (ctx) {
3651 int node;
3652
3653 for_each_node(node)
3654 put_pwq_unlocked(ctx->pwq_tbl[node]);
3655 put_pwq_unlocked(ctx->dfl_pwq);
3656
3657 free_workqueue_attrs(ctx->attrs);
3658
3659 kfree(ctx);
3660 }
3661}
3662
3663
3664static struct apply_wqattrs_ctx *
3665apply_wqattrs_prepare(struct workqueue_struct *wq,
3666 const struct workqueue_attrs *attrs)
3667{
3668 struct apply_wqattrs_ctx *ctx;
3669 struct workqueue_attrs *new_attrs, *tmp_attrs;
3670 int node;
3671
3672 lockdep_assert_held(&wq_pool_mutex);
3673
3674 ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3675 GFP_KERNEL);
3676
3677 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3678 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3679 if (!ctx || !new_attrs || !tmp_attrs)
3680 goto out_free;
3681
3682
3683
3684
3685
3686
3687 copy_workqueue_attrs(new_attrs, attrs);
3688 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3689 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3690 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3691
3692
3693
3694
3695
3696
3697 copy_workqueue_attrs(tmp_attrs, new_attrs);
3698
3699
3700
3701
3702
3703
3704 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3705 if (!ctx->dfl_pwq)
3706 goto out_free;
3707
3708 for_each_node(node) {
3709 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3710 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3711 if (!ctx->pwq_tbl[node])
3712 goto out_free;
3713 } else {
3714 ctx->dfl_pwq->refcnt++;
3715 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3716 }
3717 }
3718
3719
3720 copy_workqueue_attrs(new_attrs, attrs);
3721 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3722 ctx->attrs = new_attrs;
3723
3724 ctx->wq = wq;
3725 free_workqueue_attrs(tmp_attrs);
3726 return ctx;
3727
3728out_free:
3729 free_workqueue_attrs(tmp_attrs);
3730 free_workqueue_attrs(new_attrs);
3731 apply_wqattrs_cleanup(ctx);
3732 return NULL;
3733}
3734
3735
3736static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3737{
3738 int node;
3739
3740
3741 mutex_lock(&ctx->wq->mutex);
3742
3743 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3744
3745
3746 for_each_node(node)
3747 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3748 ctx->pwq_tbl[node]);
3749
3750
3751 link_pwq(ctx->dfl_pwq);
3752 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3753
3754 mutex_unlock(&ctx->wq->mutex);
3755}
3756
3757static void apply_wqattrs_lock(void)
3758{
3759
3760 get_online_cpus();
3761 mutex_lock(&wq_pool_mutex);
3762}
3763
3764static void apply_wqattrs_unlock(void)
3765{
3766 mutex_unlock(&wq_pool_mutex);
3767 put_online_cpus();
3768}
3769
3770static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3771 const struct workqueue_attrs *attrs)
3772{
3773 struct apply_wqattrs_ctx *ctx;
3774
3775
3776 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3777 return -EINVAL;
3778
3779
3780 if (!list_empty(&wq->pwqs)) {
3781 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
3782 return -EINVAL;
3783
3784 wq->flags &= ~__WQ_ORDERED;
3785 }
3786
3787 ctx = apply_wqattrs_prepare(wq, attrs);
3788 if (!ctx)
3789 return -ENOMEM;
3790
3791
3792 apply_wqattrs_commit(ctx);
3793 apply_wqattrs_cleanup(ctx);
3794
3795 return 0;
3796}
3797
3798
3799
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node they were issued on.  Older pwqs are released as in-flight
 * work items finish.  Note that a work item which repeatedly requeues
 * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
3814int apply_workqueue_attrs(struct workqueue_struct *wq,
3815 const struct workqueue_attrs *attrs)
3816{
3817 int ret;
3818
3819 apply_wqattrs_lock();
3820 ret = apply_workqueue_attrs_locked(wq, attrs);
3821 apply_wqattrs_unlock();
3822
3823 return ret;
3824}
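/*
 * Sketch of the attrs API usage (the values and the workqueue name are
 * only an example):
 *
 *	struct workqueue_attrs *attrs;
 *	int ret = -ENOMEM;
 *
 *	attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *	if (attrs) {
 *		attrs->nice = -5;
 *		cpumask_copy(attrs->cpumask, cpumask_of_node(0));
 *		ret = apply_workqueue_attrs(unbound_wq, attrs);
 *		free_workqueue_attrs(attrs);
 *	}
 *
 * The target workqueue must be WQ_UNBOUND; per-cpu workqueues are rejected
 * with -EINVAL by apply_workqueue_attrs_locked() above.
 */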
3825
3826
3827
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from the CPU hotplug callbacks.  @cpu is
 * being hot[un]plugged; update the NUMA affinity of @wq accordingly.
 *
 * If NUMA affinity can't be adjusted due to a memory allocation failure,
 * it falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing the work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  This is similar to how per-cpu
 * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * the CPU-down path.
 */
3848static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3849 bool online)
3850{
3851 int node = cpu_to_node(cpu);
3852 int cpu_off = online ? -1 : cpu;
3853 struct pool_workqueue *old_pwq = NULL, *pwq;
3854 struct workqueue_attrs *target_attrs;
3855 cpumask_t *cpumask;
3856
3857 lockdep_assert_held(&wq_pool_mutex);
3858
3859 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
3860 wq->unbound_attrs->no_numa)
3861 return;
3862
3863
3864
3865
3866
3867
3868 target_attrs = wq_update_unbound_numa_attrs_buf;
3869 cpumask = target_attrs->cpumask;
3870
3871 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3872 pwq = unbound_pwq_by_node(wq, node);
3873
3874
3875
3876
3877
3878
3879
3880 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
3881 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3882 return;
3883 } else {
3884 goto use_dfl_pwq;
3885 }
3886
3887
3888 pwq = alloc_unbound_pwq(wq, target_attrs);
3889 if (!pwq) {
3890 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3891 wq->name);
3892 goto use_dfl_pwq;
3893 }
3894
3895
3896 mutex_lock(&wq->mutex);
3897 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3898 goto out_unlock;
3899
3900use_dfl_pwq:
3901 mutex_lock(&wq->mutex);
3902 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3903 get_pwq(wq->dfl_pwq);
3904 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3905 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3906out_unlock:
3907 mutex_unlock(&wq->mutex);
3908 put_pwq_unlocked(old_pwq);
3909}
3910
3911static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3912{
3913 bool highpri = wq->flags & WQ_HIGHPRI;
3914 int cpu, ret;
3915
3916 if (!(wq->flags & WQ_UNBOUND)) {
3917 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3918 if (!wq->cpu_pwqs)
3919 return -ENOMEM;
3920
3921 for_each_possible_cpu(cpu) {
3922 struct pool_workqueue *pwq =
3923 per_cpu_ptr(wq->cpu_pwqs, cpu);
3924 struct worker_pool *cpu_pools =
3925 per_cpu(cpu_worker_pools, cpu);
3926
3927 init_pwq(pwq, wq, &cpu_pools[highpri]);
3928
3929 mutex_lock(&wq->mutex);
3930 link_pwq(pwq);
3931 mutex_unlock(&wq->mutex);
3932 }
3933 return 0;
3934 } else if (wq->flags & __WQ_ORDERED) {
3935 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
3936
3937 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3938 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3939 "ordering guarantee broken for workqueue %s\n", wq->name);
3940 return ret;
3941 } else {
3942 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3943 }
3944}
3945
3946static int wq_clamp_max_active(int max_active, unsigned int flags,
3947 const char *name)
3948{
3949 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
3950
3951 if (max_active < 1 || max_active > lim)
3952 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
3953 max_active, name, 1, lim);
3954
3955 return clamp_val(max_active, 1, lim);
3956}
3957
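/*
 * __alloc_workqueue_key() is the common backend behind the alloc_workqueue()
 * and alloc_ordered_workqueue() macros in linux/workqueue.h.  A typical
 * caller looks like the sketch below (the names are illustrative only):
 *
 *	struct workqueue_struct *my_wq;
 *
 *	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *	if (!my_wq)
 *		return -ENOMEM;
 *	queue_work(my_wq, &dev->work);
 *	...
 *	destroy_workqueue(my_wq);
 */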
3958struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3959 unsigned int flags,
3960 int max_active,
3961 struct lock_class_key *key,
3962 const char *lock_name, ...)
3963{
3964 size_t tbl_size = 0;
3965 va_list args;
3966 struct workqueue_struct *wq;
3967 struct pool_workqueue *pwq;
3968
3969
3970
3971
3972
3973
3974
3975
3976 if ((flags & WQ_UNBOUND) && max_active == 1)
3977 flags |= __WQ_ORDERED;
3978
3979
3980 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
3981 flags |= WQ_UNBOUND;
3982
3983
3984 if (flags & WQ_UNBOUND)
3985 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
3986
3987 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
3988 if (!wq)
3989 return NULL;
3990
3991 if (flags & WQ_UNBOUND) {
3992 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3993 if (!wq->unbound_attrs)
3994 goto err_free_wq;
3995 }
3996
3997 va_start(args, lock_name);
3998 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
3999 va_end(args);
4000
4001 max_active = max_active ?: WQ_DFL_ACTIVE;
4002 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4003
4004
4005 wq->flags = flags;
4006 wq->saved_max_active = max_active;
4007 mutex_init(&wq->mutex);
4008 atomic_set(&wq->nr_pwqs_to_flush, 0);
4009 INIT_LIST_HEAD(&wq->pwqs);
4010 INIT_LIST_HEAD(&wq->flusher_queue);
4011 INIT_LIST_HEAD(&wq->flusher_overflow);
4012 INIT_LIST_HEAD(&wq->maydays);
4013
4014 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4015 INIT_LIST_HEAD(&wq->list);
4016
4017 if (alloc_and_link_pwqs(wq) < 0)
4018 goto err_free_wq;
4019
4020
4021
4022
4023
4024 if (flags & WQ_MEM_RECLAIM) {
4025 struct worker *rescuer;
4026
4027 rescuer = alloc_worker(NUMA_NO_NODE);
4028 if (!rescuer)
4029 goto err_destroy;
4030
4031 rescuer->rescue_wq = wq;
4032 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
4033 wq->name);
4034 if (IS_ERR(rescuer->task)) {
4035 kfree(rescuer);
4036 goto err_destroy;
4037 }
4038
4039 wq->rescuer = rescuer;
4040 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4041 wake_up_process(rescuer->task);
4042 }
4043
4044 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4045 goto err_destroy;
4046
4047
4048
4049
4050
4051
4052 mutex_lock(&wq_pool_mutex);
4053
4054 mutex_lock(&wq->mutex);
4055 for_each_pwq(pwq, wq)
4056 pwq_adjust_max_active(pwq);
4057 mutex_unlock(&wq->mutex);
4058
4059 list_add_tail_rcu(&wq->list, &workqueues);
4060
4061 mutex_unlock(&wq_pool_mutex);
4062
4063 return wq;
4064
4065err_free_wq:
4066 free_workqueue_attrs(wq->unbound_attrs);
4067 kfree(wq);
4068 return NULL;
4069err_destroy:
4070 destroy_workqueue(wq);
4071 return NULL;
4072}
4073EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
4074
4075
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
4081void destroy_workqueue(struct workqueue_struct *wq)
4082{
4083 struct pool_workqueue *pwq;
4084 int node;
4085
4086
4087 drain_workqueue(wq);
4088
4089
4090 mutex_lock(&wq->mutex);
4091 for_each_pwq(pwq, wq) {
4092 int i;
4093
4094 for (i = 0; i < WORK_NR_COLORS; i++) {
4095 if (WARN_ON(pwq->nr_in_flight[i])) {
4096 mutex_unlock(&wq->mutex);
4097 show_workqueue_state();
4098 return;
4099 }
4100 }
4101
4102 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4103 WARN_ON(pwq->nr_active) ||
4104 WARN_ON(!list_empty(&pwq->delayed_works))) {
4105 mutex_unlock(&wq->mutex);
4106 show_workqueue_state();
4107 return;
4108 }
4109 }
4110 mutex_unlock(&wq->mutex);
4111
4112
4113
4114
4115
4116 mutex_lock(&wq_pool_mutex);
4117 list_del_rcu(&wq->list);
4118 mutex_unlock(&wq_pool_mutex);
4119
4120 workqueue_sysfs_unregister(wq);
4121
4122 if (wq->rescuer)
4123 kthread_stop(wq->rescuer->task);
4124
4125 if (!(wq->flags & WQ_UNBOUND)) {
4126
4127
4128
4129
4130 call_rcu_sched(&wq->rcu, rcu_free_wq);
4131 } else {
4132
4133
4134
4135
4136
4137 for_each_node(node) {
4138 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4139 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4140 put_pwq_unlocked(pwq);
4141 }
4142
4143
4144
4145
4146
4147 pwq = wq->dfl_pwq;
4148 wq->dfl_pwq = NULL;
4149 put_pwq_unlocked(pwq);
4150 }
4151}
4152EXPORT_SYMBOL_GPL(destroy_workqueue);
4153
4154
4155
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
4164void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4165{
4166 struct pool_workqueue *pwq;
4167
4168
4169 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4170 return;
4171
4172 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4173
4174 mutex_lock(&wq->mutex);
4175
4176 wq->flags &= ~__WQ_ORDERED;
4177 wq->saved_max_active = max_active;
4178
4179 for_each_pwq(pwq, wq)
4180 pwq_adjust_max_active(pwq);
4181
4182 mutex_unlock(&wq->mutex);
4183}
4184EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194bool current_is_workqueue_rescuer(void)
4195{
4196 struct worker *worker = current_wq_worker();
4197
4198 return worker && worker->rescue_wq;
4199}
4200
4201
4202
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean it is also congested
 * on other CPUs / NUMA nodes.
 *
 * Return:
 * %true if congested, %false otherwise.
 */
4219bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4220{
4221 struct pool_workqueue *pwq;
4222 bool ret;
4223
4224 rcu_read_lock_sched();
4225
4226 if (cpu == WORK_CPU_UNBOUND)
4227 cpu = smp_processor_id();
4228
4229 if (!(wq->flags & WQ_UNBOUND))
4230 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4231 else
4232 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4233
4234 ret = !list_empty(&pwq->delayed_works);
4235 rcu_read_unlock_sched();
4236
4237 return ret;
4238}
4239EXPORT_SYMBOL_GPL(workqueue_congested);
4240
4241
4242
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
4252unsigned int work_busy(struct work_struct *work)
4253{
4254 struct worker_pool *pool;
4255 unsigned long flags;
4256 unsigned int ret = 0;
4257
4258 if (work_pending(work))
4259 ret |= WORK_BUSY_PENDING;
4260
4261 local_irq_save(flags);
4262 pool = get_work_pool(work);
4263 if (pool) {
4264 spin_lock(&pool->lock);
4265 if (find_worker_executing_work(pool, work))
4266 ret |= WORK_BUSY_RUNNING;
4267 spin_unlock(&pool->lock);
4268 }
4269 local_irq_restore(flags);
4270
4271 return ret;
4272}
4273EXPORT_SYMBOL_GPL(work_busy);
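/*
 * Example (advisory only, as noted above; the field name is illustrative):
 *
 *	unsigned int busy = work_busy(&dev->refresh_work);
 *
 *	if (busy & WORK_BUSY_PENDING)
 *		;	// queued but not yet started
 *	if (busy & WORK_BUSY_RUNNING)
 *		;	// currently executing on some CPU
 *
 * The result is stale by the time it is inspected, so it is suitable for
 * statistics and debugging rather than for synchronization decisions.
 */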
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285void set_worker_desc(const char *fmt, ...)
4286{
4287 struct worker *worker = current_wq_worker();
4288 va_list args;
4289
4290 if (worker) {
4291 va_start(args, fmt);
4292 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4293 va_end(args);
4294 worker->desc_valid = true;
4295 }
4296}
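/*
 * set_worker_desc() lets a running work item describe itself for debug
 * dumps (see print_worker_info() below).  For example, a writeback-style
 * work function might do something like (illustrative only):
 *
 *	set_worker_desc("flush-%s", dev_name);
 *
 * The description is truncated to WORKER_DESC_LEN bytes and is printed
 * next to the workqueue name when the worker task is dumped.
 */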
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311void print_worker_info(const char *log_lvl, struct task_struct *task)
4312{
4313 work_func_t *fn = NULL;
4314 char name[WQ_NAME_LEN] = { };
4315 char desc[WORKER_DESC_LEN] = { };
4316 struct pool_workqueue *pwq = NULL;
4317 struct workqueue_struct *wq = NULL;
4318 bool desc_valid = false;
4319 struct worker *worker;
4320
4321 if (!(task->flags & PF_WQ_WORKER))
4322 return;
4323
4324
4325
4326
4327
4328 worker = kthread_probe_data(task);
4329
4330
4331
4332
4333
4334 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4335 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4336 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4337 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4338
4339
4340 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4341 if (desc_valid)
4342 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4343
4344 if (fn || name[0] || desc[0]) {
4345 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4346 if (desc[0])
4347 pr_cont(" (%s)", desc);
4348 pr_cont("\n");
4349 }
4350}
4351
4352static void pr_cont_pool_info(struct worker_pool *pool)
4353{
4354 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4355 if (pool->node != NUMA_NO_NODE)
4356 pr_cont(" node=%d", pool->node);
4357 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4358}
4359
4360static void pr_cont_work(bool comma, struct work_struct *work)
4361{
4362 if (work->func == wq_barrier_func) {
4363 struct wq_barrier *barr;
4364
4365 barr = container_of(work, struct wq_barrier, work);
4366
4367 pr_cont("%s BAR(%d)", comma ? "," : "",
4368 task_pid_nr(barr->task));
4369 } else {
4370 pr_cont("%s %pf", comma ? "," : "", work->func);
4371 }
4372}
4373
4374static void show_pwq(struct pool_workqueue *pwq)
4375{
4376 struct worker_pool *pool = pwq->pool;
4377 struct work_struct *work;
4378 struct worker *worker;
4379 bool has_in_flight = false, has_pending = false;
4380 int bkt;
4381
4382 pr_info(" pwq %d:", pool->id);
4383 pr_cont_pool_info(pool);
4384
4385 pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
4386 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4387
4388 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4389 if (worker->current_pwq == pwq) {
4390 has_in_flight = true;
4391 break;
4392 }
4393 }
4394 if (has_in_flight) {
4395 bool comma = false;
4396
4397 pr_info(" in-flight:");
4398 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4399 if (worker->current_pwq != pwq)
4400 continue;
4401
4402 pr_cont("%s %d%s:%pf", comma ? "," : "",
4403 task_pid_nr(worker->task),
4404 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4405 worker->current_func);
4406 list_for_each_entry(work, &worker->scheduled, entry)
4407 pr_cont_work(false, work);
4408 comma = true;
4409 }
4410 pr_cont("\n");
4411 }
4412
4413 list_for_each_entry(work, &pool->worklist, entry) {
4414 if (get_work_pwq(work) == pwq) {
4415 has_pending = true;
4416 break;
4417 }
4418 }
4419 if (has_pending) {
4420 bool comma = false;
4421
4422 pr_info(" pending:");
4423 list_for_each_entry(work, &pool->worklist, entry) {
4424 if (get_work_pwq(work) != pwq)
4425 continue;
4426
4427 pr_cont_work(comma, work);
4428 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4429 }
4430 pr_cont("\n");
4431 }
4432
4433 if (!list_empty(&pwq->delayed_works)) {
4434 bool comma = false;
4435
4436 pr_info(" delayed:");
4437 list_for_each_entry(work, &pwq->delayed_works, entry) {
4438 pr_cont_work(comma, work);
4439 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4440 }
4441 pr_cont("\n");
4442 }
4443}
4444
4445
4446
4447
4448
4449
4450
4451void show_workqueue_state(void)
4452{
4453 struct workqueue_struct *wq;
4454 struct worker_pool *pool;
4455 unsigned long flags;
4456 int pi;
4457
4458 rcu_read_lock_sched();
4459
4460 pr_info("Showing busy workqueues and worker pools:\n");
4461
4462 list_for_each_entry_rcu(wq, &workqueues, list) {
4463 struct pool_workqueue *pwq;
4464 bool idle = true;
4465
4466 for_each_pwq(pwq, wq) {
4467 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4468 idle = false;
4469 break;
4470 }
4471 }
4472 if (idle)
4473 continue;
4474
4475 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4476
4477 for_each_pwq(pwq, wq) {
4478 spin_lock_irqsave(&pwq->pool->lock, flags);
4479 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4480 show_pwq(pwq);
4481 spin_unlock_irqrestore(&pwq->pool->lock, flags);
4482 }
4483 }
4484
4485 for_each_pool(pool, pi) {
4486 struct worker *worker;
4487 bool first = true;
4488
4489 spin_lock_irqsave(&pool->lock, flags);
4490 if (pool->nr_workers == pool->nr_idle)
4491 goto next_pool;
4492
4493 pr_info("pool %d:", pool->id);
4494 pr_cont_pool_info(pool);
4495 pr_cont(" hung=%us workers=%d",
4496 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4497 pool->nr_workers);
4498 if (pool->manager)
4499 pr_cont(" manager: %d",
4500 task_pid_nr(pool->manager->task));
4501 list_for_each_entry(worker, &pool->idle_list, entry) {
4502 pr_cont(" %s%d", first ? "idle: " : "",
4503 task_pid_nr(worker->task));
4504 first = false;
4505 }
4506 pr_cont("\n");
4507 next_pool:
4508 spin_unlock_irqrestore(&pool->lock, flags);
4509 }
4510
4511 rcu_read_unlock_sched();
4512}
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529static void wq_unbind_fn(struct work_struct *work)
4530{
4531 int cpu = smp_processor_id();
4532 struct worker_pool *pool;
4533 struct worker *worker;
4534
4535 for_each_cpu_worker_pool(pool, cpu) {
4536 mutex_lock(&pool->attach_mutex);
4537 spin_lock_irq(&pool->lock);
4538
4539
4540
4541
4542
4543
4544
4545
4546 for_each_pool_worker(worker, pool)
4547 worker->flags |= WORKER_UNBOUND;
4548
4549 pool->flags |= POOL_DISASSOCIATED;
4550
4551 spin_unlock_irq(&pool->lock);
4552 mutex_unlock(&pool->attach_mutex);
4553
4554
4555
4556
4557
4558
4559
4560 schedule();
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570 atomic_set(&pool->nr_running, 0);
4571
4572
4573
4574
4575
4576
4577 spin_lock_irq(&pool->lock);
4578 wake_up_worker(pool);
4579 spin_unlock_irq(&pool->lock);
4580 }
4581}
4582
4583
4584
4585
4586
4587
4588
4589static void rebind_workers(struct worker_pool *pool)
4590{
4591 struct worker *worker;
4592
4593 lockdep_assert_held(&pool->attach_mutex);
4594
4595
4596
4597
4598
4599
4600
4601
4602 for_each_pool_worker(worker, pool)
4603 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4604 pool->attrs->cpumask) < 0);
4605
4606 spin_lock_irq(&pool->lock);
4607
4608
4609
4610
4611
4612
4613 if (!(pool->flags & POOL_DISASSOCIATED)) {
4614 spin_unlock_irq(&pool->lock);
4615 return;
4616 }
4617
4618 pool->flags &= ~POOL_DISASSOCIATED;
4619
4620 for_each_pool_worker(worker, pool) {
4621 unsigned int worker_flags = worker->flags;
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631 if (worker_flags & WORKER_IDLE)
4632 wake_up_process(worker->task);
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4650 worker_flags |= WORKER_REBOUND;
4651 worker_flags &= ~WORKER_UNBOUND;
4652 ACCESS_ONCE(worker->flags) = worker_flags;
4653 }
4654
4655 spin_unlock_irq(&pool->lock);
4656}
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4669{
4670 static cpumask_t cpumask;
4671 struct worker *worker;
4672
4673 lockdep_assert_held(&pool->attach_mutex);
4674
4675
4676 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4677 return;
4678
4679 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4680
4681
4682 for_each_pool_worker(worker, pool)
4683 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
4684}
4685
4686int workqueue_prepare_cpu(unsigned int cpu)
4687{
4688 struct worker_pool *pool;
4689
4690 for_each_cpu_worker_pool(pool, cpu) {
4691 if (pool->nr_workers)
4692 continue;
4693 if (!create_worker(pool))
4694 return -ENOMEM;
4695 }
4696 return 0;
4697}
4698
4699int workqueue_online_cpu(unsigned int cpu)
4700{
4701 struct worker_pool *pool;
4702 struct workqueue_struct *wq;
4703 int pi;
4704
4705 mutex_lock(&wq_pool_mutex);
4706
4707 for_each_pool(pool, pi) {
4708 mutex_lock(&pool->attach_mutex);
4709
4710 if (pool->cpu == cpu)
4711 rebind_workers(pool);
4712 else if (pool->cpu < 0)
4713 restore_unbound_workers_cpumask(pool, cpu);
4714
4715 mutex_unlock(&pool->attach_mutex);
4716 }
4717
4718
4719 list_for_each_entry(wq, &workqueues, list)
4720 wq_update_unbound_numa(wq, cpu, true);
4721
4722 mutex_unlock(&wq_pool_mutex);
4723 return 0;
4724}
4725
4726int workqueue_offline_cpu(unsigned int cpu)
4727{
4728 struct work_struct unbind_work;
4729 struct workqueue_struct *wq;
4730
4731
4732 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
4733 queue_work_on(cpu, system_highpri_wq, &unbind_work);
4734
4735
4736 mutex_lock(&wq_pool_mutex);
4737 list_for_each_entry(wq, &workqueues, list)
4738 wq_update_unbound_numa(wq, cpu, false);
4739 mutex_unlock(&wq_pool_mutex);
4740
4741
4742 flush_work(&unbind_work);
4743 destroy_work_on_stack(&unbind_work);
4744 return 0;
4745}
4746
4747#ifdef CONFIG_SMP
4748
4749struct work_for_cpu {
4750 struct work_struct work;
4751 long (*fn)(void *);
4752 void *arg;
4753 long ret;
4754};
4755
4756static void work_for_cpu_fn(struct work_struct *work)
4757{
4758 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4759
4760 wfc->ret = wfc->fn(wfc->arg);
4761}
4762
4763
4764
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
4774long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4775{
4776 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4777
4778 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4779 schedule_work_on(cpu, &wfc.work);
4780 flush_work(&wfc.work);
4781 destroy_work_on_stack(&wfc.work);
4782 return wfc.ret;
4783}
4784EXPORT_SYMBOL_GPL(work_on_cpu);
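/*
 * Example (illustrative names, not part of this file): read a cpu-local
 * resource from the context of a specific CPU and collect the result:
 *
 *	static long read_my_counter(void *arg)
 *	{
 *		return do_the_cpu_local_read(arg);	// hypothetical helper
 *	}
 *
 *	long val = work_on_cpu(cpu, read_my_counter, dev);
 *
 * work_on_cpu_safe() below is the variant to use when the caller cannot
 * otherwise guarantee that @cpu stays online for the duration of the call.
 */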
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
4798{
4799 long ret = -ENODEV;
4800
4801 get_online_cpus();
4802 if (cpu_online(cpu))
4803 ret = work_on_cpu(cpu, fn, arg);
4804 put_online_cpus();
4805 return ret;
4806}
4807EXPORT_SYMBOL_GPL(work_on_cpu_safe);
4808#endif
4809
4810#ifdef CONFIG_FREEZER
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822void freeze_workqueues_begin(void)
4823{
4824 struct workqueue_struct *wq;
4825 struct pool_workqueue *pwq;
4826
4827 mutex_lock(&wq_pool_mutex);
4828
4829 WARN_ON_ONCE(workqueue_freezing);
4830 workqueue_freezing = true;
4831
4832 list_for_each_entry(wq, &workqueues, list) {
4833 mutex_lock(&wq->mutex);
4834 for_each_pwq(pwq, wq)
4835 pwq_adjust_max_active(pwq);
4836 mutex_unlock(&wq->mutex);
4837 }
4838
4839 mutex_unlock(&wq_pool_mutex);
4840}
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855bool freeze_workqueues_busy(void)
4856{
4857 bool busy = false;
4858 struct workqueue_struct *wq;
4859 struct pool_workqueue *pwq;
4860
4861 mutex_lock(&wq_pool_mutex);
4862
4863 WARN_ON_ONCE(!workqueue_freezing);
4864
4865 list_for_each_entry(wq, &workqueues, list) {
4866 if (!(wq->flags & WQ_FREEZABLE))
4867 continue;
4868
4869
4870
4871
4872 rcu_read_lock_sched();
4873 for_each_pwq(pwq, wq) {
4874 WARN_ON_ONCE(pwq->nr_active < 0);
4875 if (pwq->nr_active) {
4876 busy = true;
4877 rcu_read_unlock_sched();
4878 goto out_unlock;
4879 }
4880 }
4881 rcu_read_unlock_sched();
4882 }
4883out_unlock:
4884 mutex_unlock(&wq_pool_mutex);
4885 return busy;
4886}
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897void thaw_workqueues(void)
4898{
4899 struct workqueue_struct *wq;
4900 struct pool_workqueue *pwq;
4901
4902 mutex_lock(&wq_pool_mutex);
4903
4904 if (!workqueue_freezing)
4905 goto out_unlock;
4906
4907 workqueue_freezing = false;
4908
4909
4910 list_for_each_entry(wq, &workqueues, list) {
4911 mutex_lock(&wq->mutex);
4912 for_each_pwq(pwq, wq)
4913 pwq_adjust_max_active(pwq);
4914 mutex_unlock(&wq->mutex);
4915 }
4916
4917out_unlock:
4918 mutex_unlock(&wq_pool_mutex);
4919}
4920#endif
4921
4922static int workqueue_apply_unbound_cpumask(void)
4923{
4924 LIST_HEAD(ctxs);
4925 int ret = 0;
4926 struct workqueue_struct *wq;
4927 struct apply_wqattrs_ctx *ctx, *n;
4928
4929 lockdep_assert_held(&wq_pool_mutex);
4930
4931 list_for_each_entry(wq, &workqueues, list) {
4932 if (!(wq->flags & WQ_UNBOUND))
4933 continue;
4934
4935 if (wq->flags & __WQ_ORDERED)
4936 continue;
4937
4938 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
4939 if (!ctx) {
4940 ret = -ENOMEM;
4941 break;
4942 }
4943
4944 list_add_tail(&ctx->list, &ctxs);
4945 }
4946
4947 list_for_each_entry_safe(ctx, n, &ctxs, list) {
4948 if (!ret)
4949 apply_wqattrs_commit(ctx);
4950 apply_wqattrs_cleanup(ctx);
4951 }
4952
4953 return ret;
4954}
4955
4956
4957
/**
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueues cpumask is a global cpumask that limits
 * the affinity of all unbound workqueues.  This function checks @cpumask
 * and applies it to all unbound workqueues, updating all of their pwqs.
 *
 * Return:	0	- Success
 *		-EINVAL	- Invalid @cpumask
 *		-ENOMEM	- Failed to allocate memory for attrs or pwqs.
 */
4968int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4969{
4970 int ret = -EINVAL;
4971 cpumask_var_t saved_cpumask;
4972
4973 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4974 return -ENOMEM;
4975
4976 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4977 if (!cpumask_empty(cpumask)) {
4978 apply_wqattrs_lock();
4979
4980
4981 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
4982
4983
4984 cpumask_copy(wq_unbound_cpumask, cpumask);
4985 ret = workqueue_apply_unbound_cpumask();
4986
4987
4988 if (ret < 0)
4989 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
4990
4991 apply_wqattrs_unlock();
4992 }
4993
4994 free_cpumask_var(saved_cpumask);
4995 return ret;
4996}
4997
4998#ifdef CONFIG_SYSFS
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013struct wq_device {
5014 struct workqueue_struct *wq;
5015 struct device dev;
5016};
5017
5018static struct workqueue_struct *dev_to_wq(struct device *dev)
5019{
5020 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5021
5022 return wq_dev->wq;
5023}
5024
5025static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5026 char *buf)
5027{
5028 struct workqueue_struct *wq = dev_to_wq(dev);
5029
5030 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5031}
5032static DEVICE_ATTR_RO(per_cpu);
5033
5034static ssize_t max_active_show(struct device *dev,
5035 struct device_attribute *attr, char *buf)
5036{
5037 struct workqueue_struct *wq = dev_to_wq(dev);
5038
5039 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5040}
5041
5042static ssize_t max_active_store(struct device *dev,
5043 struct device_attribute *attr, const char *buf,
5044 size_t count)
5045{
5046 struct workqueue_struct *wq = dev_to_wq(dev);
5047 int val;
5048
5049 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5050 return -EINVAL;
5051
5052 workqueue_set_max_active(wq, val);
5053 return count;
5054}
5055static DEVICE_ATTR_RW(max_active);
5056
5057static struct attribute *wq_sysfs_attrs[] = {
5058 &dev_attr_per_cpu.attr,
5059 &dev_attr_max_active.attr,
5060 NULL,
5061};
5062ATTRIBUTE_GROUPS(wq_sysfs);
5063
5064static ssize_t wq_pool_ids_show(struct device *dev,
5065 struct device_attribute *attr, char *buf)
5066{
5067 struct workqueue_struct *wq = dev_to_wq(dev);
5068 const char *delim = "";
5069 int node, written = 0;
5070
5071 rcu_read_lock_sched();
5072 for_each_node(node) {
5073 written += scnprintf(buf + written, PAGE_SIZE - written,
5074 "%s%d:%d", delim, node,
5075 unbound_pwq_by_node(wq, node)->pool->id);
5076 delim = " ";
5077 }
5078 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5079 rcu_read_unlock_sched();
5080
5081 return written;
5082}
5083
5084static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5085 char *buf)
5086{
5087 struct workqueue_struct *wq = dev_to_wq(dev);
5088 int written;
5089
5090 mutex_lock(&wq->mutex);
5091 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5092 mutex_unlock(&wq->mutex);
5093
5094 return written;
5095}
5096
5097
5098static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5099{
5100 struct workqueue_attrs *attrs;
5101
5102 lockdep_assert_held(&wq_pool_mutex);
5103
5104 attrs = alloc_workqueue_attrs(GFP_KERNEL);
5105 if (!attrs)
5106 return NULL;
5107
5108 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5109 return attrs;
5110}
5111
5112static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5113 const char *buf, size_t count)
5114{
5115 struct workqueue_struct *wq = dev_to_wq(dev);
5116 struct workqueue_attrs *attrs;
5117 int ret = -ENOMEM;
5118
5119 apply_wqattrs_lock();
5120
5121 attrs = wq_sysfs_prep_attrs(wq);
5122 if (!attrs)
5123 goto out_unlock;
5124
5125 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5126 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5127 ret = apply_workqueue_attrs_locked(wq, attrs);
5128 else
5129 ret = -EINVAL;
5130
5131out_unlock:
5132 apply_wqattrs_unlock();
5133 free_workqueue_attrs(attrs);
5134 return ret ?: count;
5135}
5136
5137static ssize_t wq_cpumask_show(struct device *dev,
5138 struct device_attribute *attr, char *buf)
5139{
5140 struct workqueue_struct *wq = dev_to_wq(dev);
5141 int written;
5142
5143 mutex_lock(&wq->mutex);
5144 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5145 cpumask_pr_args(wq->unbound_attrs->cpumask));
5146 mutex_unlock(&wq->mutex);
5147 return written;
5148}
5149
5150static ssize_t wq_cpumask_store(struct device *dev,
5151 struct device_attribute *attr,
5152 const char *buf, size_t count)
5153{
5154 struct workqueue_struct *wq = dev_to_wq(dev);
5155 struct workqueue_attrs *attrs;
5156 int ret = -ENOMEM;
5157
5158 apply_wqattrs_lock();
5159
5160 attrs = wq_sysfs_prep_attrs(wq);
5161 if (!attrs)
5162 goto out_unlock;
5163
5164 ret = cpumask_parse(buf, attrs->cpumask);
5165 if (!ret)
5166 ret = apply_workqueue_attrs_locked(wq, attrs);
5167
5168out_unlock:
5169 apply_wqattrs_unlock();
5170 free_workqueue_attrs(attrs);
5171 return ret ?: count;
5172}
5173
5174static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5175 char *buf)
5176{
5177 struct workqueue_struct *wq = dev_to_wq(dev);
5178 int written;
5179
5180 mutex_lock(&wq->mutex);
5181 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5182 !wq->unbound_attrs->no_numa);
5183 mutex_unlock(&wq->mutex);
5184
5185 return written;
5186}
5187
5188static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5189 const char *buf, size_t count)
5190{
5191 struct workqueue_struct *wq = dev_to_wq(dev);
5192 struct workqueue_attrs *attrs;
5193 int v, ret = -ENOMEM;
5194
5195 apply_wqattrs_lock();
5196
5197 attrs = wq_sysfs_prep_attrs(wq);
5198 if (!attrs)
5199 goto out_unlock;
5200
5201 ret = -EINVAL;
5202 if (sscanf(buf, "%d", &v) == 1) {
5203 attrs->no_numa = !v;
5204 ret = apply_workqueue_attrs_locked(wq, attrs);
5205 }
5206
5207out_unlock:
5208 apply_wqattrs_unlock();
5209 free_workqueue_attrs(attrs);
5210 return ret ?: count;
5211}
5212
5213static struct device_attribute wq_sysfs_unbound_attrs[] = {
5214 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5215 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5216 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5217 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5218 __ATTR_NULL,
5219};
5220
5221static struct bus_type wq_subsys = {
5222 .name = "workqueue",
5223 .dev_groups = wq_sysfs_groups,
5224};
5225
5226static ssize_t wq_unbound_cpumask_show(struct device *dev,
5227 struct device_attribute *attr, char *buf)
5228{
5229 int written;
5230
5231 mutex_lock(&wq_pool_mutex);
5232 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5233 cpumask_pr_args(wq_unbound_cpumask));
5234 mutex_unlock(&wq_pool_mutex);
5235
5236 return written;
5237}
5238
5239static ssize_t wq_unbound_cpumask_store(struct device *dev,
5240 struct device_attribute *attr, const char *buf, size_t count)
5241{
5242 cpumask_var_t cpumask;
5243 int ret;
5244
5245 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5246 return -ENOMEM;
5247
5248 ret = cpumask_parse(buf, cpumask);
5249 if (!ret)
5250 ret = workqueue_set_unbound_cpumask(cpumask);
5251
5252 free_cpumask_var(cpumask);
5253 return ret ? ret : count;
5254}
5255
5256static struct device_attribute wq_sysfs_cpumask_attr =
5257 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5258 wq_unbound_cpumask_store);
5259
5260static int __init wq_sysfs_init(void)
5261{
5262 int err;
5263
5264 err = subsys_virtual_register(&wq_subsys, NULL);
5265 if (err)
5266 return err;
5267
5268 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5269}
5270core_initcall(wq_sysfs_init);
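/*
 * With the bits above, the workqueue bus shows up in sysfs and WQ_SYSFS
 * workqueues registered via workqueue_sysfs_register() get per-workqueue
 * attribute files.  On a typical system this looks roughly like the
 * following (paths shown for illustration; the exact layout depends on
 * how sysfs is mounted and configured):
 *
 *	/sys/devices/virtual/workqueue/cpumask		global unbound cpumask
 *	/sys/devices/virtual/workqueue/<wq>/nice	unbound workqueues only
 *	/sys/devices/virtual/workqueue/<wq>/cpumask
 *	/sys/devices/virtual/workqueue/<wq>/numa
 */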
5271
5272static void wq_device_release(struct device *dev)
5273{
5274 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5275
5276 kfree(wq_dev);
5277}
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294int workqueue_sysfs_register(struct workqueue_struct *wq)
5295{
5296 struct wq_device *wq_dev;
5297 int ret;
5298
5299
5300
5301
5302
5303
5304 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5305 return -EINVAL;
5306
5307 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5308 if (!wq_dev)
5309 return -ENOMEM;
5310
5311 wq_dev->wq = wq;
5312 wq_dev->dev.bus = &wq_subsys;
5313 wq_dev->dev.release = wq_device_release;
5314 dev_set_name(&wq_dev->dev, "%s", wq->name);
5315
5316
5317
5318
5319
5320 dev_set_uevent_suppress(&wq_dev->dev, true);
5321
5322 ret = device_register(&wq_dev->dev);
5323 if (ret) {
5324 kfree(wq_dev);
5325 wq->wq_dev = NULL;
5326 return ret;
5327 }
5328
5329 if (wq->flags & WQ_UNBOUND) {
5330 struct device_attribute *attr;
5331
5332 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5333 ret = device_create_file(&wq_dev->dev, attr);
5334 if (ret) {
5335 device_unregister(&wq_dev->dev);
5336 wq->wq_dev = NULL;
5337 return ret;
5338 }
5339 }
5340 }
5341
5342 dev_set_uevent_suppress(&wq_dev->dev, false);
5343 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5344 return 0;
5345}
5346
5347
5348
5349
5350
5351
5352
5353static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5354{
5355 struct wq_device *wq_dev = wq->wq_dev;
5356
5357 if (!wq->wq_dev)
5358 return;
5359
5360 wq->wq_dev = NULL;
5361 device_unregister(&wq_dev->dev);
5362}
5363#else
5364static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5365#endif
5366
5367
/*
 * Workqueue watchdog.
 *
 * Stalls may be caused by various bugs - a missing WQ_MEM_RECLAIM, an
 * illegal flush dependency, a concurrency-managed work item which stays
 * RUNNING indefinitely.  Workqueue stalls can be very difficult to debug
 * as the usual warning mechanisms don't trigger and internal workqueue
 * state is largely opaque.
 *
 * The workqueue watchdog monitors all worker pools periodically and dumps
 * pool state if any pool fails to make forward progress for a while,
 * where forward progress is defined as the first item on ->worklist
 * changing.
 *
 * This mechanism is controlled through the kernel parameter
 * "workqueue.watchdog_thresh" which can be updated at runtime through the
 * corresponding sysfs parameter file.
 */
5384#ifdef CONFIG_WQ_WATCHDOG
5385
5386static void wq_watchdog_timer_fn(unsigned long data);
5387
5388static unsigned long wq_watchdog_thresh = 30;
5389static struct timer_list wq_watchdog_timer =
5390 TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
5391
5392static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5393static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5394
5395static void wq_watchdog_reset_touched(void)
5396{
5397 int cpu;
5398
5399 wq_watchdog_touched = jiffies;
5400 for_each_possible_cpu(cpu)
5401 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5402}
5403
5404static void wq_watchdog_timer_fn(unsigned long data)
5405{
5406 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5407 bool lockup_detected = false;
5408 struct worker_pool *pool;
5409 int pi;
5410
5411 if (!thresh)
5412 return;
5413
5414 rcu_read_lock();
5415
5416 for_each_pool(pool, pi) {
5417 unsigned long pool_ts, touched, ts;
5418
5419 if (list_empty(&pool->worklist))
5420 continue;
5421
5422
5423 pool_ts = READ_ONCE(pool->watchdog_ts);
5424 touched = READ_ONCE(wq_watchdog_touched);
5425
5426 if (time_after(pool_ts, touched))
5427 ts = pool_ts;
5428 else
5429 ts = touched;
5430
5431 if (pool->cpu >= 0) {
5432 unsigned long cpu_touched =
5433 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5434 pool->cpu));
5435 if (time_after(cpu_touched, ts))
5436 ts = cpu_touched;
5437 }
5438
5439
5440 if (time_after(jiffies, ts + thresh)) {
5441 lockup_detected = true;
5442 pr_emerg("BUG: workqueue lockup - pool");
5443 pr_cont_pool_info(pool);
5444 pr_cont(" stuck for %us!\n",
5445 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5446 }
5447 }
5448
5449 rcu_read_unlock();
5450
5451 if (lockup_detected)
5452 show_workqueue_state();
5453
5454 wq_watchdog_reset_touched();
5455 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5456}
5457
5458void wq_watchdog_touch(int cpu)
5459{
5460 if (cpu >= 0)
5461 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5462 else
5463 wq_watchdog_touched = jiffies;
5464}
5465
5466static void wq_watchdog_set_thresh(unsigned long thresh)
5467{
5468 wq_watchdog_thresh = 0;
5469 del_timer_sync(&wq_watchdog_timer);
5470
5471 if (thresh) {
5472 wq_watchdog_thresh = thresh;
5473 wq_watchdog_reset_touched();
5474 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5475 }
5476}
5477
5478static int wq_watchdog_param_set_thresh(const char *val,
5479 const struct kernel_param *kp)
5480{
5481 unsigned long thresh;
5482 int ret;
5483
5484 ret = kstrtoul(val, 0, &thresh);
5485 if (ret)
5486 return ret;
5487
5488 if (system_wq)
5489 wq_watchdog_set_thresh(thresh);
5490 else
5491 wq_watchdog_thresh = thresh;
5492
5493 return 0;
5494}
5495
5496static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5497 .set = wq_watchdog_param_set_thresh,
5498 .get = param_get_ulong,
5499};
5500
5501module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5502 0644);
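/*
 * Because this file is built into the kernel, the parameter is exposed as
 * "workqueue.watchdog_thresh".  The threshold (in seconds, 0 disables the
 * watchdog) can be set on the kernel command line, e.g.
 *
 *	workqueue.watchdog_thresh=60
 *
 * or changed at runtime through
 * /sys/module/workqueue/parameters/watchdog_thresh, which ends up in
 * wq_watchdog_param_set_thresh() above.
 */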
5503
5504static void wq_watchdog_init(void)
5505{
5506 wq_watchdog_set_thresh(wq_watchdog_thresh);
5507}
5508
5509#else
5510
5511static inline void wq_watchdog_init(void) { }
5512
5513#endif
5514
5515static void __init wq_numa_init(void)
5516{
5517 cpumask_var_t *tbl;
5518 int node, cpu;
5519
5520 if (num_possible_nodes() <= 1)
5521 return;
5522
5523 if (wq_disable_numa) {
5524 pr_info("workqueue: NUMA affinity support disabled\n");
5525 return;
5526 }
5527
5528 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5529 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5530
5531
5532
5533
5534
5535
5536 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
5537 BUG_ON(!tbl);
5538
5539 for_each_node(node)
5540 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5541 node_online(node) ? node : NUMA_NO_NODE));
5542
5543 for_each_possible_cpu(cpu) {
5544 node = cpu_to_node(cpu);
5545 if (WARN_ON(node == NUMA_NO_NODE)) {
5546 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5547
5548 return;
5549 }
5550 cpumask_set_cpu(cpu, tbl[node]);
5551 }
5552
5553 wq_numa_possible_cpumask = tbl;
5554 wq_numa_enabled = true;
5555}
5556
5557
5558
/**
 * workqueue_init_early - early init for workqueue subsystem
 *
 * This is the first half of two-staged workqueue subsystem initialization
 * and invoked as soon as the bare basics - memory allocation, cpumasks and
 * idr are up.  It sets up all the data structures and system workqueues
 * and allows early boot code to create workqueues and queue/cancel work
 * items.  Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
5567int __init workqueue_init_early(void)
5568{
5569 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
5570 int i, cpu;
5571
5572 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5573
5574 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5575 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
5576
5577 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5578
5579
5580 for_each_possible_cpu(cpu) {
5581 struct worker_pool *pool;
5582
5583 i = 0;
5584 for_each_cpu_worker_pool(pool, cpu) {
5585 BUG_ON(init_worker_pool(pool));
5586 pool->cpu = cpu;
5587 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
5588 pool->attrs->nice = std_nice[i++];
5589 pool->node = cpu_to_node(cpu);
5590
5591
5592 mutex_lock(&wq_pool_mutex);
5593 BUG_ON(worker_pool_assign_id(pool));
5594 mutex_unlock(&wq_pool_mutex);
5595 }
5596 }
5597
5598
5599 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5600 struct workqueue_attrs *attrs;
5601
5602 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5603 attrs->nice = std_nice[i];
5604 unbound_std_wq_attrs[i] = attrs;
5605
5606
5607
5608
5609
5610
5611 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5612 attrs->nice = std_nice[i];
5613 attrs->no_numa = true;
5614 ordered_wq_attrs[i] = attrs;
5615 }
5616
5617 system_wq = alloc_workqueue("events", 0, 0);
5618 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5619 system_long_wq = alloc_workqueue("events_long", 0, 0);
5620 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5621 WQ_UNBOUND_MAX_ACTIVE);
5622 system_freezable_wq = alloc_workqueue("events_freezable",
5623 WQ_FREEZABLE, 0);
5624 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5625 WQ_POWER_EFFICIENT, 0);
5626 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5627 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5628 0);
5629 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5630 !system_unbound_wq || !system_freezable_wq ||
5631 !system_power_efficient_wq ||
5632 !system_freezable_power_efficient_wq);
5633
5634 return 0;
5635}
5636
5637
5638
/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the latter half of two-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled.
 * Workqueues have been created and work items queued on them, but there
 * are no kworkers executing the work items yet.  Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
5646int __init workqueue_init(void)
5647{
5648 struct workqueue_struct *wq;
5649 struct worker_pool *pool;
5650 int cpu, bkt;
5651
5652
5653
5654
5655
5656
5657
5658
5659 wq_numa_init();
5660
5661 mutex_lock(&wq_pool_mutex);
5662
5663 for_each_possible_cpu(cpu) {
5664 for_each_cpu_worker_pool(pool, cpu) {
5665 pool->node = cpu_to_node(cpu);
5666 }
5667 }
5668
5669 list_for_each_entry(wq, &workqueues, list)
5670 wq_update_unbound_numa(wq, smp_processor_id(), true);
5671
5672 mutex_unlock(&wq_pool_mutex);
5673
5674
5675 for_each_online_cpu(cpu) {
5676 for_each_cpu_worker_pool(pool, cpu) {
5677 pool->flags &= ~POOL_DISASSOCIATED;
5678 BUG_ON(!create_worker(pool));
5679 }
5680 }
5681
5682 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
5683 BUG_ON(!create_worker(pool));
5684
5685 wq_online = true;
5686 wq_watchdog_init();
5687
5688 return 0;
5689}
5690