#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>

#include "workqueue_internal.h"

enum {
	POOL_DISASSOCIATED	= 1 << 2,

	WORKER_DIE		= 1 << 1,
	WORKER_IDLE		= 1 << 2,
	WORKER_PREP		= 1 << 3,
	WORKER_CPU_INTENSIVE	= 1 << 6,
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,

	UNBOUND_POOL_HASH_ORDER	= 6,
	BUSY_WORKER_HASH_ORDER	= 6,

	MAX_IDLE_WORKERS_RATIO	= 4,
	IDLE_WORKER_TIMEOUT	= 300 * HZ,

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,

	MAYDAY_INTERVAL		= HZ / 10,
	CREATE_COOLDOWN		= HZ,

	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

struct worker_pool {
	spinlock_t		lock;
	int			cpu;
	int			node;
	int			id;
	unsigned int		flags;

	unsigned long		watchdog_ts;

	struct list_head	worklist;
	int			nr_workers;

	int			nr_idle;

	struct list_head	idle_list;
	struct timer_list	idle_timer;
	struct timer_list	mayday_timer;

	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);

	struct mutex		manager_arb;
	struct worker		*manager;
	struct mutex		attach_mutex;
	struct list_head	workers;
	struct completion	*detach_completion;

	struct ida		worker_ida;

	struct workqueue_attrs	*attrs;
	struct hlist_node	hash_node;
	int			refcnt;

	atomic_t		nr_running ____cacheline_aligned_in_smp;

	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

struct pool_workqueue {
	struct worker_pool	*pool;
	struct workqueue_struct *wq;
	int			work_color;
	int			flush_color;
	int			refcnt;
	int			nr_in_flight[WORK_NR_COLORS];

	int			nr_active;
	int			max_active;
	struct list_head	delayed_works;
	struct list_head	pwqs_node;
	struct list_head	mayday_node;

	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

struct wq_flusher {
	struct list_head	list;
	int			flush_color;
	struct completion	done;
};

struct wq_device;

struct workqueue_struct {
	struct list_head	pwqs;
	struct list_head	list;

	struct mutex		mutex;
	int			work_color;
	int			flush_color;
	atomic_t		nr_pwqs_to_flush;
	struct wq_flusher	*first_flusher;
	struct list_head	flusher_queue;
	struct list_head	flusher_overflow;

	struct list_head	maydays;
	struct worker		*rescuer;

	int			nr_drainers;
	int			saved_max_active;

	struct workqueue_attrs	*unbound_attrs;
	struct pool_workqueue	*dfl_pwq;

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;
#endif
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN];

	struct rcu_head		rcu;

	unsigned int		flags ____cacheline_aligned;
	struct pool_workqueue __percpu *cpu_pwqs;
	struct pool_workqueue __rcu *numa_pwq_tbl[];
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_online;

static bool wq_numa_enabled;

static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);
static DEFINE_SPINLOCK(wq_mayday_lock);

static LIST_HEAD(workqueues);
static bool workqueue_freezing;

static cpumask_var_t wq_unbound_cpumask;

static DEFINE_PER_CPU(int, wq_rr_cpu_last);

#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);

static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex),			\
			 "sched RCU or wq->mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
		else

#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

static bool work_is_static_object(void *addr)
{
	struct work_struct *work = addr;

	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.debug_hint	= work_debug_hint,
	.is_static_object = work_is_static_object,
	.fixup_init	= work_fixup_init,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	destroy_timer_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

static int worker_pool_assign_id(struct worker_pool *pool)
{
	int ret;

	lockdep_assert_held(&wq_pool_mutex);

	ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
			GFP_KERNEL);
	if (ret >= 0) {
		pool->id = ret;
		return 0;
	}
	return ret;
}

static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex_or_pool_mutex(wq);

	if (unlikely(node == NUMA_NO_NODE))
		return wq->dfl_pwq;

	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}
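
/*
 * Illustrative note (not in the original file): work colors simply cycle,
 * with the last value reserved as WORK_NO_COLOR for barrier works, so
 *
 *	work_next_color(0) == 1
 *	work_next_color(WORK_NR_COLORS - 1) == 0
 *
 * flush_workqueue() relies on this wrap-around to tell work queued before
 * a flush from work queued after it without any timestamps.
 */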

static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	WARN_ON_ONCE(!work_pending(work));
	atomic_long_set(&work->data, data | flags | work_static(work));
}

static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long extra_flags)
{
	set_work_data(work, (unsigned long)pwq,
		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
}

static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id)
{
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
		      WORK_STRUCT_PENDING);
}

static void set_work_pool_and_clear_pending(struct work_struct *work,
					    int pool_id)
{
	smp_wmb();
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
	smp_mb();
}

static void clear_work_data(struct work_struct *work)
{
	smp_wmb();
	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}

static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
	else
		return NULL;
}

static struct worker_pool *get_work_pool(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);
	int pool_id;

	assert_rcu_or_pool_mutex();

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;

	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
	if (pool_id == WORK_OFFQ_POOL_NONE)
		return NULL;

	return idr_find(&worker_pool_idr, pool_id);
}

static int get_work_pool_id(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;

	return data >> WORK_OFFQ_POOL_SHIFT;
}

static void mark_work_canceling(struct work_struct *work)
{
	unsigned long pool_id = get_work_pool_id(work);

	pool_id <<= WORK_OFFQ_POOL_SHIFT;
	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}

static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}

static bool __need_more_worker(struct worker_pool *pool)
{
	return !atomic_read(&pool->nr_running);
}

static bool need_more_worker(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) && __need_more_worker(pool);
}

static bool may_start_working(struct worker_pool *pool)
{
	return pool->nr_idle;
}

static bool keep_working(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) &&
		atomic_read(&pool->nr_running) <= 1;
}

static bool need_to_create_worker(struct worker_pool *pool)
{
	return need_more_worker(pool) && !may_start_working(pool);
}

static bool too_many_workers(struct worker_pool *pool)
{
	bool managing = mutex_is_locked(&pool->manager_arb);
	int nr_idle = pool->nr_idle + managing;
	int nr_busy = pool->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}
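
/*
 * Worked example (illustrative): with MAX_IDLE_WORKERS_RATIO == 4, a pool
 * tolerates two idle workers plus one more for every four busy workers.
 * E.g. nr_idle == 6 counts as "too many" while nr_busy <= 16, since
 * (6 - 2) * 4 == 16 >= nr_busy; once more than 16 workers are busy, six
 * idlers are considered reasonable headroom and the idle timer leaves
 * them alone.
 */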

static struct worker *first_idle_worker(struct worker_pool *pool)
{
	if (unlikely(list_empty(&pool->idle_list)))
		return NULL;

	return list_first_entry(&pool->idle_list, struct worker, entry);
}

static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *worker = first_idle_worker(pool);

	if (likely(worker))
		wake_up_process(worker->task);
}

void wq_worker_waking_up(struct task_struct *task, int cpu)
{
	struct worker *worker = kthread_data(task);

	if (!(worker->flags & WORKER_NOT_RUNNING)) {
		WARN_ON_ONCE(worker->pool->cpu != cpu);
		atomic_inc(&worker->pool->nr_running);
	}
}

struct task_struct *wq_worker_sleeping(struct task_struct *task)
{
	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
	struct worker_pool *pool;

	if (worker->flags & WORKER_NOT_RUNNING)
		return NULL;

	pool = worker->pool;

	if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
		return NULL;

	if (atomic_dec_and_test(&pool->nr_running) &&
	    !list_empty(&pool->worklist))
		to_wakeup = first_idle_worker(pool);
	return to_wakeup ? to_wakeup->task : NULL;
}

static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;

	WARN_ON_ONCE(worker->task != current);

	if ((flags & WORKER_NOT_RUNNING) &&
	    !(worker->flags & WORKER_NOT_RUNNING)) {
		atomic_dec(&pool->nr_running);
	}

	worker->flags |= flags;
}

static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;
	unsigned int oflags = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
		if (!(worker->flags & WORKER_NOT_RUNNING))
			atomic_inc(&pool->nr_running);
}

static struct worker *find_worker_executing_work(struct worker_pool *pool,
						 struct work_struct *work)
{
	struct worker *worker;

	hash_for_each_possible(pool->busy_hash, worker, hentry,
			       (unsigned long)work)
		if (worker->current_work == work &&
		    worker->current_func == work->func)
			return worker;

	return NULL;
}

static void move_linked_works(struct work_struct *work, struct list_head *head,
			      struct work_struct **nextp)
{
	struct work_struct *n;

	list_for_each_entry_safe_from(work, n, NULL, entry) {
		list_move_tail(&work->entry, head);
		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
			break;
	}

	if (nextp)
		*nextp = n;
}

static void get_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	WARN_ON_ONCE(pwq->refcnt <= 0);
	pwq->refcnt++;
}

static void put_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	if (likely(--pwq->refcnt))
		return;
	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
		return;

	schedule_work(&pwq->unbound_release_work);
}

static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
	if (pwq) {
		spin_lock_irq(&pwq->pool->lock);
		put_pwq(pwq);
		spin_unlock_irq(&pwq->pool->lock);
	}
}

static void pwq_activate_delayed_work(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);

	trace_workqueue_activate_work(work);
	if (list_empty(&pwq->pool->worklist))
		pwq->pool->watchdog_ts = jiffies;
	move_linked_works(work, &pwq->pool->worklist, NULL);
	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
	pwq->nr_active++;
}

static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
{
	struct work_struct *work = list_first_entry(&pwq->delayed_works,
						    struct work_struct, entry);

	pwq_activate_delayed_work(work);
}

static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
	if (color == WORK_NO_COLOR)
		goto out_put;

	pwq->nr_in_flight[color]--;

	pwq->nr_active--;
	if (!list_empty(&pwq->delayed_works)) {
		if (pwq->nr_active < pwq->max_active)
			pwq_activate_first_delayed(pwq);
	}

	if (likely(pwq->flush_color != color))
		goto out_put;

	if (pwq->nr_in_flight[color])
		goto out_put;

	pwq->flush_color = -1;

	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
		complete(&pwq->wq->first_flusher->done);
out_put:
	put_pwq(pwq);
}

static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
			       unsigned long *flags)
{
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	local_irq_save(*flags);

	if (is_dwork) {
		struct delayed_work *dwork = to_delayed_work(work);

		if (likely(del_timer(&dwork->timer)))
			return 1;
	}

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	pool = get_work_pool(work);
	if (!pool)
		goto fail;

	spin_lock(&pool->lock);

	pwq = get_work_pwq(work);
	if (pwq && pwq->pool == pool) {
		debug_work_deactivate(work);

		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
			pwq_activate_delayed_work(work);

		list_del_init(&work->entry);
		pwq_dec_nr_in_flight(pwq, get_work_color(work));

		set_work_pool_and_keep_pending(work, pool->id);

		spin_unlock(&pool->lock);
		return 1;
	}
	spin_unlock(&pool->lock);
fail:
	local_irq_restore(*flags);
	if (work_is_canceling(work))
		return -ENOENT;
	cpu_relax();
	return -EAGAIN;
}
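
/*
 * Canonical caller pattern (illustrative sketch): spin on -EAGAIN, then
 * act on the grabbed item with IRQs still disabled.  This mirrors how
 * __cancel_work() and mod_delayed_work_on() below use the helper:
 *
 *	unsigned long flags;
 *	int ret;
 *
 *	do {
 *		ret = try_to_grab_pending(work, false, &flags);
 *	} while (unlikely(ret == -EAGAIN));
 *
 *	if (ret >= 0) {
 *		... requeue or clear the pending state ...
 *		local_irq_restore(flags);
 *	}
 */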

static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
			struct list_head *head, unsigned int extra_flags)
{
	struct worker_pool *pool = pwq->pool;

	set_work_pwq(work, pwq, extra_flags);
	list_add_tail(&work->entry, head);
	get_pwq(pwq);

	smp_mb();

	if (__need_more_worker(pool))
		wake_up_worker(pool);
}

static bool is_chained_work(struct workqueue_struct *wq)
{
	struct worker *worker;

	worker = current_wq_worker();

	return worker && worker->current_pwq->wq == wq;
}

static int wq_select_unbound_cpu(int cpu)
{
	static bool printed_dbg_warning;
	int new_cpu;

	if (likely(!wq_debug_force_rr_cpu)) {
		if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
			return cpu;
	} else if (!printed_dbg_warning) {
		pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
		printed_dbg_warning = true;
	}

	if (cpumask_empty(wq_unbound_cpumask))
		return cpu;

	new_cpu = __this_cpu_read(wq_rr_cpu_last);
	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
	if (unlikely(new_cpu >= nr_cpu_ids)) {
		new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
		if (unlikely(new_cpu >= nr_cpu_ids))
			return cpu;
	}
	__this_cpu_write(wq_rr_cpu_last, new_cpu);

	return new_cpu;
}

static void __queue_work(int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct pool_workqueue *pwq;
	struct worker_pool *last_pool;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned int req_cpu = cpu;

	WARN_ON_ONCE(!irqs_disabled());

	debug_work_activate(work);

	if (unlikely(wq->flags & __WQ_DRAINING) &&
	    WARN_ON_ONCE(!is_chained_work(wq)))
		return;
retry:
	if (req_cpu == WORK_CPU_UNBOUND)
		cpu = wq_select_unbound_cpu(raw_smp_processor_id());

	if (!(wq->flags & WQ_UNBOUND))
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	last_pool = get_work_pool(work);
	if (last_pool && last_pool != pwq->pool) {
		struct worker *worker;

		spin_lock(&last_pool->lock);

		worker = find_worker_executing_work(last_pool, work);

		if (worker && worker->current_pwq->wq == wq) {
			pwq = worker->current_pwq;
		} else {
			spin_unlock(&last_pool->lock);
			spin_lock(&pwq->pool->lock);
		}
	} else {
		spin_lock(&pwq->pool->lock);
	}

	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			spin_unlock(&pwq->pool->lock);
			cpu_relax();
			goto retry;
		}

		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry))) {
		spin_unlock(&pwq->pool->lock);
		return;
	}

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	if (likely(pwq->nr_active < pwq->max_active)) {
		trace_workqueue_activate_work(work);
		pwq->nr_active++;
		worklist = &pwq->pool->worklist;
		if (list_empty(worklist))
			pwq->pool->watchdog_ts = jiffies;
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
	}

	insert_work(pwq, work, worklist, work_flags);

	spin_unlock(&pwq->pool->lock);
}

bool queue_work_on(int cpu, struct workqueue_struct *wq,
		   struct work_struct *work)
{
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_work_on);
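
/*
 * Example usage (illustrative sketch; the my_* names are hypothetical):
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("running in kworker context\n");
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	if (!queue_work_on(raw_smp_processor_id(), system_wq, &my_work))
 *		pr_debug("my_work was already pending\n");
 *
 * Callable from any context; %false means the item was already queued
 * and will only run once.
 */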

void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;

	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
}
EXPORT_SYMBOL(delayed_work_timer_fn);

static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
				 struct delayed_work *dwork, unsigned long delay)
{
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	WARN_ON_ONCE(!wq);
	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
		     timer->data != (unsigned long)dwork);
	WARN_ON_ONCE(timer_pending(timer));
	WARN_ON_ONCE(!list_empty(&work->entry));

	if (!delay) {
		__queue_work(cpu, wq, &dwork->work);
		return;
	}

	dwork->wq = wq;
	dwork->cpu = cpu;
	timer->expires = jiffies + delay;

	if (unlikely(cpu != WORK_CPU_UNBOUND))
		add_timer_on(timer, cpu);
	else
		add_timer(timer);
}

bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			   struct delayed_work *dwork, unsigned long delay)
{
	struct work_struct *work = &dwork->work;
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
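
/*
 * Example usage (illustrative; my_* names are hypothetical):
 *
 *	static void my_poll_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(my_poll, my_poll_fn);
 *
 *	queue_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &my_poll, HZ);
 *
 * fires my_poll_fn() roughly one second later.  A delay of 0 queues the
 * work immediately without arming the timer at all.
 */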

bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
			 struct delayed_work *dwork, unsigned long delay)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(&dwork->work, true, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (likely(ret >= 0)) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		local_irq_restore(flags);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
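
/*
 * Typical use (illustrative): debouncing.  Calling
 *
 *	mod_delayed_work_on(WORK_CPU_UNBOUND, system_wq, &my_poll, HZ / 10);
 *
 * on every input event keeps pushing the timer back, so the hypothetical
 * my_poll_fn() from the sketch above runs once, ~100ms after the burst
 * ends, whether the work was idle, timer-pending or already queued.
 */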

static void worker_enter_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
	    WARN_ON_ONCE(!list_empty(&worker->entry) &&
			 (worker->hentry.next || worker->hentry.pprev)))
		return;

	worker->flags |= WORKER_IDLE;
	pool->nr_idle++;
	worker->last_active = jiffies;

	list_add(&worker->entry, &pool->idle_list);

	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);

	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     pool->nr_workers == pool->nr_idle &&
		     atomic_read(&pool->nr_running));
}

static void worker_leave_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
		return;
	worker_clr_flags(worker, WORKER_IDLE);
	pool->nr_idle--;
	list_del_init(&worker->entry);
}

static struct worker *alloc_worker(int node)
{
	struct worker *worker;

	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
	if (worker) {
		INIT_LIST_HEAD(&worker->entry);
		INIT_LIST_HEAD(&worker->scheduled);
		INIT_LIST_HEAD(&worker->node);

		worker->flags = WORKER_PREP;
	}
	return worker;
}

static void worker_attach_to_pool(struct worker *worker,
				  struct worker_pool *pool)
{
	mutex_lock(&pool->attach_mutex);

	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);

	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;

	list_add_tail(&worker->node, &pool->workers);

	mutex_unlock(&pool->attach_mutex);
}

static void worker_detach_from_pool(struct worker *worker,
				    struct worker_pool *pool)
{
	struct completion *detach_completion = NULL;

	mutex_lock(&pool->attach_mutex);
	list_del(&worker->node);
	if (list_empty(&pool->workers))
		detach_completion = pool->detach_completion;
	mutex_unlock(&pool->attach_mutex);

	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);

	if (detach_completion)
		complete(detach_completion);
}

static struct worker *create_worker(struct worker_pool *pool)
{
	struct worker *worker = NULL;
	int id = -1;
	char id_buf[16];

	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto fail;

	worker = alloc_worker(pool->node);
	if (!worker)
		goto fail;

	worker->pool = pool;
	worker->id = id;

	if (pool->cpu >= 0)
		snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
			 pool->attrs->nice < 0 ? "H" : "");
	else
		snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);

	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
					      "kworker/%s", id_buf);
	if (IS_ERR(worker->task))
		goto fail;

	set_user_nice(worker->task, pool->attrs->nice);
	kthread_bind_mask(worker->task, pool->attrs->cpumask);

	worker_attach_to_pool(worker, pool);

	spin_lock_irq(&pool->lock);
	worker->pool->nr_workers++;
	worker_enter_idle(worker);
	wake_up_process(worker->task);
	spin_unlock_irq(&pool->lock);

	return worker;

fail:
	if (id >= 0)
		ida_simple_remove(&pool->worker_ida, id);
	kfree(worker);
	return NULL;
}
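
/*
 * Naming example (derived from the snprintf calls above): worker 1 of the
 * highpri (negative nice) pool on CPU 3 shows up in ps as "kworker/3:1H",
 * while worker 2 of unbound pool 5 is "kworker/u5:2".
 */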

static void destroy_worker(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	lockdep_assert_held(&pool->lock);

	if (WARN_ON(worker->current_work) ||
	    WARN_ON(!list_empty(&worker->scheduled)) ||
	    WARN_ON(!(worker->flags & WORKER_IDLE)))
		return;

	pool->nr_workers--;
	pool->nr_idle--;

	list_del_init(&worker->entry);
	worker->flags |= WORKER_DIE;
	wake_up_process(worker->task);
}

static void idle_worker_timeout(unsigned long __pool)
{
	struct worker_pool *pool = (void *)__pool;

	spin_lock_irq(&pool->lock);

	while (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		worker = list_entry(pool->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires)) {
			mod_timer(&pool->idle_timer, expires);
			break;
		}

		destroy_worker(worker);
	}

	spin_unlock_irq(&pool->lock);
}

static void send_mayday(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq_mayday_lock);

	if (!wq->rescuer)
		return;

	if (list_empty(&pwq->mayday_node)) {
		get_pwq(pwq);
		list_add_tail(&pwq->mayday_node, &wq->maydays);
		wake_up_process(wq->rescuer->task);
	}
}

static void pool_mayday_timeout(unsigned long __pool)
{
	struct worker_pool *pool = (void *)__pool;
	struct work_struct *work;

	spin_lock_irq(&pool->lock);
	spin_lock(&wq_mayday_lock);

	if (need_to_create_worker(pool)) {
		list_for_each_entry(work, &pool->worklist, entry)
			send_mayday(work);
	}

	spin_unlock(&wq_mayday_lock);
	spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
restart:
	spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

	while (true) {
		if (create_worker(pool) || !need_to_create_worker(pool))
			break;

		schedule_timeout_interruptible(CREATE_COOLDOWN);

		if (!need_to_create_worker(pool))
			break;
	}

	del_timer_sync(&pool->mayday_timer);
	spin_lock_irq(&pool->lock);

	if (need_to_create_worker(pool))
		goto restart;
}

static bool manage_workers(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (!mutex_trylock(&pool->manager_arb))
		return false;
	pool->manager = worker;

	maybe_create_worker(pool);

	pool->manager = NULL;
	mutex_unlock(&pool->manager_arb);
	return true;
}

static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
__acquires(&pool->lock)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct worker_pool *pool = worker->pool;
	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
	int work_color;
	struct worker *collision;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif

	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     raw_smp_processor_id() != pool->cpu);

	collision = find_worker_executing_work(pool, work);
	if (unlikely(collision)) {
		move_linked_works(work, &collision->scheduled, NULL);
		return;
	}

	debug_work_deactivate(work);
	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
	worker->current_work = work;
	worker->current_func = work->func;
	worker->current_pwq = pwq;
	work_color = get_work_color(work);

	list_del_init(&work->entry);

	if (unlikely(cpu_intensive))
		worker_set_flags(worker, WORKER_CPU_INTENSIVE);

	if (need_more_worker(pool))
		wake_up_worker(pool);

	set_work_pool_and_clear_pending(work, pool->id);

	spin_unlock_irq(&pool->lock);

	lock_map_acquire_read(&pwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	trace_workqueue_execute_start(work);
	worker->current_func(work);

	trace_workqueue_execute_end(work);
	lock_map_release(&lockdep_map);
	lock_map_release(&pwq->wq->lockdep_map);

	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
		       "      last function: %pf\n",
		       current->comm, preempt_count(), task_pid_nr(current),
		       worker->current_func);
		debug_show_held_locks(current);
		dump_stack();
	}

	cond_resched_rcu_qs();

	spin_lock_irq(&pool->lock);

	if (unlikely(cpu_intensive))
		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

	hash_del(&worker->hentry);
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
	worker->desc_valid = false;
	pwq_dec_nr_in_flight(pwq, work_color);
}

static void process_scheduled_works(struct worker *worker)
{
	while (!list_empty(&worker->scheduled)) {
		struct work_struct *work = list_first_entry(&worker->scheduled,
						struct work_struct, entry);
		process_one_work(worker, work);
	}
}

static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct worker_pool *pool = worker->pool;

	worker->task->flags |= PF_WQ_WORKER;
woke_up:
	spin_lock_irq(&pool->lock);

	if (unlikely(worker->flags & WORKER_DIE)) {
		spin_unlock_irq(&pool->lock);
		WARN_ON_ONCE(!list_empty(&worker->entry));
		worker->task->flags &= ~PF_WQ_WORKER;

		set_task_comm(worker->task, "kworker/dying");
		ida_simple_remove(&pool->worker_ida, worker->id);
		worker_detach_from_pool(worker, pool);
		kfree(worker);
		return 0;
	}

	worker_leave_idle(worker);
recheck:
	if (!need_more_worker(pool))
		goto sleep;

	if (unlikely(!may_start_working(pool)) && manage_workers(worker))
		goto recheck;

	WARN_ON_ONCE(!list_empty(&worker->scheduled));

	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

	do {
		struct work_struct *work =
			list_first_entry(&pool->worklist,
					 struct work_struct, entry);

		pool->watchdog_ts = jiffies;

		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
			process_one_work(worker, work);
			if (unlikely(!list_empty(&worker->scheduled)))
				process_scheduled_works(worker);
		} else {
			move_linked_works(work, &worker->scheduled, NULL);
			process_scheduled_works(worker);
		}
	} while (keep_working(pool));

	worker_set_flags(worker, WORKER_PREP);
sleep:
	worker_enter_idle(worker);
	__set_current_state(TASK_INTERRUPTIBLE);
	spin_unlock_irq(&pool->lock);
	schedule();
	goto woke_up;
}

static int rescuer_thread(void *__rescuer)
{
	struct worker *rescuer = __rescuer;
	struct workqueue_struct *wq = rescuer->rescue_wq;
	struct list_head *scheduled = &rescuer->scheduled;
	bool should_stop;

	set_user_nice(current, RESCUER_NICE_LEVEL);

	rescuer->task->flags |= PF_WQ_WORKER;
repeat:
	set_current_state(TASK_INTERRUPTIBLE);

	should_stop = kthread_should_stop();

	spin_lock_irq(&wq_mayday_lock);

	while (!list_empty(&wq->maydays)) {
		struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
					struct pool_workqueue, mayday_node);
		struct worker_pool *pool = pwq->pool;
		struct work_struct *work, *n;
		bool first = true;

		__set_current_state(TASK_RUNNING);
		list_del_init(&pwq->mayday_node);

		spin_unlock_irq(&wq_mayday_lock);

		worker_attach_to_pool(rescuer, pool);

		spin_lock_irq(&pool->lock);
		rescuer->pool = pool;

		WARN_ON_ONCE(!list_empty(scheduled));
		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
			if (get_work_pwq(work) == pwq) {
				if (first)
					pool->watchdog_ts = jiffies;
				move_linked_works(work, scheduled, &n);
			}
			first = false;
		}

		if (!list_empty(scheduled)) {
			process_scheduled_works(rescuer);

			if (need_to_create_worker(pool)) {
				spin_lock(&wq_mayday_lock);
				get_pwq(pwq);
				list_move_tail(&pwq->mayday_node, &wq->maydays);
				spin_unlock(&wq_mayday_lock);
			}
		}

		put_pwq(pwq);

		if (need_more_worker(pool))
			wake_up_worker(pool);

		rescuer->pool = NULL;
		spin_unlock_irq(&pool->lock);

		worker_detach_from_pool(rescuer, pool);

		spin_lock_irq(&wq_mayday_lock);
	}

	spin_unlock_irq(&wq_mayday_lock);

	if (should_stop) {
		__set_current_state(TASK_RUNNING);
		rescuer->task->flags &= ~PF_WQ_WORKER;
		return 0;
	}

	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
	schedule();
	goto repeat;
}

static void check_flush_dependency(struct workqueue_struct *target_wq,
				   struct work_struct *target_work)
{
	work_func_t target_func = target_work ? target_work->func : NULL;
	struct worker *worker;

	if (target_wq->flags & WQ_MEM_RECLAIM)
		return;

	worker = current_wq_worker();

	WARN_ONCE(current->flags & PF_MEMALLOC,
		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
		  current->pid, current->comm, target_wq->name, target_func);
	WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
			      (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
		  worker->current_pwq->wq->name, worker->current_func,
		  target_wq->name, target_func);
}

struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
	struct task_struct	*task;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

static void insert_wq_barrier(struct pool_workqueue *pwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	struct list_head *head;
	unsigned int linked = 0;

	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
	init_completion(&barr->done);
	barr->task = current;

	if (worker)
		head = worker->scheduled.next;
	else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;

		linked = *bits & WORK_STRUCT_LINKED;
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	debug_work_activate(&barr->work);
	insert_work(pwq, &barr->work, head,
		    work_color_to_flags(WORK_NO_COLOR) | linked);
}

static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
				      int flush_color, int work_color)
{
	bool wait = false;
	struct pool_workqueue *pwq;

	if (flush_color >= 0) {
		WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
		atomic_set(&wq->nr_pwqs_to_flush, 1);
	}

	for_each_pwq(pwq, wq) {
		struct worker_pool *pool = pwq->pool;

		spin_lock_irq(&pool->lock);

		if (flush_color >= 0) {
			WARN_ON_ONCE(pwq->flush_color != -1);

			if (pwq->nr_in_flight[flush_color]) {
				pwq->flush_color = flush_color;
				atomic_inc(&wq->nr_pwqs_to_flush);
				wait = true;
			}
		}

		if (work_color >= 0) {
			WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
			pwq->work_color = work_color;
		}

		spin_unlock_irq(&pool->lock);
	}

	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
		complete(&wq->first_flusher->done);

	return wait;
}

void flush_workqueue(struct workqueue_struct *wq)
{
	struct wq_flusher this_flusher = {
		.list = LIST_HEAD_INIT(this_flusher.list),
		.flush_color = -1,
		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
	};
	int next_color;

	if (WARN_ON(!wq_online))
		return;

	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);

	mutex_lock(&wq->mutex);

	next_color = work_next_color(wq->work_color);

	if (next_color != wq->flush_color) {
		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
		this_flusher.flush_color = wq->work_color;
		wq->work_color = next_color;

		if (!wq->first_flusher) {
			WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

			wq->first_flusher = &this_flusher;

			if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
						       wq->work_color)) {
				wq->flush_color = next_color;
				wq->first_flusher = NULL;
				goto out_unlock;
			}
		} else {
			WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
			list_add_tail(&this_flusher.list, &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}
	} else {
		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
	}

	check_flush_dependency(wq, NULL);

	mutex_unlock(&wq->mutex);

	wait_for_completion(&this_flusher.done);

	if (wq->first_flusher != &this_flusher)
		return;

	mutex_lock(&wq->mutex);

	if (wq->first_flusher != &this_flusher)
		goto out_unlock;

	wq->first_flusher = NULL;

	WARN_ON_ONCE(!list_empty(&this_flusher.list));
	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

	while (true) {
		struct wq_flusher *next, *tmp;

		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
			if (next->flush_color != wq->flush_color)
				break;
			list_del_init(&next->list);
			complete(&next->done);
		}

		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
			     wq->flush_color != work_next_color(wq->work_color));

		wq->flush_color = work_next_color(wq->flush_color);

		if (!list_empty(&wq->flusher_overflow)) {
			list_for_each_entry(tmp, &wq->flusher_overflow, list)
				tmp->flush_color = wq->work_color;

			wq->work_color = work_next_color(wq->work_color);

			list_splice_tail_init(&wq->flusher_overflow,
					      &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}

		if (list_empty(&wq->flusher_queue)) {
			WARN_ON_ONCE(wq->flush_color != wq->work_color);
			break;
		}

		WARN_ON_ONCE(wq->flush_color == wq->work_color);
		WARN_ON_ONCE(wq->flush_color != next->flush_color);

		list_del_init(&next->list);
		wq->first_flusher = next;

		if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
			break;

		wq->first_flusher = NULL;
	}

out_unlock:
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL(flush_workqueue);
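
/*
 * Example (illustrative; my_wq and the work items are hypothetical): a
 * driver that queued several items on its own workqueue and must be sure
 * all of them have finished, e.g. before freeing shared state:
 *
 *	queue_work(my_wq, &my_work_a);
 *	queue_work(my_wq, &my_work_b);
 *	...
 *	flush_workqueue(my_wq);
 *
 * This only waits for work queued before the call; see flush_work()
 * below to wait on a single item instead.
 */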

void drain_workqueue(struct workqueue_struct *wq)
{
	unsigned int flush_cnt = 0;
	struct pool_workqueue *pwq;

	mutex_lock(&wq->mutex);
	if (!wq->nr_drainers++)
		wq->flags |= __WQ_DRAINING;
	mutex_unlock(&wq->mutex);
reflush:
	flush_workqueue(wq);

	mutex_lock(&wq->mutex);

	for_each_pwq(pwq, wq) {
		bool drained;

		spin_lock_irq(&pwq->pool->lock);
		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
		spin_unlock_irq(&pwq->pool->lock);

		if (drained)
			continue;

		if (++flush_cnt == 10 ||
		    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
			pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
				wq->name, flush_cnt);

		mutex_unlock(&wq->mutex);
		goto reflush;
	}

	if (!--wq->nr_drainers)
		wq->flags &= ~__WQ_DRAINING;
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(drain_workqueue);

static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
{
	struct worker *worker = NULL;
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	might_sleep();

	local_irq_disable();
	pool = get_work_pool(work);
	if (!pool) {
		local_irq_enable();
		return false;
	}

	spin_lock(&pool->lock);

	pwq = get_work_pwq(work);
	if (pwq) {
		if (unlikely(pwq->pool != pool))
			goto already_gone;
	} else {
		worker = find_worker_executing_work(pool, work);
		if (!worker)
			goto already_gone;
		pwq = worker->current_pwq;
	}

	check_flush_dependency(pwq->wq, work);

	insert_wq_barrier(pwq, barr, work, worker);
	spin_unlock_irq(&pool->lock);

	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
		lock_map_acquire(&pwq->wq->lockdep_map);
	else
		lock_map_acquire_read(&pwq->wq->lockdep_map);
	lock_map_release(&pwq->wq->lockdep_map);

	return true;
already_gone:
	spin_unlock_irq(&pool->lock);
	return false;
}

bool flush_work(struct work_struct *work)
{
	struct wq_barrier barr;

	if (WARN_ON(!wq_online))
		return false;

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	if (start_flush_work(work, &barr)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
		return true;
	} else {
		return false;
	}
}
EXPORT_SYMBOL_GPL(flush_work);
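
/*
 * Example (illustrative): synchronizing with a single item, assuming the
 * my_work/my_work_fn pair sketched near queue_work_on() above:
 *
 *	queue_work(system_wq, &my_work);
 *	...
 *	if (flush_work(&my_work))
 *		pr_debug("my_work was pending or running; now done\n");
 *
 * Unlike cancel_work_sync(), the handler still runs to completion.
 */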

struct cwt_wait {
	wait_queue_entry_t	wait;
	struct work_struct	*work;
};

static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);

	if (cwait->work != key)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);

		if (unlikely(ret == -ENOENT)) {
			struct cwt_wait cwait;

			init_wait(&cwait.wait);
			cwait.wait.func = cwt_wakefn;
			cwait.work = work;

			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
						  TASK_UNINTERRUPTIBLE);
			if (work_is_canceling(work))
				schedule();
			finish_wait(&cancel_waitq, &cwait.wait);
		}
	} while (unlikely(ret < 0));

	mark_work_canceling(work);
	local_irq_restore(flags);

	if (wq_online)
		flush_work(work);

	clear_work_data(work);

	smp_mb();
	if (waitqueue_active(&cancel_waitq))
		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work);

	return ret;
}

bool cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, false);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

bool flush_delayed_work(struct delayed_work *dwork)
{
	local_irq_disable();
	if (del_timer_sync(&dwork->timer))
		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
	local_irq_enable();
	return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

static bool __cancel_work(struct work_struct *work, bool is_dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (unlikely(ret < 0))
		return false;

	set_work_pool_and_clear_pending(work, get_work_pool_id(work));
	local_irq_restore(flags);
	return ret;
}

bool cancel_work(struct work_struct *work)
{
	return __cancel_work(work, false);
}

bool cancel_delayed_work(struct delayed_work *dwork)
{
	return __cancel_work(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work);

bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
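
/*
 * Example teardown pattern (illustrative; my_* names are hypothetical):
 * in a remove() path, stop a self-rearming delayed work for good:
 *
 *	my_dev->shutting_down = true;	 (handler checks before rearming)
 *	cancel_delayed_work_sync(&my_dev->my_poll);
 *
 * The _sync variants may sleep, so they must not be called from the work
 * item being cancelled or from atomic context.
 */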

int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct __percpu *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}
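
/*
 * Example (illustrative): draining a hypothetical per-cpu cache on every
 * online CPU and waiting for all of them:
 *
 *	static void my_drain_fn(struct work_struct *work)
 *	{
 *		my_flush_this_cpu();	 (hypothetical per-cpu helper)
 *	}
 *
 *	int err = schedule_on_each_cpu(my_drain_fn);
 *
 * Returns 0 on success or -ENOMEM; the call sleeps until every CPU's
 * instance has completed.
 */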

int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
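
/*
 * Example (illustrative): the return value tells the caller whether fn
 * ran synchronously (0) or was deferred to a worker (1):
 *
 *	static struct execute_work my_ew;
 *
 *	if (execute_in_process_context(my_cleanup_fn, &my_ew))
 *		pr_debug("my_cleanup_fn deferred to process context\n");
 *
 * my_cleanup_fn is hypothetical; &my_ew must stay valid until it runs.
 */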

void free_workqueue_attrs(struct workqueue_attrs *attrs)
{
	if (attrs) {
		free_cpumask_var(attrs->cpumask);
		kfree(attrs);
	}
}

struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
{
	struct workqueue_attrs *attrs;

	attrs = kzalloc(sizeof(*attrs), gfp_mask);
	if (!attrs)
		goto fail;
	if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
		goto fail;

	cpumask_copy(attrs->cpumask, cpu_possible_mask);
	return attrs;
fail:
	free_workqueue_attrs(attrs);
	return NULL;
}
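
/*
 * Example (illustrative sketch): building attrs for a niced unbound
 * workqueue.  The apply step assumes apply_workqueue_attrs(), which is
 * defined later in the full version of this file; my_unbound_wq is
 * hypothetical:
 *
 *	struct workqueue_attrs *attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *
 *	if (!attrs)
 *		return -ENOMEM;
 *	attrs->nice = -5;
 *	cpumask_copy(attrs->cpumask, cpumask_of_node(0));
 *	ret = apply_workqueue_attrs(my_unbound_wq, attrs);
 *	free_workqueue_attrs(attrs);
 */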
3154
3155static void copy_workqueue_attrs(struct workqueue_attrs *to,
3156 const struct workqueue_attrs *from)
3157{
3158 to->nice = from->nice;
3159 cpumask_copy(to->cpumask, from->cpumask);

 /*
  * Unlike the hash and equality test below, copying must not ignore
  * ->no_numa as it is used for both pool and wq attrs.  Instead,
  * get_unbound_pool() explicitly clears ->no_numa after copying.
  */
3165 to->no_numa = from->no_numa;
3166}

/* hash value of the content of @attrs */
3169static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3170{
3171 u32 hash = 0;
3172
3173 hash = jhash_1word(attrs->nice, hash);
3174 hash = jhash(cpumask_bits(attrs->cpumask),
3175 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3176 return hash;
3177}

/* content equality test */
3180static bool wqattrs_equal(const struct workqueue_attrs *a,
3181 const struct workqueue_attrs *b)
3182{
3183 if (a->nice != b->nice)
3184 return false;
3185 if (!cpumask_equal(a->cpumask, b->cpumask))
3186 return false;
3187 return true;
3188}
3189
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
3200static int init_worker_pool(struct worker_pool *pool)
3201{
3202 spin_lock_init(&pool->lock);
3203 pool->id = -1;
3204 pool->cpu = -1;
3205 pool->node = NUMA_NO_NODE;
3206 pool->flags |= POOL_DISASSOCIATED;
3207 pool->watchdog_ts = jiffies;
3208 INIT_LIST_HEAD(&pool->worklist);
3209 INIT_LIST_HEAD(&pool->idle_list);
3210 hash_init(pool->busy_hash);
3211
3212 setup_deferrable_timer(&pool->idle_timer, idle_worker_timeout,
3213 (unsigned long)pool);
3214
3215 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3216 (unsigned long)pool);
3217
3218 mutex_init(&pool->manager_arb);
3219 mutex_init(&pool->attach_mutex);
3220 INIT_LIST_HEAD(&pool->workers);
3221
3222 ida_init(&pool->worker_ida);
3223 INIT_HLIST_NODE(&pool->hash_node);
3224 pool->refcnt = 1;

 /* shouldn't fail above this point */
3227 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3228 if (!pool->attrs)
3229 return -ENOMEM;
3230 return 0;
3231}
3232
3233static void rcu_free_wq(struct rcu_head *rcu)
3234{
3235 struct workqueue_struct *wq =
3236 container_of(rcu, struct workqueue_struct, rcu);
3237
3238 if (!(wq->flags & WQ_UNBOUND))
3239 free_percpu(wq->cpu_pwqs);
3240 else
3241 free_workqueue_attrs(wq->unbound_attrs);
3242
3243 kfree(wq->rescuer);
3244 kfree(wq);
3245}
3246
3247static void rcu_free_pool(struct rcu_head *rcu)
3248{
3249 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3250
3251 ida_destroy(&pool->worker_ida);
3252 free_workqueue_attrs(pool->attrs);
3253 kfree(pool);
3254}
3255
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * partially, any of get_unbound_pool()'s steps.
 *
 * Should be called with wq_pool_mutex held.
 */
3267static void put_unbound_pool(struct worker_pool *pool)
3268{
3269 DECLARE_COMPLETION_ONSTACK(detach_completion);
3270 struct worker *worker;
3271
3272 lockdep_assert_held(&wq_pool_mutex);
3273
3274 if (--pool->refcnt)
3275 return;
3276
3277
3278 if (WARN_ON(!(pool->cpu < 0)) ||
3279 WARN_ON(!list_empty(&pool->worklist)))
3280 return;
3281
3282
3283 if (pool->id >= 0)
3284 idr_remove(&worker_pool_idr, pool->id);
3285 hash_del(&pool->hash_node);
3286
 /*
  * Become the manager and destroy all workers.  Grabbing
  * manager_arb prevents @pool's workers from blocking on
  * attach_mutex.
  */
3292 mutex_lock(&pool->manager_arb);
3293
3294 spin_lock_irq(&pool->lock);
3295 while ((worker = first_idle_worker(pool)))
3296 destroy_worker(worker);
3297 WARN_ON(pool->nr_workers || pool->nr_idle);
3298 spin_unlock_irq(&pool->lock);
3299
3300 mutex_lock(&pool->attach_mutex);
3301 if (!list_empty(&pool->workers))
3302 pool->detach_completion = &detach_completion;
3303 mutex_unlock(&pool->attach_mutex);
3304
3305 if (pool->detach_completion)
3306 wait_for_completion(pool->detach_completion);
3307
3308 mutex_unlock(&pool->manager_arb);
3309
 /* shut down the timers */
3311 del_timer_sync(&pool->idle_timer);
3312 del_timer_sync(&pool->mayday_timer);
3313
 /* sched-RCU protected to allow dereferences from get_work_pool() */
3315 call_rcu_sched(&pool->rcu, rcu_free_pool);
3316}
3317
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
3332static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3333{
3334 u32 hash = wqattrs_hash(attrs);
3335 struct worker_pool *pool;
3336 int node;
3337 int target_node = NUMA_NO_NODE;
3338
3339 lockdep_assert_held(&wq_pool_mutex);
3340
 /* do we already have a matching pool? */
3342 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3343 if (wqattrs_equal(pool->attrs, attrs)) {
3344 pool->refcnt++;
3345 return pool;
3346 }
3347 }
3348
 /* if cpumask is contained inside a NUMA node, we belong to that node */
3350 if (wq_numa_enabled) {
3351 for_each_node(node) {
3352 if (cpumask_subset(attrs->cpumask,
3353 wq_numa_possible_cpumask[node])) {
3354 target_node = node;
3355 break;
3356 }
3357 }
3358 }
3359
 /* nope, create a new one */
3361 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3362 if (!pool || init_worker_pool(pool) < 0)
3363 goto fail;
3364
3365 lockdep_set_subclass(&pool->lock, 1);
3366 copy_workqueue_attrs(pool->attrs, attrs);
3367 pool->node = target_node;

 /*
  * no_numa isn't a worker_pool attribute, always clear it.  See
  * 'struct workqueue_attrs' comments for detail.
  */
3373 pool->attrs->no_numa = false;
3374
3375 if (worker_pool_assign_id(pool) < 0)
3376 goto fail;

 /* create and start the initial worker */
3379 if (wq_online && !create_worker(pool))
3380 goto fail;
3381
3382
3383 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3384
3385 return pool;
3386fail:
3387 if (pool)
3388 put_unbound_pool(pool);
3389 return NULL;
3390}
3391
3392static void rcu_free_pwq(struct rcu_head *rcu)
3393{
3394 kmem_cache_free(pwq_cache,
3395 container_of(rcu, struct pool_workqueue, rcu));
3396}
3397
/*
 * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
 * and needs to be destroyed.
 */
3402static void pwq_unbound_release_workfn(struct work_struct *work)
3403{
3404 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3405 unbound_release_work);
3406 struct workqueue_struct *wq = pwq->wq;
3407 struct worker_pool *pool = pwq->pool;
3408 bool is_last;
3409
3410 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3411 return;
3412
3413 mutex_lock(&wq->mutex);
3414 list_del_rcu(&pwq->pwqs_node);
3415 is_last = list_empty(&wq->pwqs);
3416 mutex_unlock(&wq->mutex);
3417
3418 mutex_lock(&wq_pool_mutex);
3419 put_unbound_pool(pool);
3420 mutex_unlock(&wq_pool_mutex);
3421
3422 call_rcu_sched(&pwq->rcu, rcu_free_pwq);

 /*
  * If we're the last pwq going away, @wq is already dead and no one
  * is gonna access it anymore.  Schedule RCU free.
  */
3428 if (is_last)
3429 call_rcu_sched(&wq->rcu, rcu_free_wq);
3430}
3431
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
3440static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3441{
3442 struct workqueue_struct *wq = pwq->wq;
3443 bool freezable = wq->flags & WQ_FREEZABLE;
3444 unsigned long flags;
3445
3446
3447 lockdep_assert_held(&wq->mutex);
3448
 /* fast exit for non-freezable wqs */
3450 if (!freezable && pwq->max_active == wq->saved_max_active)
3451 return;
3452
 /* this function can also be called during early boot w/ irq disabled */
3454 spin_lock_irqsave(&pwq->pool->lock, flags);
3455
 /*
  * During [un]freezing, the caller is responsible for ensuring that
  * this function is called at least once after @workqueue_freezing
  * is updated and visible.
  */
3461 if (!freezable || !workqueue_freezing) {
3462 pwq->max_active = wq->saved_max_active;
3463
3464 while (!list_empty(&pwq->delayed_works) &&
3465 pwq->nr_active < pwq->max_active)
3466 pwq_activate_first_delayed(pwq);
3467
 /*
  * Need to kick a worker after thawed or an unbound wq's
  * max_active is bumped.  It's a slow path.  Shouldn't happen often.
  */
3472 wake_up_worker(pwq->pool);
3473 } else {
3474 pwq->max_active = 0;
3475 }
3476
3477 spin_unlock_irqrestore(&pwq->pool->lock, flags);
3478}
3479
/* initialize newly alloced @pwq which is associated with @wq and @pool */
3481static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3482 struct worker_pool *pool)
3483{
3484 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3485
3486 memset(pwq, 0, sizeof(*pwq));
3487
3488 pwq->pool = pool;
3489 pwq->wq = wq;
3490 pwq->flush_color = -1;
3491 pwq->refcnt = 1;
3492 INIT_LIST_HEAD(&pwq->delayed_works);
3493 INIT_LIST_HEAD(&pwq->pwqs_node);
3494 INIT_LIST_HEAD(&pwq->mayday_node);
3495 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3496}

/* sync @pwq with the current state of its associated wq and link it */
static void link_pwq(struct pool_workqueue *pwq)
{
 struct workqueue_struct *wq = pwq->wq;

 lockdep_assert_held(&wq->mutex);

 /* may be called multiple times, ignore if already linked */
 if (!list_empty(&pwq->pwqs_node))
 return;

 /* set the matching work_color */
 pwq->work_color = wq->work_color;

 /* sync max_active to the current setting */
 pwq_adjust_max_active(pwq);

 /* link in @pwq */
 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3517}
3518
/* obtain a pool matching @attrs and create a pwq associating the pool and @wq */
3520static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3521 const struct workqueue_attrs *attrs)
3522{
3523 struct worker_pool *pool;
3524 struct pool_workqueue *pwq;
3525
3526 lockdep_assert_held(&wq_pool_mutex);
3527
3528 pool = get_unbound_pool(attrs);
3529 if (!pool)
3530 return NULL;
3531
3532 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3533 if (!pwq) {
3534 put_unbound_pool(pool);
3535 return NULL;
3536 }
3537
3538 init_pwq(pwq, wq, pool);
3539 return pwq;
3540}
3541
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
3564static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3565 int cpu_going_down, cpumask_t *cpumask)
3566{
3567 if (!wq_numa_enabled || attrs->no_numa)
3568 goto use_dfl;
3569
 /* does @node have any online CPUs @attrs wants? */
3571 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3572 if (cpu_going_down >= 0)
3573 cpumask_clear_cpu(cpu_going_down, cpumask);
3574
3575 if (cpumask_empty(cpumask))
3576 goto use_dfl;
3577
 /* yeah, return possible CPUs in @node that @attrs wants */
3579 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3580
3581 if (cpumask_empty(cpumask)) {
3582 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3583 "possible intersect\n");
3584 return false;
3585 }
3586
3587 return !cpumask_equal(cpumask, attrs->cpumask);
3588
3589use_dfl:
3590 cpumask_copy(cpumask, attrs->cpumask);
3591 return false;
3592}
3593
/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
3595static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3596 int node,
3597 struct pool_workqueue *pwq)
3598{
3599 struct pool_workqueue *old_pwq;
3600
3601 lockdep_assert_held(&wq_pool_mutex);
3602 lockdep_assert_held(&wq->mutex);
3603
 /* link_pwq() can handle duplicate calls */
3605 link_pwq(pwq);
3606
3607 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3608 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3609 return old_pwq;
3610}
3611
/* context to store the prepared attrs & pwqs before applying */
3613struct apply_wqattrs_ctx {
3614 struct workqueue_struct *wq;
3615 struct workqueue_attrs *attrs;
3616 struct list_head list;
3617 struct pool_workqueue *dfl_pwq;
3618 struct pool_workqueue *pwq_tbl[];
3619};
3620
/* free the resources after success or abort */
3622static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3623{
3624 if (ctx) {
3625 int node;
3626
3627 for_each_node(node)
3628 put_pwq_unlocked(ctx->pwq_tbl[node]);
3629 put_pwq_unlocked(ctx->dfl_pwq);
3630
3631 free_workqueue_attrs(ctx->attrs);
3632
3633 kfree(ctx);
3634 }
3635}
3636
/* allocate the attrs and pwqs for later installation */
3638static struct apply_wqattrs_ctx *
3639apply_wqattrs_prepare(struct workqueue_struct *wq,
3640 const struct workqueue_attrs *attrs)
3641{
3642 struct apply_wqattrs_ctx *ctx;
3643 struct workqueue_attrs *new_attrs, *tmp_attrs;
3644 int node;
3645
3646 lockdep_assert_held(&wq_pool_mutex);
3647
3648 ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3649 GFP_KERNEL);
3650
3651 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3652 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3653 if (!ctx || !new_attrs || !tmp_attrs)
3654 goto out_free;
3655
 /*
  * Calculate the attrs of the default pwq.  If the user configured
  * cpumask doesn't overlap with the wq_unbound_cpumask, we fall
  * back to the wq_unbound_cpumask.
  */
3661 copy_workqueue_attrs(new_attrs, attrs);
3662 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3663 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3664 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3665
 /*
  * We may create multiple pwqs with differing cpumasks.  Make a
  * copy of @new_attrs which will be modified and used to obtain
  * pools.
  */
3671 copy_workqueue_attrs(tmp_attrs, new_attrs);
3672
 /*
  * If something goes wrong during CPU up/down, we'll fall back to
  * the default pwq covering whole @attrs->cpumask.  Always create
  * it even if we're going to install it last.
  */
3678 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3679 if (!ctx->dfl_pwq)
3680 goto out_free;
3681
3682 for_each_node(node) {
3683 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3684 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3685 if (!ctx->pwq_tbl[node])
3686 goto out_free;
3687 } else {
3688 ctx->dfl_pwq->refcnt++;
3689 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3690 }
3691 }
3692
 /* save the user configured attrs and sanitize it */
3694 copy_workqueue_attrs(new_attrs, attrs);
3695 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3696 ctx->attrs = new_attrs;
3697
3698 ctx->wq = wq;
3699 free_workqueue_attrs(tmp_attrs);
3700 return ctx;
3701
3702out_free:
3703 free_workqueue_attrs(tmp_attrs);
3704 free_workqueue_attrs(new_attrs);
3705 apply_wqattrs_cleanup(ctx);
3706 return NULL;
3707}
3708
/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
3710static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3711{
3712 int node;
 /* all pwqs have been created successfully, let's install them */
 mutex_lock(&ctx->wq->mutex);

 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);

 /* save the previous pwq and install the new one */
 for_each_node(node)
 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
 ctx->pwq_tbl[node]);

 /* @dfl_pwq might not have been used, ensure it's linked */
 link_pwq(ctx->dfl_pwq);
 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3727
3728 mutex_unlock(&ctx->wq->mutex);
3729}
3730
3731static void apply_wqattrs_lock(void)
3732{
 /* CPUs should stay stable across pwq creations and installations */
3734 get_online_cpus();
3735 mutex_lock(&wq_pool_mutex);
3736}
3737
3738static void apply_wqattrs_unlock(void)
3739{
3740 mutex_unlock(&wq_pool_mutex);
3741 put_online_cpus();
3742}
3743
3744static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3745 const struct workqueue_attrs *attrs)
3746{
3747 struct apply_wqattrs_ctx *ctx;
3748
 /* only unbound workqueues can change attributes */
3750 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3751 return -EINVAL;
3752
 /* creating multiple pwqs breaks ordering guarantee; disallow for explicitly ordered wqs */
3754 if (!list_empty(&wq->pwqs)) {
3755 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
3756 return -EINVAL;
3757
3758 wq->flags &= ~__WQ_ORDERED;
3759 }
3760
3761 ctx = apply_wqattrs_prepare(wq, attrs);
3762 if (!ctx)
3763 return -ENOMEM;
3764
 /* the ctx has been prepared successfully, let's commit it */
3766 apply_wqattrs_commit(ctx);
3767 apply_wqattrs_cleanup(ctx);
3768
3769 return 0;
3770}
3771
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node it was issued on.  Older pwqs are released as in-flight work
 * items finish.  Note that a work item which repeatedly requeues itself
 * back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
3788int apply_workqueue_attrs(struct workqueue_struct *wq,
3789 const struct workqueue_attrs *attrs)
3790{
3791 int ret;
3792
3793 apply_wqattrs_lock();
3794 ret = apply_workqueue_attrs_locked(wq, attrs);
3795 apply_wqattrs_unlock();
3796
3797 return ret;
3798}
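
/*
 * Example: restricting an unbound workqueue to node 0 CPUs at nice -5.
 * Illustrative sketch; @unbound_wq must have been created with
 * WQ_UNBOUND.  The attrs are copied by apply_workqueue_attrs() and can
 * be freed right after the call.
 *
 *	struct workqueue_attrs *attrs;
 *	int ret = -ENOMEM;
 *
 *	attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *	if (attrs) {
 *		attrs->nice = -5;
 *		cpumask_copy(attrs->cpumask, cpumask_of_node(0));
 *		ret = apply_workqueue_attrs(unbound_wq, attrs);
 *		free_workqueue_attrs(attrs);
 *	}
 */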
3799
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged, update NUMA affinity of
 * @wq accordingly.
 *
 * If NUMA affinity can't be adjusted due to memory allocation failure, it
 * falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing the work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  This is similar to how per-cpu
 * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * CPU_DOWN_PREPARE.
 */
3822static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3823 bool online)
3824{
3825 int node = cpu_to_node(cpu);
3826 int cpu_off = online ? -1 : cpu;
3827 struct pool_workqueue *old_pwq = NULL, *pwq;
3828 struct workqueue_attrs *target_attrs;
3829 cpumask_t *cpumask;
3830
3831 lockdep_assert_held(&wq_pool_mutex);
3832
3833 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
3834 wq->unbound_attrs->no_numa)
3835 return;
3836
 /*
  * We don't wanna alloc/free wq_attrs for each wq for each CPU.
  * Let's use a preallocated one.  The following buf is protected by
  * CPU hotplug exclusion.
  */
3842 target_attrs = wq_update_unbound_numa_attrs_buf;
3843 cpumask = target_attrs->cpumask;
3844
3845 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3846 pwq = unbound_pwq_by_node(wq, node);

 /*
  * Let's determine what needs to be done.  If the target cpumask is
  * different from the default pwq's, we need to compare it to @pwq's
  * and create a new one if they don't match.  If the target cpumask
  * equals the default pwq's, the default pwq should be used.
  */
3854 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
3855 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3856 return;
3857 } else {
3858 goto use_dfl_pwq;
3859 }
3860
 /* create a new pwq */
3862 pwq = alloc_unbound_pwq(wq, target_attrs);
3863 if (!pwq) {
3864 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3865 wq->name);
3866 goto use_dfl_pwq;
3867 }
3868
 /* install the new pwq */
3870 mutex_lock(&wq->mutex);
3871 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3872 goto out_unlock;
3873
3874use_dfl_pwq:
3875 mutex_lock(&wq->mutex);
3876 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3877 get_pwq(wq->dfl_pwq);
3878 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3879 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3880out_unlock:
3881 mutex_unlock(&wq->mutex);
3882 put_pwq_unlocked(old_pwq);
3883}
3884
3885static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3886{
3887 bool highpri = wq->flags & WQ_HIGHPRI;
3888 int cpu, ret;
3889
3890 if (!(wq->flags & WQ_UNBOUND)) {
3891 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3892 if (!wq->cpu_pwqs)
3893 return -ENOMEM;
3894
3895 for_each_possible_cpu(cpu) {
3896 struct pool_workqueue *pwq =
3897 per_cpu_ptr(wq->cpu_pwqs, cpu);
3898 struct worker_pool *cpu_pools =
3899 per_cpu(cpu_worker_pools, cpu);
3900
3901 init_pwq(pwq, wq, &cpu_pools[highpri]);
3902
3903 mutex_lock(&wq->mutex);
3904 link_pwq(pwq);
3905 mutex_unlock(&wq->mutex);
3906 }
3907 return 0;
3908 } else if (wq->flags & __WQ_ORDERED) {
3909 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 /* there should only be single pwq for ordering guarantee */
3911 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3912 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3913 "ordering guarantee broken for workqueue %s\n", wq->name);
3914 return ret;
3915 } else {
3916 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3917 }
3918}
3919
3920static int wq_clamp_max_active(int max_active, unsigned int flags,
3921 const char *name)
3922{
3923 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
3924
3925 if (max_active < 1 || max_active > lim)
3926 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
3927 max_active, name, 1, lim);
3928
3929 return clamp_val(max_active, 1, lim);
3930}
3931
3932struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3933 unsigned int flags,
3934 int max_active,
3935 struct lock_class_key *key,
3936 const char *lock_name, ...)
3937{
3938 size_t tbl_size = 0;
3939 va_list args;
3940 struct workqueue_struct *wq;
3941 struct pool_workqueue *pwq;
3942
 /*
  * Unbound && max_active == 1 used to imply ordered, which is no
  * longer the case on NUMA machines due to per-node pools.  While
  * alloc_ordered_workqueue() is the right way to create an ordered
  * workqueue, keep the previous behavior to avoid subtle breakages
  * on NUMA.
  */
3950 if ((flags & WQ_UNBOUND) && max_active == 1)
3951 flags |= __WQ_ORDERED;
3952
 /* see the comment above the definition of WQ_POWER_EFFICIENT */
3954 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
3955 flags |= WQ_UNBOUND;
3956
 /* allocate wq and format name */
3958 if (flags & WQ_UNBOUND)
3959 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
3960
3961 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
3962 if (!wq)
3963 return NULL;
3964
3965 if (flags & WQ_UNBOUND) {
3966 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3967 if (!wq->unbound_attrs)
3968 goto err_free_wq;
3969 }
3970
3971 va_start(args, lock_name);
3972 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
3973 va_end(args);
3974
3975 max_active = max_active ?: WQ_DFL_ACTIVE;
3976 max_active = wq_clamp_max_active(max_active, flags, wq->name);
3977
 /* init wq */
3979 wq->flags = flags;
3980 wq->saved_max_active = max_active;
3981 mutex_init(&wq->mutex);
3982 atomic_set(&wq->nr_pwqs_to_flush, 0);
3983 INIT_LIST_HEAD(&wq->pwqs);
3984 INIT_LIST_HEAD(&wq->flusher_queue);
3985 INIT_LIST_HEAD(&wq->flusher_overflow);
3986 INIT_LIST_HEAD(&wq->maydays);
3987
3988 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3989 INIT_LIST_HEAD(&wq->list);
3990
3991 if (alloc_and_link_pwqs(wq) < 0)
3992 goto err_free_wq;
3993
 /*
  * Workqueues which may be used during memory reclaim should
  * have a rescuer to guarantee forward progress.
  */
3998 if (flags & WQ_MEM_RECLAIM) {
3999 struct worker *rescuer;
4000
4001 rescuer = alloc_worker(NUMA_NO_NODE);
4002 if (!rescuer)
4003 goto err_destroy;
4004
4005 rescuer->rescue_wq = wq;
4006 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
4007 wq->name);
4008 if (IS_ERR(rescuer->task)) {
4009 kfree(rescuer);
4010 goto err_destroy;
4011 }
4012
4013 wq->rescuer = rescuer;
4014 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4015 wake_up_process(rescuer->task);
4016 }
4017
4018 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4019 goto err_destroy;
4020
 /*
  * wq_pool_mutex protects global freeze state and workqueues list.
  * Grab it, adjust max_active and add the new @wq to workqueues
  * list.
  */
4026 mutex_lock(&wq_pool_mutex);
4027
4028 mutex_lock(&wq->mutex);
4029 for_each_pwq(pwq, wq)
4030 pwq_adjust_max_active(pwq);
4031 mutex_unlock(&wq->mutex);
4032
4033 list_add_tail_rcu(&wq->list, &workqueues);
4034
4035 mutex_unlock(&wq_pool_mutex);
4036
4037 return wq;
4038
4039err_free_wq:
4040 free_workqueue_attrs(wq->unbound_attrs);
4041 kfree(wq);
4042 return NULL;
4043err_destroy:
4044 destroy_workqueue(wq);
4045 return NULL;
4046}
4047EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
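
/*
 * Example: the usual way to reach this function is through the
 * alloc_workqueue() wrapper.  Illustrative sketch; "my_wq" is a
 * hypothetical name.
 *
 *	// unbound, usable from the memory reclaim path, default max_active
 *	wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *	if (!wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(wq);
 */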
4048
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue.  All work currently pending will be done first.
 */
4055void destroy_workqueue(struct workqueue_struct *wq)
4056{
4057 struct pool_workqueue *pwq;
4058 int node;
4059
 /* drain it before proceeding with destruction */
4061 drain_workqueue(wq);
4062
 /* sanity checks */
4064 mutex_lock(&wq->mutex);
4065 for_each_pwq(pwq, wq) {
4066 int i;
4067
4068 for (i = 0; i < WORK_NR_COLORS; i++) {
4069 if (WARN_ON(pwq->nr_in_flight[i])) {
4070 mutex_unlock(&wq->mutex);
4071 show_workqueue_state();
4072 return;
4073 }
4074 }
4075
4076 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4077 WARN_ON(pwq->nr_active) ||
4078 WARN_ON(!list_empty(&pwq->delayed_works))) {
4079 mutex_unlock(&wq->mutex);
4080 show_workqueue_state();
4081 return;
4082 }
4083 }
4084 mutex_unlock(&wq->mutex);
4085
 /*
  * wq list is used to freeze wq, remove from list after
  * flushing is complete in case freeze races us.
  */
4090 mutex_lock(&wq_pool_mutex);
4091 list_del_rcu(&wq->list);
4092 mutex_unlock(&wq_pool_mutex);
4093
4094 workqueue_sysfs_unregister(wq);
4095
4096 if (wq->rescuer)
4097 kthread_stop(wq->rescuer->task);
4098
4099 if (!(wq->flags & WQ_UNBOUND)) {
 /*
  * The base ref is never dropped on per-cpu pwqs.  Directly
  * schedule RCU free.
  */
4104 call_rcu_sched(&wq->rcu, rcu_free_wq);
4105 } else {
 /*
  * We're the sole accessor of @wq at this point.  Directly
  * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
  * @wq will be freed when the last pwq is released.
  */
4111 for_each_node(node) {
4112 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4113 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4114 put_pwq_unlocked(pwq);
4115 }
4116
 /*
  * Put dfl_pwq.  @wq may be freed any time after dfl_pwq is
  * put.  Don't access it afterwards.
  */
4121 pwq = wq->dfl_pwq;
4122 wq->dfl_pwq = NULL;
4123 put_pwq_unlocked(pwq);
4124 }
4125}
4126EXPORT_SYMBOL_GPL(destroy_workqueue);
4127
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
4138void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4139{
4140 struct pool_workqueue *pwq;
4141
 /* disallow meddling with max_active for ordered workqueues */
4143 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4144 return;
4145
4146 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4147
4148 mutex_lock(&wq->mutex);
4149
4150 wq->flags &= ~__WQ_ORDERED;
4151 wq->saved_max_active = max_active;
4152
4153 for_each_pwq(pwq, wq)
4154 pwq_adjust_max_active(pwq);
4155
4156 mutex_unlock(&wq->mutex);
4157}
4158EXPORT_SYMBOL_GPL(workqueue_set_max_active);
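
/*
 * Example: raising the concurrency limit of a workqueue at runtime.
 * Sketch; "my_wq" is hypothetical.
 *
 *	// allow up to 16 in-flight work items per pool_workqueue
 *	workqueue_set_max_active(my_wq, 16);
 */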
4159
/**
 * current_is_workqueue_rescuer - is %current a workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
 * work functions to determine whether it's being run off the rescuer task.
 *
 * Return: %true if %current is a workqueue rescuer. %false otherwise.
 */
4168bool current_is_workqueue_rescuer(void)
4169{
4170 struct worker *worker = current_wq_worker();
4171
4172 return worker && worker->rescue_wq;
4173}
4174
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean the workqueue is also
 * congested on other CPUs / NUMA nodes.
 *
 * Return:
 * %true if congested, %false otherwise.
 */
4193bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4194{
4195 struct pool_workqueue *pwq;
4196 bool ret;
4197
4198 rcu_read_lock_sched();
4199
4200 if (cpu == WORK_CPU_UNBOUND)
4201 cpu = smp_processor_id();
4202
4203 if (!(wq->flags & WQ_UNBOUND))
4204 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4205 else
4206 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4207
4208 ret = !list_empty(&pwq->delayed_works);
4209 rcu_read_unlock_sched();
4210
4211 return ret;
4212}
4213EXPORT_SYMBOL_GPL(workqueue_congested);
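
/*
 * Example: advisory back-off when the local pwq already has delayed
 * (inactive) work items.  Sketch; "my_wq" and "md->poll_work" are
 * hypothetical, and the answer is a hint only, not a guarantee.
 *
 *	if (!workqueue_congested(WORK_CPU_UNBOUND, my_wq))
 *		queue_work(my_wq, &md->poll_work);
 */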
4214
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
4226unsigned int work_busy(struct work_struct *work)
4227{
4228 struct worker_pool *pool;
4229 unsigned long flags;
4230 unsigned int ret = 0;
4231
4232 if (work_pending(work))
4233 ret |= WORK_BUSY_PENDING;
4234
4235 local_irq_save(flags);
4236 pool = get_work_pool(work);
4237 if (pool) {
4238 spin_lock(&pool->lock);
4239 if (find_worker_executing_work(pool, work))
4240 ret |= WORK_BUSY_RUNNING;
4241 spin_unlock(&pool->lock);
4242 }
4243 local_irq_restore(flags);
4244
4245 return ret;
4246}
4247EXPORT_SYMBOL_GPL(work_busy);
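
/*
 * Example: debug-only peek at a work item's state.  Sketch; the names
 * are hypothetical and the answer may be stale by the time it's used.
 *
 *	unsigned int busy = work_busy(&md->poll_work);
 *
 *	if (busy & WORK_BUSY_RUNNING)
 *		pr_debug("poll_work still running\n");
 */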
4248
/**
 * set_worker_desc - set description for the current work item
 * @fmt: printf-style format string
 * @...: arguments for the format string
 *
 * This function can be called by a running work function to describe what
 * the work item is about.  If the worker task gets dumped, this
 * information will be printed out together to help debugging.  The
 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
 */
4259void set_worker_desc(const char *fmt, ...)
4260{
4261 struct worker *worker = current_wq_worker();
4262 va_list args;
4263
4264 if (worker) {
4265 va_start(args, fmt);
4266 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4267 va_end(args);
4268 worker->desc_valid = true;
4269 }
4270}
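
/*
 * Example: tagging the executing worker with the request being served so
 * that the description shows up in task dumps.  Sketch; my_req and its
 * members are hypothetical.
 *
 *	static void my_req_workfn(struct work_struct *work)
 *	{
 *		struct my_req *req = container_of(work, struct my_req, work);
 *
 *		set_worker_desc("my_drv: req %llu", req->seq);
 *		...
 *	}
 */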
4271
/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * context and description of the work item.  If @task is not a worker,
 * this function can be safely called on any task as long as the
 * task_struct itself is accessible.  While safe, this function isn't
 * synchronized and may print out mixups or garbages of limited accuracy.
 */
4285void print_worker_info(const char *log_lvl, struct task_struct *task)
4286{
4287 work_func_t *fn = NULL;
4288 char name[WQ_NAME_LEN] = { };
4289 char desc[WORKER_DESC_LEN] = { };
4290 struct pool_workqueue *pwq = NULL;
4291 struct workqueue_struct *wq = NULL;
4292 bool desc_valid = false;
4293 struct worker *worker;
4294
4295 if (!(task->flags & PF_WQ_WORKER))
4296 return;
4297
 /*
  * This function is called without any synchronization and @task
  * could be in any state.  Be careful with dereferences.
  */
4302 worker = kthread_probe_data(task);
4303
 /*
  * Carefully copy the associated workqueue's workfn, name and desc.
  * Keep the original last '\0' in case the original is garbage.
  */
4308 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4309 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4310 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4311 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4312
 /* copy the worker description if one was set */
4314 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4315 if (desc_valid)
4316 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4317
4318 if (fn || name[0] || desc[0]) {
4319 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4320 if (desc[0])
4321 pr_cont(" (%s)", desc);
4322 pr_cont("\n");
4323 }
4324}
4325
4326static void pr_cont_pool_info(struct worker_pool *pool)
4327{
4328 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4329 if (pool->node != NUMA_NO_NODE)
4330 pr_cont(" node=%d", pool->node);
4331 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4332}
4333
4334static void pr_cont_work(bool comma, struct work_struct *work)
4335{
4336 if (work->func == wq_barrier_func) {
4337 struct wq_barrier *barr;
4338
4339 barr = container_of(work, struct wq_barrier, work);
4340
4341 pr_cont("%s BAR(%d)", comma ? "," : "",
4342 task_pid_nr(barr->task));
4343 } else {
4344 pr_cont("%s %pf", comma ? "," : "", work->func);
4345 }
4346}
4347
4348static void show_pwq(struct pool_workqueue *pwq)
4349{
4350 struct worker_pool *pool = pwq->pool;
4351 struct work_struct *work;
4352 struct worker *worker;
4353 bool has_in_flight = false, has_pending = false;
4354 int bkt;
4355
4356 pr_info(" pwq %d:", pool->id);
4357 pr_cont_pool_info(pool);
4358
4359 pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
4360 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4361
4362 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4363 if (worker->current_pwq == pwq) {
4364 has_in_flight = true;
4365 break;
4366 }
4367 }
4368 if (has_in_flight) {
4369 bool comma = false;
4370
4371 pr_info(" in-flight:");
4372 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4373 if (worker->current_pwq != pwq)
4374 continue;
4375
4376 pr_cont("%s %d%s:%pf", comma ? "," : "",
4377 task_pid_nr(worker->task),
4378 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4379 worker->current_func);
4380 list_for_each_entry(work, &worker->scheduled, entry)
4381 pr_cont_work(false, work);
4382 comma = true;
4383 }
4384 pr_cont("\n");
4385 }
4386
4387 list_for_each_entry(work, &pool->worklist, entry) {
4388 if (get_work_pwq(work) == pwq) {
4389 has_pending = true;
4390 break;
4391 }
4392 }
4393 if (has_pending) {
4394 bool comma = false;
4395
4396 pr_info(" pending:");
4397 list_for_each_entry(work, &pool->worklist, entry) {
4398 if (get_work_pwq(work) != pwq)
4399 continue;
4400
4401 pr_cont_work(comma, work);
4402 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4403 }
4404 pr_cont("\n");
4405 }
4406
4407 if (!list_empty(&pwq->delayed_works)) {
4408 bool comma = false;
4409
4410 pr_info(" delayed:");
4411 list_for_each_entry(work, &pwq->delayed_works, entry) {
4412 pr_cont_work(comma, work);
4413 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4414 }
4415 pr_cont("\n");
4416 }
4417}
4418
/**
 * show_workqueue_state - dump workqueue state
 *
 * Called from a sysrq handler or try_to_freeze_tasks() and prints out
 * all busy workqueues and pools.
 */
4425void show_workqueue_state(void)
4426{
4427 struct workqueue_struct *wq;
4428 struct worker_pool *pool;
4429 unsigned long flags;
4430 int pi;
4431
4432 rcu_read_lock_sched();
4433
4434 pr_info("Showing busy workqueues and worker pools:\n");
4435
4436 list_for_each_entry_rcu(wq, &workqueues, list) {
4437 struct pool_workqueue *pwq;
4438 bool idle = true;
4439
4440 for_each_pwq(pwq, wq) {
4441 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4442 idle = false;
4443 break;
4444 }
4445 }
4446 if (idle)
4447 continue;
4448
4449 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4450
4451 for_each_pwq(pwq, wq) {
4452 spin_lock_irqsave(&pwq->pool->lock, flags);
4453 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4454 show_pwq(pwq);
4455 spin_unlock_irqrestore(&pwq->pool->lock, flags);
4456 }
4457 }
4458
4459 for_each_pool(pool, pi) {
4460 struct worker *worker;
4461 bool first = true;
4462
4463 spin_lock_irqsave(&pool->lock, flags);
4464 if (pool->nr_workers == pool->nr_idle)
4465 goto next_pool;
4466
4467 pr_info("pool %d:", pool->id);
4468 pr_cont_pool_info(pool);
4469 pr_cont(" hung=%us workers=%d",
4470 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4471 pool->nr_workers);
4472 if (pool->manager)
4473 pr_cont(" manager: %d",
4474 task_pid_nr(pool->manager->task));
4475 list_for_each_entry(worker, &pool->idle_list, entry) {
4476 pr_cont(" %s%d", first ? "idle: " : "",
4477 task_pid_nr(worker->task));
4478 first = false;
4479 }
4480 pr_cont("\n");
4481 next_pool:
4482 spin_unlock_irqrestore(&pool->lock, flags);
4483 }
4484
4485 rcu_read_unlock_sched();
4486}
4487
/*
 * CPU hotplug.
 *
 * There are two challenges in supporting CPU hotplug.  Firstly, there
 * are a lot of assumptions on strong associations among work, pwq and
 * pool which make migrating pending and scheduled works very
 * difficult to implement without impacting hot paths.  Secondly,
 * worker pools serve mix of short, long and very long running works making
 * blocked draining impractical.
 *
 * This is solved by allowing the pools to be disassociated from the CPU
 * running as an unbound one and allowing it to be reattached later if the
 * cpu comes back online.
 */
4503static void wq_unbind_fn(struct work_struct *work)
4504{
4505 int cpu = smp_processor_id();
4506 struct worker_pool *pool;
4507 struct worker *worker;
4508
4509 for_each_cpu_worker_pool(pool, cpu) {
4510 mutex_lock(&pool->attach_mutex);
4511 spin_lock_irq(&pool->lock);

 /*
  * We've blocked all attach/detach operations.  Make all
  * workers unbound and set DISASSOCIATED.  Before this,
  * all workers except for those which are still executing
  * works from before the last CPU down must be on the CPU.
  * After this, they may become diasporas.
  */
4520 for_each_pool_worker(worker, pool)
4521 worker->flags |= WORKER_UNBOUND;
4522
4523 pool->flags |= POOL_DISASSOCIATED;
4524
4525 spin_unlock_irq(&pool->lock);
4526 mutex_unlock(&pool->attach_mutex);
4527
 /*
  * Call schedule() so that we cross rq->lock and thus can
  * guarantee sched callbacks see the %WORKER_UNBOUND flag.
  * This is necessary as scheduler callbacks may be invoked
  * from other cpus.
  */
4534 schedule();
4535

 /*
  * Sched callbacks are disabled now.  Zap nr_running.
  * After this, nr_running stays zero and need_more_worker()
  * and keep_working() are always true as long as the
  * worklist is not empty.  This pool now behaves as an
  * unbound (in terms of concurrency management) pool which
  * is served by workers tied to the pool.
  */
4544 atomic_set(&pool->nr_running, 0);
4545
 /*
  * With concurrency management just turned off, a busy
  * worker blocking could lead to lengthy stalls.  Kick off
  * unbound chain execution of currently pending work items.
  */
4551 spin_lock_irq(&pool->lock);
4552 wake_up_worker(pool);
4553 spin_unlock_irq(&pool->lock);
4554 }
4555}
4556
/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Rebind all workers to the CPU.
 */
4563static void rebind_workers(struct worker_pool *pool)
4564{
4565 struct worker *worker;
4566
4567 lockdep_assert_held(&pool->attach_mutex);
4568
 /*
  * Restore CPU affinity of all workers.  As all idle workers should
  * be on the run-queue of the associated CPU before any local
  * wake-ups for concurrency management happen, restore CPU affinity
  * of all workers first and then clear UNBOUND.  As we're called
  * from CPU_ONLINE, the following shouldn't fail.
  */
4576 for_each_pool_worker(worker, pool)
4577 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4578 pool->attrs->cpumask) < 0);
4579
4580 spin_lock_irq(&pool->lock);
4581
 /*
  * CPU hotplug notifiers can call DOWN_FAILED without a preceding
  * DOWN_PREPARE, in which case the pool never got disassociated.
  * Work around it by bailing out here.
  */
4587 if (!(pool->flags & POOL_DISASSOCIATED)) {
4588 spin_unlock_irq(&pool->lock);
4589 return;
4590 }
4591
4592 pool->flags &= ~POOL_DISASSOCIATED;
4593
4594 for_each_pool_worker(worker, pool) {
4595 unsigned int worker_flags = worker->flags;
4596
 /*
  * A bound idle worker should actually be on the runqueue
  * of the associated CPU for local wake-ups targeting it to
  * work.  Kick all idle workers so that they migrate to the
  * associated CPU.  Doing this in the same loop as
  * replacing UNBOUND with REBOUND is safe as no worker will
  * be bound before @pool->lock is released.
  */
4605 if (worker_flags & WORKER_IDLE)
4606 wake_up_process(worker->task);
4607
 /*
  * We want to clear UNBOUND but can't directly call
  * worker_clr_flags() or adjust nr_running.  Atomically
  * replace UNBOUND with another NOT_RUNNING flag REBOUND.
  * @worker will clear REBOUND using worker_clr_flags() when
  * it initiates the next execution cycle thus restoring
  * concurrency management.  Note that when or whether
  * @worker clears REBOUND doesn't affect correctness.
  *
  * ACCESS_ONCE() is necessary because @worker->flags may be
  * tested without holding any lock in
  * wq_worker_waking_up().  Without it, NOT_RUNNING test may
  * fail incorrectly leading to premature concurrency
  * management operations.
  */
4623 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4624 worker_flags |= WORKER_REBOUND;
4625 worker_flags &= ~WORKER_UNBOUND;
4626 ACCESS_ONCE(worker->flags) = worker_flags;
4627 }
4628
4629 spin_unlock_irq(&pool->lock);
4630}
4631
/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs.  When a worker of such pool get scheduled, the scheduler resets
 * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
 * online CPU before, cpus_allowed of all its workers should be restored.
 */
4642static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4643{
4644 static cpumask_t cpumask;
4645 struct worker *worker;
4646
4647 lockdep_assert_held(&pool->attach_mutex);
4648
 /* is @cpu allowed for @pool? */
4650 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4651 return;
4652
4653 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4654
 /* as we're called from CPU_ONLINE, the following shouldn't fail */
4656 for_each_pool_worker(worker, pool)
4657 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
4658}
4659
4660int workqueue_prepare_cpu(unsigned int cpu)
4661{
4662 struct worker_pool *pool;
4663
4664 for_each_cpu_worker_pool(pool, cpu) {
4665 if (pool->nr_workers)
4666 continue;
4667 if (!create_worker(pool))
4668 return -ENOMEM;
4669 }
4670 return 0;
4671}
4672
4673int workqueue_online_cpu(unsigned int cpu)
4674{
4675 struct worker_pool *pool;
4676 struct workqueue_struct *wq;
4677 int pi;
4678
4679 mutex_lock(&wq_pool_mutex);
4680
4681 for_each_pool(pool, pi) {
4682 mutex_lock(&pool->attach_mutex);
4683
4684 if (pool->cpu == cpu)
4685 rebind_workers(pool);
4686 else if (pool->cpu < 0)
4687 restore_unbound_workers_cpumask(pool, cpu);
4688
4689 mutex_unlock(&pool->attach_mutex);
4690 }
4691
 /* update NUMA affinity of unbound workqueues */
4693 list_for_each_entry(wq, &workqueues, list)
4694 wq_update_unbound_numa(wq, cpu, true);
4695
4696 mutex_unlock(&wq_pool_mutex);
4697 return 0;
4698}
4699
4700int workqueue_offline_cpu(unsigned int cpu)
4701{
4702 struct work_struct unbind_work;
4703 struct workqueue_struct *wq;
4704
 /* unbinding per-cpu workers should happen on the local CPU */
4706 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
4707 queue_work_on(cpu, system_highpri_wq, &unbind_work);
4708
 /* update NUMA affinity of unbound workqueues */
4710 mutex_lock(&wq_pool_mutex);
4711 list_for_each_entry(wq, &workqueues, list)
4712 wq_update_unbound_numa(wq, cpu, false);
4713 mutex_unlock(&wq_pool_mutex);
4714
 /* wait for per-cpu unbinding to finish */
4716 flush_work(&unbind_work);
4717 destroy_work_on_stack(&unbind_work);
4718 return 0;
4719}
4720
4721#ifdef CONFIG_SMP
4722
4723struct work_for_cpu {
4724 struct work_struct work;
4725 long (*fn)(void *);
4726 void *arg;
4727 long ret;
4728};
4729
4730static void work_for_cpu_fn(struct work_struct *work)
4731{
4732 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4733
4734 wfc->ret = wfc->fn(wfc->arg);
4735}
4736
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from
 * completing.
 *
 * Return: The value @fn returns.
 */
4748long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4749{
4750 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4751
4752 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4753 schedule_work_on(cpu, &wfc.work);
4754 flush_work(&wfc.work);
4755 destroy_work_on_stack(&wfc.work);
4756 return wfc.ret;
4757}
4758EXPORT_SYMBOL_GPL(work_on_cpu);
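
/*
 * Example: reading a CPU-local resource from the right CPU.  Sketch;
 * read_my_counter() and my_counter are hypothetical.  @fn runs in
 * process context on @cpu and its return value is passed back.
 *
 *	static long read_my_counter(void *arg)
 *	{
 *		return this_cpu_read(my_counter);
 *	}
 *
 *	long val = work_on_cpu(cpu, read_my_counter, NULL);
 */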
4759
/**
 * work_on_cpu_safe - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn:  the function to run
 * @arg: the function argument
 *
 * Disables CPU hotplug and calls work_on_cpu().  The caller must not hold
 * any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
4771long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
4772{
4773 long ret = -ENODEV;
4774
4775 get_online_cpus();
4776 if (cpu_online(cpu))
4777 ret = work_on_cpu(cpu, fn, arg);
4778 put_online_cpus();
4779 return ret;
4780}
4781EXPORT_SYMBOL_GPL(work_on_cpu_safe);
4782#endif
4783
4784#ifdef CONFIG_FREEZER

/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
 * workqueues will queue new works to their delayed_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4796void freeze_workqueues_begin(void)
4797{
4798 struct workqueue_struct *wq;
4799 struct pool_workqueue *pwq;
4800
4801 mutex_lock(&wq_pool_mutex);
4802
4803 WARN_ON_ONCE(workqueue_freezing);
4804 workqueue_freezing = true;
4805
4806 list_for_each_entry(wq, &workqueues, list) {
4807 mutex_lock(&wq->mutex);
4808 for_each_pwq(pwq, wq)
4809 pwq_adjust_max_active(pwq);
4810 mutex_unlock(&wq->mutex);
4811 }
4812
4813 mutex_unlock(&wq_pool_mutex);
4814}
4815
/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex.
 *
 * Return:
 * %true if some freezable workqueues are still busy.  %false if freezing
 * is complete.
 */
4829bool freeze_workqueues_busy(void)
4830{
4831 bool busy = false;
4832 struct workqueue_struct *wq;
4833 struct pool_workqueue *pwq;
4834
4835 mutex_lock(&wq_pool_mutex);
4836
4837 WARN_ON_ONCE(!workqueue_freezing);
4838
4839 list_for_each_entry(wq, &workqueues, list) {
4840 if (!(wq->flags & WQ_FREEZABLE))
4841 continue;

 /*
  * nr_active is monotonically decreasing.  It's safe
  * to peek without lock.
  */
4846 rcu_read_lock_sched();
4847 for_each_pwq(pwq, wq) {
4848 WARN_ON_ONCE(pwq->nr_active < 0);
4849 if (pwq->nr_active) {
4850 busy = true;
4851 rcu_read_unlock_sched();
4852 goto out_unlock;
4853 }
4854 }
4855 rcu_read_unlock_sched();
4856 }
4857out_unlock:
4858 mutex_unlock(&wq_pool_mutex);
4859 return busy;
4860}
4861
/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected
 * frozen works are transferred to their respective pool worklists.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4871void thaw_workqueues(void)
4872{
4873 struct workqueue_struct *wq;
4874 struct pool_workqueue *pwq;
4875
4876 mutex_lock(&wq_pool_mutex);
4877
4878 if (!workqueue_freezing)
4879 goto out_unlock;
4880
4881 workqueue_freezing = false;
4882
 /* restore max_active and repopulate worklist */
4884 list_for_each_entry(wq, &workqueues, list) {
4885 mutex_lock(&wq->mutex);
4886 for_each_pwq(pwq, wq)
4887 pwq_adjust_max_active(pwq);
4888 mutex_unlock(&wq->mutex);
4889 }
4890
4891out_unlock:
4892 mutex_unlock(&wq_pool_mutex);
4893}
4894#endif
4895
4896static int workqueue_apply_unbound_cpumask(void)
4897{
4898 LIST_HEAD(ctxs);
4899 int ret = 0;
4900 struct workqueue_struct *wq;
4901 struct apply_wqattrs_ctx *ctx, *n;
4902
4903 lockdep_assert_held(&wq_pool_mutex);
4904
4905 list_for_each_entry(wq, &workqueues, list) {
4906 if (!(wq->flags & WQ_UNBOUND))
4907 continue;
4908
4909 if (wq->flags & __WQ_ORDERED)
4910 continue;
4911
4912 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
4913 if (!ctx) {
4914 ret = -ENOMEM;
4915 break;
4916 }
4917
4918 list_add_tail(&ctx->list, &ctxs);
4919 }
4920
4921 list_for_each_entry_safe(ctx, n, &ctxs, list) {
4922 if (!ret)
4923 apply_wqattrs_commit(ctx);
4924 apply_wqattrs_cleanup(ctx);
4925 }
4926
4927 return ret;
4928}
4929
/**
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueues cpumask is a global cpumask that limits
 * the affinity of all unbound workqueues.  This function checks @cpumask
 * and applies it to all unbound workqueues, updating all their pwqs.
 *
 * Return:	0	- success
 *		-EINVAL	- invalid @cpumask
 *		-ENOMEM	- failed to allocate memory for attrs or pwqs
 */
4942int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4943{
4944 int ret = -EINVAL;
4945 cpumask_var_t saved_cpumask;
4946
4947 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4948 return -ENOMEM;
4949
4950 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4951 if (!cpumask_empty(cpumask)) {
4952 apply_wqattrs_lock();

 /* save the old wq_unbound_cpumask */
 cpumask_copy(saved_cpumask, wq_unbound_cpumask);

 /* update wq_unbound_cpumask first and apply it to wqs */
 cpumask_copy(wq_unbound_cpumask, cpumask);
 ret = workqueue_apply_unbound_cpumask();

 /* restore the old wq_unbound_cpumask on failure */
4962 if (ret < 0)
4963 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
4964
4965 apply_wqattrs_unlock();
4966 }
4967
4968 free_cpumask_var(saved_cpumask);
4969 return ret;
4970}
4971
4972#ifdef CONFIG_SYSFS
/*
 * Workqueues with WQ_SYSFS flag set are visible to userland via
 * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the
 * following attributes.
 *
 *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
 *  max_active	RW int	: maximum number of in-flight work items
 *
 * Unbound workqueues have the following extra attributes.
 *
 *  pool_ids	RO int	: the associated pool IDs for each node
 *  nice	RW int	: nice value of the workers
 *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
 *  numa	RW bool	: whether to enable NUMA affinity
 */
4987struct wq_device {
4988 struct workqueue_struct *wq;
4989 struct device dev;
4990};
4991
4992static struct workqueue_struct *dev_to_wq(struct device *dev)
4993{
4994 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
4995
4996 return wq_dev->wq;
4997}
4998
4999static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5000 char *buf)
5001{
5002 struct workqueue_struct *wq = dev_to_wq(dev);
5003
5004 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5005}
5006static DEVICE_ATTR_RO(per_cpu);
5007
5008static ssize_t max_active_show(struct device *dev,
5009 struct device_attribute *attr, char *buf)
5010{
5011 struct workqueue_struct *wq = dev_to_wq(dev);
5012
5013 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5014}
5015
5016static ssize_t max_active_store(struct device *dev,
5017 struct device_attribute *attr, const char *buf,
5018 size_t count)
5019{
5020 struct workqueue_struct *wq = dev_to_wq(dev);
5021 int val;
5022
5023 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5024 return -EINVAL;
5025
5026 workqueue_set_max_active(wq, val);
5027 return count;
5028}
5029static DEVICE_ATTR_RW(max_active);
5030
5031static struct attribute *wq_sysfs_attrs[] = {
5032 &dev_attr_per_cpu.attr,
5033 &dev_attr_max_active.attr,
5034 NULL,
5035};
5036ATTRIBUTE_GROUPS(wq_sysfs);
5037
5038static ssize_t wq_pool_ids_show(struct device *dev,
5039 struct device_attribute *attr, char *buf)
5040{
5041 struct workqueue_struct *wq = dev_to_wq(dev);
5042 const char *delim = "";
5043 int node, written = 0;
5044
5045 rcu_read_lock_sched();
5046 for_each_node(node) {
5047 written += scnprintf(buf + written, PAGE_SIZE - written,
5048 "%s%d:%d", delim, node,
5049 unbound_pwq_by_node(wq, node)->pool->id);
5050 delim = " ";
5051 }
5052 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5053 rcu_read_unlock_sched();
5054
5055 return written;
5056}
5057
5058static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5059 char *buf)
5060{
5061 struct workqueue_struct *wq = dev_to_wq(dev);
5062 int written;
5063
5064 mutex_lock(&wq->mutex);
5065 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5066 mutex_unlock(&wq->mutex);
5067
5068 return written;
5069}
5070
/* prepare workqueue_attrs for sysfs store operations */
5072static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5073{
5074 struct workqueue_attrs *attrs;
5075
5076 lockdep_assert_held(&wq_pool_mutex);
5077
5078 attrs = alloc_workqueue_attrs(GFP_KERNEL);
5079 if (!attrs)
5080 return NULL;
5081
5082 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5083 return attrs;
5084}
5085
5086static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5087 const char *buf, size_t count)
5088{
5089 struct workqueue_struct *wq = dev_to_wq(dev);
5090 struct workqueue_attrs *attrs;
5091 int ret = -ENOMEM;
5092
5093 apply_wqattrs_lock();
5094
5095 attrs = wq_sysfs_prep_attrs(wq);
5096 if (!attrs)
5097 goto out_unlock;
5098
5099 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5100 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5101 ret = apply_workqueue_attrs_locked(wq, attrs);
5102 else
5103 ret = -EINVAL;
5104
5105out_unlock:
5106 apply_wqattrs_unlock();
5107 free_workqueue_attrs(attrs);
5108 return ret ?: count;
5109}
5110
5111static ssize_t wq_cpumask_show(struct device *dev,
5112 struct device_attribute *attr, char *buf)
5113{
5114 struct workqueue_struct *wq = dev_to_wq(dev);
5115 int written;
5116
5117 mutex_lock(&wq->mutex);
5118 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5119 cpumask_pr_args(wq->unbound_attrs->cpumask));
5120 mutex_unlock(&wq->mutex);
5121 return written;
5122}
5123
5124static ssize_t wq_cpumask_store(struct device *dev,
5125 struct device_attribute *attr,
5126 const char *buf, size_t count)
5127{
5128 struct workqueue_struct *wq = dev_to_wq(dev);
5129 struct workqueue_attrs *attrs;
5130 int ret = -ENOMEM;
5131
5132 apply_wqattrs_lock();
5133
5134 attrs = wq_sysfs_prep_attrs(wq);
5135 if (!attrs)
5136 goto out_unlock;
5137
5138 ret = cpumask_parse(buf, attrs->cpumask);
5139 if (!ret)
5140 ret = apply_workqueue_attrs_locked(wq, attrs);
5141
5142out_unlock:
5143 apply_wqattrs_unlock();
5144 free_workqueue_attrs(attrs);
5145 return ret ?: count;
5146}
5147
5148static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5149 char *buf)
5150{
5151 struct workqueue_struct *wq = dev_to_wq(dev);
5152 int written;
5153
5154 mutex_lock(&wq->mutex);
5155 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5156 !wq->unbound_attrs->no_numa);
5157 mutex_unlock(&wq->mutex);
5158
5159 return written;
5160}
5161
5162static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5163 const char *buf, size_t count)
5164{
5165 struct workqueue_struct *wq = dev_to_wq(dev);
5166 struct workqueue_attrs *attrs;
5167 int v, ret = -ENOMEM;
5168
5169 apply_wqattrs_lock();
5170
5171 attrs = wq_sysfs_prep_attrs(wq);
5172 if (!attrs)
5173 goto out_unlock;
5174
5175 ret = -EINVAL;
5176 if (sscanf(buf, "%d", &v) == 1) {
5177 attrs->no_numa = !v;
5178 ret = apply_workqueue_attrs_locked(wq, attrs);
5179 }
5180
5181out_unlock:
5182 apply_wqattrs_unlock();
5183 free_workqueue_attrs(attrs);
5184 return ret ?: count;
5185}
5186
5187static struct device_attribute wq_sysfs_unbound_attrs[] = {
5188 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5189 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5190 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5191 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5192 __ATTR_NULL,
5193};
5194
5195static struct bus_type wq_subsys = {
5196 .name = "workqueue",
5197 .dev_groups = wq_sysfs_groups,
5198};
5199
5200static ssize_t wq_unbound_cpumask_show(struct device *dev,
5201 struct device_attribute *attr, char *buf)
5202{
5203 int written;
5204
5205 mutex_lock(&wq_pool_mutex);
5206 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5207 cpumask_pr_args(wq_unbound_cpumask));
5208 mutex_unlock(&wq_pool_mutex);
5209
5210 return written;
5211}
5212
5213static ssize_t wq_unbound_cpumask_store(struct device *dev,
5214 struct device_attribute *attr, const char *buf, size_t count)
5215{
5216 cpumask_var_t cpumask;
5217 int ret;
5218
5219 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5220 return -ENOMEM;
5221
5222 ret = cpumask_parse(buf, cpumask);
5223 if (!ret)
5224 ret = workqueue_set_unbound_cpumask(cpumask);
5225
5226 free_cpumask_var(cpumask);
5227 return ret ? ret : count;
5228}
5229
5230static struct device_attribute wq_sysfs_cpumask_attr =
5231 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5232 wq_unbound_cpumask_store);
5233
5234static int __init wq_sysfs_init(void)
5235{
5236 int err;
5237
5238 err = subsys_virtual_register(&wq_subsys, NULL);
5239 if (err)
5240 return err;
5241
5242 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5243}
5244core_initcall(wq_sysfs_init);
5245
5246static void wq_device_release(struct device *dev)
5247{
5248 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5249
5250 kfree(wq_dev);
5251}
5252
/**
 * workqueue_sysfs_register - make a workqueue visible in sysfs
 * @wq: the workqueue to register
 *
 * Expose @wq in sysfs under /sys/bus/workqueue/devices.
 * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
 * which is the preferred method.
 *
 * Workqueue user should use this function directly iff it wants to apply
 * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
 * apply_workqueue_attrs() may race against userland updating the
 * attributes.
 *
 * Return: 0 on success, -errno on failure.
 */
5268int workqueue_sysfs_register(struct workqueue_struct *wq)
5269{
5270 struct wq_device *wq_dev;
5271 int ret;
5272
 /*
  * Adjusting max_active or creating new pwqs by applying
  * attributes breaks ordering guarantee.  Disallow exposing
  * ordered workqueues.
  */
5278 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5279 return -EINVAL;
5280
5281 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5282 if (!wq_dev)
5283 return -ENOMEM;
5284
5285 wq_dev->wq = wq;
5286 wq_dev->dev.bus = &wq_subsys;
5287 wq_dev->dev.release = wq_device_release;
5288 dev_set_name(&wq_dev->dev, "%s", wq->name);
5289
 /*
  * unbound_attrs are created separately.  Suppress uevent until
  * everything is ready.
  */
5294 dev_set_uevent_suppress(&wq_dev->dev, true);
5295
5296 ret = device_register(&wq_dev->dev);
5297 if (ret) {
5298 kfree(wq_dev);
5299 wq->wq_dev = NULL;
5300 return ret;
5301 }
5302
5303 if (wq->flags & WQ_UNBOUND) {
5304 struct device_attribute *attr;
5305
5306 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5307 ret = device_create_file(&wq_dev->dev, attr);
5308 if (ret) {
5309 device_unregister(&wq_dev->dev);
5310 wq->wq_dev = NULL;
5311 return ret;
5312 }
5313 }
5314 }
5315
5316 dev_set_uevent_suppress(&wq_dev->dev, false);
5317 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5318 return 0;
5319}
5320
/**
 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
 * @wq: the workqueue to unregister
 *
 * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
 */
5327static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5328{
5329 struct wq_device *wq_dev = wq->wq_dev;
5330
5331 if (!wq->wq_dev)
5332 return;
5333
5334 wq->wq_dev = NULL;
5335 device_unregister(&wq_dev->dev);
5336}
5337#else
5338static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5339#endif
5340
/*
 * Workqueue watchdog.
 *
 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
 * flush dependency, a concurrency managed work item which stays RUNNING
 * indefinitely.  Workqueue stalls can be very difficult to debug as the
 * usual warning mechanisms don't trigger and internal workqueue state is
 * largely opaque.
 *
 * Workqueue watchdog monitors all worker pools periodically and dumps
 * state if some pools failed to make forward progress in a while where
 * forward progress is defined as the first item on ->worklist changing.
 *
 * This mechanism is controlled through the kernel parameter
 * "workqueue.watchdog_thresh" which can be updated at runtime through the
 * corresponding sysfs parameter file.
 */
5358#ifdef CONFIG_WQ_WATCHDOG
5359
5360static void wq_watchdog_timer_fn(unsigned long data);
5361
5362static unsigned long wq_watchdog_thresh = 30;
5363static struct timer_list wq_watchdog_timer =
5364 TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
5365
5366static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5367static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5368
5369static void wq_watchdog_reset_touched(void)
5370{
5371 int cpu;
5372
5373 wq_watchdog_touched = jiffies;
5374 for_each_possible_cpu(cpu)
5375 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5376}
5377
5378static void wq_watchdog_timer_fn(unsigned long data)
5379{
5380 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5381 bool lockup_detected = false;
5382 struct worker_pool *pool;
5383 int pi;
5384
5385 if (!thresh)
5386 return;
5387
5388 rcu_read_lock();
5389
5390 for_each_pool(pool, pi) {
5391 unsigned long pool_ts, touched, ts;
5392
5393 if (list_empty(&pool->worklist))
5394 continue;
5395
 /* get the latest of pool and touched timestamps */
5397 pool_ts = READ_ONCE(pool->watchdog_ts);
5398 touched = READ_ONCE(wq_watchdog_touched);
5399
5400 if (time_after(pool_ts, touched))
5401 ts = pool_ts;
5402 else
5403 ts = touched;
5404
5405 if (pool->cpu >= 0) {
5406 unsigned long cpu_touched =
5407 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5408 pool->cpu));
5409 if (time_after(cpu_touched, ts))
5410 ts = cpu_touched;
5411 }
5412
 /* did we stall? */
5414 if (time_after(jiffies, ts + thresh)) {
5415 lockup_detected = true;
5416 pr_emerg("BUG: workqueue lockup - pool");
5417 pr_cont_pool_info(pool);
5418 pr_cont(" stuck for %us!\n",
5419 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5420 }
5421 }
5422
5423 rcu_read_unlock();
5424
5425 if (lockup_detected)
5426 show_workqueue_state();
5427
5428 wq_watchdog_reset_touched();
5429 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5430}
5431
5432void wq_watchdog_touch(int cpu)
5433{
5434 if (cpu >= 0)
5435 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5436 else
5437 wq_watchdog_touched = jiffies;
5438}
5439
5440static void wq_watchdog_set_thresh(unsigned long thresh)
5441{
5442 wq_watchdog_thresh = 0;
5443 del_timer_sync(&wq_watchdog_timer);
5444
5445 if (thresh) {
5446 wq_watchdog_thresh = thresh;
5447 wq_watchdog_reset_touched();
5448 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5449 }
5450}
5451
5452static int wq_watchdog_param_set_thresh(const char *val,
5453 const struct kernel_param *kp)
5454{
5455 unsigned long thresh;
5456 int ret;
5457
5458 ret = kstrtoul(val, 0, &thresh);
5459 if (ret)
5460 return ret;
5461
5462 if (system_wq)
5463 wq_watchdog_set_thresh(thresh);
5464 else
5465 wq_watchdog_thresh = thresh;
5466
5467 return 0;
5468}
5469
5470static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5471 .set = wq_watchdog_param_set_thresh,
5472 .get = param_get_ulong,
5473};
5474
5475module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5476 0644);
5477
5478static void wq_watchdog_init(void)
5479{
5480 wq_watchdog_set_thresh(wq_watchdog_thresh);
5481}
5482
5483#else
5484
5485static inline void wq_watchdog_init(void) { }
5486
5487#endif
5488
5489static void __init wq_numa_init(void)
5490{
5491 cpumask_var_t *tbl;
5492 int node, cpu;
5493
5494 if (num_possible_nodes() <= 1)
5495 return;
5496
5497 if (wq_disable_numa) {
5498 pr_info("workqueue: NUMA affinity support disabled\n");
5499 return;
5500 }
5501
5502 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5503 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5504
 /*
  * We want masks of possible CPUs of each node which isn't readily
  * available.  Build one from cpu_to_node() which should have been
  * fully initialized by now.
  */
5510 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
5511 BUG_ON(!tbl);
5512
5513 for_each_node(node)
5514 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5515 node_online(node) ? node : NUMA_NO_NODE));
5516
5517 for_each_possible_cpu(cpu) {
5518 node = cpu_to_node(cpu);
5519 if (WARN_ON(node == NUMA_NO_NODE)) {
5520 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
 /* happens iff arch is bonkers, let's just proceed */
5522 return;
5523 }
5524 cpumask_set_cpu(cpu, tbl[node]);
5525 }
5526
5527 wq_numa_possible_cpumask = tbl;
5528 wq_numa_enabled = true;
5529}
5530
/**
 * workqueue_init_early - early init for workqueue subsystem
 *
 * This is the first half of two-staged workqueue subsystem initialization
 * and invoked as soon as the bare basics - memory allocation, cpumasks and
 * idr are up.  It sets up all the data structures and system workqueues
 * and allows early boot code to create workqueues and queue/cancel work
 * items.  Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
5541int __init workqueue_init_early(void)
5542{
5543 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
5544 int i, cpu;
5545
5546 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5547
5548 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5549 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
5550
5551 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5552
 /* initialize CPU pools */
5554 for_each_possible_cpu(cpu) {
5555 struct worker_pool *pool;
5556
5557 i = 0;
5558 for_each_cpu_worker_pool(pool, cpu) {
5559 BUG_ON(init_worker_pool(pool));
5560 pool->cpu = cpu;
5561 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
5562 pool->attrs->nice = std_nice[i++];
5563 pool->node = cpu_to_node(cpu);
5564
 /* alloc pool ID */
5566 mutex_lock(&wq_pool_mutex);
5567 BUG_ON(worker_pool_assign_id(pool));
5568 mutex_unlock(&wq_pool_mutex);
5569 }
5570 }
5571
 /* create default unbound and ordered wq attrs */
5573 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5574 struct workqueue_attrs *attrs;
5575
5576 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5577 attrs->nice = std_nice[i];
5578 unbound_std_wq_attrs[i] = attrs;
5579
 /*
  * An ordered wq should have only one pwq as ordering is
  * guaranteed by max_active which is enforced by pwqs.
  * Turn off NUMA so that dfl_pwq is used for all nodes.
  */
5585 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5586 attrs->nice = std_nice[i];
5587 attrs->no_numa = true;
5588 ordered_wq_attrs[i] = attrs;
5589 }
5590
5591 system_wq = alloc_workqueue("events", 0, 0);
5592 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5593 system_long_wq = alloc_workqueue("events_long", 0, 0);
5594 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5595 WQ_UNBOUND_MAX_ACTIVE);
5596 system_freezable_wq = alloc_workqueue("events_freezable",
5597 WQ_FREEZABLE, 0);
5598 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5599 WQ_POWER_EFFICIENT, 0);
5600 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5601 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5602 0);
5603 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5604 !system_unbound_wq || !system_freezable_wq ||
5605 !system_power_efficient_wq ||
5606 !system_freezable_power_efficient_wq);
5607
5608 return 0;
5609}
5610
/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the latter half of two-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled.
 * Workqueues have been created and work items queued on them, but there
 * are no kworkers executing the work items yet.  Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
5620int __init workqueue_init(void)
5621{
5622 struct workqueue_struct *wq;
5623 struct worker_pool *pool;
5624 int cpu, bkt;
5625
 /*
  * It'd be simpler to initialize NUMA in workqueue_init_early() but
  * CPU to node mapping may not be available that early on some
  * archs such as power and arm64.  As per-cpu pools created
  * previously could be missing node hint and unbound pools NUMA
  * affinity, fix them up.
  */
5633 wq_numa_init();
5634
5635 mutex_lock(&wq_pool_mutex);
5636
5637 for_each_possible_cpu(cpu) {
5638 for_each_cpu_worker_pool(pool, cpu) {
5639 pool->node = cpu_to_node(cpu);
5640 }
5641 }
5642
5643 list_for_each_entry(wq, &workqueues, list)
5644 wq_update_unbound_numa(wq, smp_processor_id(), true);
5645
5646 mutex_unlock(&wq_pool_mutex);
5647
 /* create the initial workers */
5649 for_each_online_cpu(cpu) {
5650 for_each_cpu_worker_pool(pool, cpu) {
5651 pool->flags &= ~POOL_DISASSOCIATED;
5652 BUG_ON(!create_worker(pool));
5653 }
5654 }
5655
5656 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
5657 BUG_ON(!create_worker(pool));
5658
5659 wq_online = true;
5660 wq_watchdog_init();
5661
5662 return 0;
5663}
5664