/*
 * kernel/workqueue.c - generic asynchronous execution with shared worker pools
 *
 * Work items are queued on per-CPU or unbound pool_workqueues and executed
 * by kworker threads that are shared among all workqueues through
 * worker_pools.  Concurrency management keeps the number of concurrently
 * runnable workers per pool at a minimum, while rescuer threads guarantee
 * forward progress for WQ_MEM_RECLAIM workqueues under memory pressure.
 */
27#include <linux/export.h>
28#include <linux/kernel.h>
29#include <linux/sched.h>
30#include <linux/init.h>
31#include <linux/signal.h>
32#include <linux/completion.h>
33#include <linux/workqueue.h>
34#include <linux/slab.h>
35#include <linux/cpu.h>
36#include <linux/notifier.h>
37#include <linux/kthread.h>
38#include <linux/hardirq.h>
39#include <linux/mempolicy.h>
40#include <linux/freezer.h>
41#include <linux/debug_locks.h>
42#include <linux/lockdep.h>
43#include <linux/idr.h>
44#include <linux/jhash.h>
45#include <linux/hashtable.h>
46#include <linux/rculist.h>
47#include <linux/nodemask.h>
48#include <linux/moduleparam.h>
49#include <linux/uaccess.h>
50#include <linux/sched/isolation.h>
51#include <linux/nmi.h>
52
53#include "workqueue_internal.h"
54
55enum {
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72 POOL_MANAGER_ACTIVE = 1 << 0,
73 POOL_DISASSOCIATED = 1 << 2,
74
75
76 WORKER_DIE = 1 << 1,
77 WORKER_IDLE = 1 << 2,
78 WORKER_PREP = 1 << 3,
79 WORKER_CPU_INTENSIVE = 1 << 6,
80 WORKER_UNBOUND = 1 << 7,
81 WORKER_REBOUND = 1 << 8,
82
83 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
84 WORKER_UNBOUND | WORKER_REBOUND,
85
86 NR_STD_WORKER_POOLS = 2,
87
88 UNBOUND_POOL_HASH_ORDER = 6,
89 BUSY_WORKER_HASH_ORDER = 6,
90
91 MAX_IDLE_WORKERS_RATIO = 4,
92 IDLE_WORKER_TIMEOUT = 300 * HZ,
93
94 MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
95
96
97 MAYDAY_INTERVAL = HZ / 10,
98 CREATE_COOLDOWN = HZ,
99
100
101
102
103
104 RESCUER_NICE_LEVEL = MIN_NICE,
105 HIGHPRI_NICE_LEVEL = MIN_NICE,
106
107 WQ_NAME_LEN = 24,
108};
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
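/*
 * struct worker_pool - the backing pool shared by pool_workqueues.
 *
 * Field access rules, as derived from the code below:
 *  - most fields (worklist, idle_list, busy_hash, counters) are protected
 *    by pool->lock;
 *  - workers and detach_completion are protected by attach_mutex;
 *  - hash_node and refcnt are manipulated under wq_pool_mutex;
 *  - nr_running sits on its own cacheline because it is updated from the
 *    scheduler hooks on every worker sleep/wakeup.
 */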
146struct worker_pool {
147 spinlock_t lock;
148 int cpu;
149 int node;
150 int id;
151 unsigned int flags;
152
153 unsigned long watchdog_ts;
154
155 struct list_head worklist;
156
157 int nr_workers;
158 int nr_idle;
159
160 struct list_head idle_list;
161 struct timer_list idle_timer;
162 struct timer_list mayday_timer;
163
164
165 DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
166
167
168 struct worker *manager;
169 struct mutex attach_mutex;
170 struct list_head workers;
171 struct completion *detach_completion;
172
173 struct ida worker_ida;
174
175 struct workqueue_attrs *attrs;
176 struct hlist_node hash_node;
177 int refcnt;
178
179
180
181
182
183
184 atomic_t nr_running ____cacheline_aligned_in_smp;
185
186
187
188
189
190 struct rcu_head rcu;
191} ____cacheline_aligned_in_smp;
192
193
194
195
196
197
198
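/*
 * struct pool_workqueue - links a workqueue to a worker_pool.
 *
 * A pwq carries the per-queue flush colors, the active-work accounting
 * (nr_active/max_active with overflow parked on delayed_works), the mayday
 * node for the rescuer and a reference count.  It is aligned so that the
 * low WORK_STRUCT_FLAG_BITS of its address are free to hold the flag bits
 * stored alongside the pointer in work->data.
 */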
199struct pool_workqueue {
200 struct worker_pool *pool;
201 struct workqueue_struct *wq;
202 int work_color;
203 int flush_color;
204 int refcnt;
205 int nr_in_flight[WORK_NR_COLORS];
206
207 int nr_active;
208 int max_active;
209 struct list_head delayed_works;
210 struct list_head pwqs_node;
211 struct list_head mayday_node;
212
213
214
215
216
217
218
219 struct work_struct unbound_release_work;
220 struct rcu_head rcu;
221} __aligned(1 << WORK_STRUCT_FLAG_BITS);
222
223
224
225
226struct wq_flusher {
227 struct list_head list;
228 int flush_color;
229 struct completion done;
230};
231
232struct wq_device;
233
234
235
236
237
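/*
 * struct workqueue_struct - the externally visible workqueue.
 *
 * It carries the list of pool_workqueues serving it, the flush machinery
 * (work_color, flush_color, flusher lists), the mayday list for the
 * rescuer and, for unbound workqueues, the default pwq and the per-node
 * pwq table at the end of the structure.
 */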
238struct workqueue_struct {
239 struct list_head pwqs;
240 struct list_head list;
241
242 struct mutex mutex;
243 int work_color;
244 int flush_color;
245 atomic_t nr_pwqs_to_flush;
246 struct wq_flusher *first_flusher;
247 struct list_head flusher_queue;
248 struct list_head flusher_overflow;
249
250 struct list_head maydays;
251 struct worker *rescuer;
252
253 int nr_drainers;
254 int saved_max_active;
255
256 struct workqueue_attrs *unbound_attrs;
257 struct pool_workqueue *dfl_pwq;
258
259#ifdef CONFIG_SYSFS
260 struct wq_device *wq_dev;
261#endif
262#ifdef CONFIG_LOCKDEP
263 struct lockdep_map lockdep_map;
264#endif
265 char name[WQ_NAME_LEN];
266
267
268
269
270
271
272 struct rcu_head rcu;
273
274
275 unsigned int flags ____cacheline_aligned;
276 struct pool_workqueue __percpu *cpu_pwqs;
277 struct pool_workqueue __rcu *numa_pwq_tbl[];
278};
279
280static struct kmem_cache *pwq_cache;
281
282static cpumask_var_t *wq_numa_possible_cpumask;
283
284
285static bool wq_disable_numa;
286module_param_named(disable_numa, wq_disable_numa, bool, 0444);
287
288
289static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
290module_param_named(power_efficient, wq_power_efficient, bool, 0444);
291
292static bool wq_online;
293
294static bool wq_numa_enabled;
295
296
297static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
298
299static DEFINE_MUTEX(wq_pool_mutex);
300static DEFINE_SPINLOCK(wq_mayday_lock);
301static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait);
302
303static LIST_HEAD(workqueues);
304static bool workqueue_freezing;
305
306
307static cpumask_var_t wq_unbound_cpumask;
308
309
310static DEFINE_PER_CPU(int, wq_rr_cpu_last);
311
312
313
314
315
316
317#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
318static bool wq_debug_force_rr_cpu = true;
319#else
320static bool wq_debug_force_rr_cpu = false;
321#endif
322module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
323
324
325static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
326
327static DEFINE_IDR(worker_pool_idr);
328
329
330static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
331
332
333static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
334
335
336static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
337
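/*
 * System-wide workqueues shared by all users: system_wq for short works,
 * system_highpri_wq for high priority ones, system_long_wq for works that
 * may take a while, system_unbound_wq for works not bound to any CPU, and
 * the freezable and power-efficient variants.
 */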
338struct workqueue_struct *system_wq __read_mostly;
339EXPORT_SYMBOL(system_wq);
340struct workqueue_struct *system_highpri_wq __read_mostly;
341EXPORT_SYMBOL_GPL(system_highpri_wq);
342struct workqueue_struct *system_long_wq __read_mostly;
343EXPORT_SYMBOL_GPL(system_long_wq);
344struct workqueue_struct *system_unbound_wq __read_mostly;
345EXPORT_SYMBOL_GPL(system_unbound_wq);
346struct workqueue_struct *system_freezable_wq __read_mostly;
347EXPORT_SYMBOL_GPL(system_freezable_wq);
348struct workqueue_struct *system_power_efficient_wq __read_mostly;
349EXPORT_SYMBOL_GPL(system_power_efficient_wq);
350struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
351EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
352
353static int worker_thread(void *__worker);
354static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
355
356#define CREATE_TRACE_POINTS
357#include <trace/events/workqueue.h>
358
359#define assert_rcu_or_pool_mutex() \
360 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
361 !lockdep_is_held(&wq_pool_mutex), \
362 "sched RCU or wq_pool_mutex should be held")
363
364#define assert_rcu_or_wq_mutex(wq) \
365 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
366 !lockdep_is_held(&wq->mutex), \
367 "sched RCU or wq->mutex should be held")
368
369#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
370 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
371 !lockdep_is_held(&wq->mutex) && \
372 !lockdep_is_held(&wq_pool_mutex), \
373 "sched RCU, wq->mutex or wq_pool_mutex should be held")
374
375#define for_each_cpu_worker_pool(pool, cpu) \
376 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
377 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
378 (pool)++)
379
380
381
382
383
384
385
386
387
388
389
390
391
392#define for_each_pool(pool, pi) \
393 idr_for_each_entry(&worker_pool_idr, pool, pi) \
394 if (({ assert_rcu_or_pool_mutex(); false; })) { } \
395 else
396
397
398
399
400
401
402
403
404
405
406
407#define for_each_pool_worker(worker, pool) \
408 list_for_each_entry((worker), &(pool)->workers, node) \
409 if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
410 else
411
412
413
414
415
416
417
418
419
420
421
422
423
424#define for_each_pwq(pwq, wq) \
425 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
426 if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
427 else
428
429#ifdef CONFIG_DEBUG_OBJECTS_WORK
430
431static struct debug_obj_descr work_debug_descr;
432
433static void *work_debug_hint(void *addr)
434{
435 return ((struct work_struct *) addr)->func;
436}
437
438static bool work_is_static_object(void *addr)
439{
440 struct work_struct *work = addr;
441
442 return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
443}
444
445
446
447
448
449static bool work_fixup_init(void *addr, enum debug_obj_state state)
450{
451 struct work_struct *work = addr;
452
453 switch (state) {
454 case ODEBUG_STATE_ACTIVE:
455 cancel_work_sync(work);
456 debug_object_init(work, &work_debug_descr);
457 return true;
458 default:
459 return false;
460 }
461}
462
463
464
465
466
467static bool work_fixup_free(void *addr, enum debug_obj_state state)
468{
469 struct work_struct *work = addr;
470
471 switch (state) {
472 case ODEBUG_STATE_ACTIVE:
473 cancel_work_sync(work);
474 debug_object_free(work, &work_debug_descr);
475 return true;
476 default:
477 return false;
478 }
479}
480
481static struct debug_obj_descr work_debug_descr = {
482 .name = "work_struct",
483 .debug_hint = work_debug_hint,
484 .is_static_object = work_is_static_object,
485 .fixup_init = work_fixup_init,
486 .fixup_free = work_fixup_free,
487};
488
489static inline void debug_work_activate(struct work_struct *work)
490{
491 debug_object_activate(work, &work_debug_descr);
492}
493
494static inline void debug_work_deactivate(struct work_struct *work)
495{
496 debug_object_deactivate(work, &work_debug_descr);
497}
498
499void __init_work(struct work_struct *work, int onstack)
500{
501 if (onstack)
502 debug_object_init_on_stack(work, &work_debug_descr);
503 else
504 debug_object_init(work, &work_debug_descr);
505}
506EXPORT_SYMBOL_GPL(__init_work);
507
508void destroy_work_on_stack(struct work_struct *work)
509{
510 debug_object_free(work, &work_debug_descr);
511}
512EXPORT_SYMBOL_GPL(destroy_work_on_stack);
513
514void destroy_delayed_work_on_stack(struct delayed_work *work)
515{
516 destroy_timer_on_stack(&work->timer);
517 debug_object_free(&work->work, &work_debug_descr);
518}
519EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
520
521#else
522static inline void debug_work_activate(struct work_struct *work) { }
523static inline void debug_work_deactivate(struct work_struct *work) { }
524#endif
525
526
527
528
529
530
531
532
533static int worker_pool_assign_id(struct worker_pool *pool)
534{
535 int ret;
536
537 lockdep_assert_held(&wq_pool_mutex);
538
539 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
540 GFP_KERNEL);
541 if (ret >= 0) {
542 pool->id = ret;
543 return 0;
544 }
545 return ret;
546}
547
548
549
550
551
552
553
554
555
556
557
558
559
560static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
561 int node)
562{
563 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
564
565
566
567
568
569
570
571 if (unlikely(node == NUMA_NO_NODE))
572 return wq->dfl_pwq;
573
574 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
575}
576
577static unsigned int work_color_to_flags(int color)
578{
579 return color << WORK_STRUCT_COLOR_SHIFT;
580}
581
582static int get_work_color(struct work_struct *work)
583{
584 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
585 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
586}
587
588static int work_next_color(int color)
589{
590 return (color + 1) % WORK_NR_COLORS;
591}
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
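/*
 * work->data encodes either the pool_workqueue the work was last queued on
 * (while WORK_STRUCT_PWQ is set) or the last pool id shifted by
 * WORK_OFFQ_POOL_SHIFT, together with the WORK_STRUCT_* flag bits.
 * PENDING must be owned while these helpers are used so nobody else can
 * update the field concurrently; get_work_pwq()/get_work_pool() decode it.
 */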
613static inline void set_work_data(struct work_struct *work, unsigned long data,
614 unsigned long flags)
615{
616 WARN_ON_ONCE(!work_pending(work));
617 atomic_long_set(&work->data, data | flags | work_static(work));
618}
619
620static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
621 unsigned long extra_flags)
622{
623 set_work_data(work, (unsigned long)pwq,
624 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
625}
626
627static void set_work_pool_and_keep_pending(struct work_struct *work,
628 int pool_id)
629{
630 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
631 WORK_STRUCT_PENDING);
632}
633
634static void set_work_pool_and_clear_pending(struct work_struct *work,
635 int pool_id)
636{
637
638
639
640
641
642
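	/*
	 * The wmb below pairs with the implied full barrier of
	 * test_and_set_bit(PENDING) in the queueing paths: all updates made
	 * to @work up to this point are visible to whoever grabs PENDING
	 * next.
	 */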
643 smp_wmb();
644 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
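	/*
	 * The full barrier below orders the PENDING-clearing store above
	 * against later loads in the work function.  Without it, a queuer on
	 * another CPU could still observe PENDING set (and skip re-queueing)
	 * while the work function on this CPU reads data from before that
	 * queuer's update, losing the update entirely.
	 */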
673 smp_mb();
674}
675
676static void clear_work_data(struct work_struct *work)
677{
678 smp_wmb();
679 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
680}
681
682static struct pool_workqueue *get_work_pwq(struct work_struct *work)
683{
684 unsigned long data = atomic_long_read(&work->data);
685
686 if (data & WORK_STRUCT_PWQ)
687 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
688 else
689 return NULL;
690}
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707static struct worker_pool *get_work_pool(struct work_struct *work)
708{
709 unsigned long data = atomic_long_read(&work->data);
710 int pool_id;
711
712 assert_rcu_or_pool_mutex();
713
714 if (data & WORK_STRUCT_PWQ)
715 return ((struct pool_workqueue *)
716 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
717
718 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
719 if (pool_id == WORK_OFFQ_POOL_NONE)
720 return NULL;
721
722 return idr_find(&worker_pool_idr, pool_id);
723}
724
725
726
727
728
729
730
731
732static int get_work_pool_id(struct work_struct *work)
733{
734 unsigned long data = atomic_long_read(&work->data);
735
736 if (data & WORK_STRUCT_PWQ)
737 return ((struct pool_workqueue *)
738 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
739
740 return data >> WORK_OFFQ_POOL_SHIFT;
741}
742
743static void mark_work_canceling(struct work_struct *work)
744{
745 unsigned long pool_id = get_work_pool_id(work);
746
747 pool_id <<= WORK_OFFQ_POOL_SHIFT;
748 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
749}
750
751static bool work_is_canceling(struct work_struct *work)
752{
753 unsigned long data = atomic_long_read(&work->data);
754
755 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
756}
757
758
759
760
761
762
763
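/*
 * Concurrency-management policy helpers.  nr_running counts the workers of
 * a pool that are currently runnable and not blocked; an idle worker is
 * woken to pick up work only when that count drops to zero, which keeps
 * the number of concurrently executing workers per pool minimal.
 */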
764static bool __need_more_worker(struct worker_pool *pool)
765{
766 return !atomic_read(&pool->nr_running);
767}
768
769
770
771
772
773
774
775
776
777static bool need_more_worker(struct worker_pool *pool)
778{
779 return !list_empty(&pool->worklist) && __need_more_worker(pool);
780}
781
782
783static bool may_start_working(struct worker_pool *pool)
784{
785 return pool->nr_idle;
786}
787
788
789static bool keep_working(struct worker_pool *pool)
790{
791 return !list_empty(&pool->worklist) &&
792 atomic_read(&pool->nr_running) <= 1;
793}
794
795
796static bool need_to_create_worker(struct worker_pool *pool)
797{
798 return need_more_worker(pool) && !may_start_working(pool);
799}
800
801
802static bool too_many_workers(struct worker_pool *pool)
803{
804 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
805 int nr_idle = pool->nr_idle + managing;
806 int nr_busy = pool->nr_workers - nr_idle;
807
808 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
809}
810
811
812
813
814
815
816static struct worker *first_idle_worker(struct worker_pool *pool)
817{
818 if (unlikely(list_empty(&pool->idle_list)))
819 return NULL;
820
821 return list_first_entry(&pool->idle_list, struct worker, entry);
822}
823
824
825
826
827
828
829
830
831
832
833static void wake_up_worker(struct worker_pool *pool)
834{
835 struct worker *worker = first_idle_worker(pool);
836
837 if (likely(worker))
838 wake_up_process(worker->task);
839}
840
841
842
843
844
845
846
847
848
849
850
851
852void wq_worker_waking_up(struct task_struct *task, int cpu)
853{
854 struct worker *worker = kthread_data(task);
855
856 if (!(worker->flags & WORKER_NOT_RUNNING)) {
857 WARN_ON_ONCE(worker->pool->cpu != cpu);
858 atomic_inc(&worker->pool->nr_running);
859 }
860}
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
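/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * Called from the scheduler when a kworker blocks.  Decrements the pool's
 * nr_running and, if that drops it to zero while work is still pending,
 * returns the first idle worker so the caller can wake it up and keep the
 * pool making progress.  Must run on the worker's own CPU, hence the
 * raw_smp_processor_id() check.
 *
 * Return: worker task to wake up, or %NULL.
 */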
876struct task_struct *wq_worker_sleeping(struct task_struct *task)
877{
878 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
879 struct worker_pool *pool;
880
881
882
883
884
885
886 if (worker->flags & WORKER_NOT_RUNNING)
887 return NULL;
888
889 pool = worker->pool;
890
891
892 if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
893 return NULL;
894
895
896
897
898
899
900
901
902
903
904
905
906 if (atomic_dec_and_test(&pool->nr_running) &&
907 !list_empty(&pool->worklist))
908 to_wakeup = first_idle_worker(pool);
909 return to_wakeup ? to_wakeup->task : NULL;
910}
911
912
913
914
915
916
917
918
919
920
921
922static inline void worker_set_flags(struct worker *worker, unsigned int flags)
923{
924 struct worker_pool *pool = worker->pool;
925
926 WARN_ON_ONCE(worker->task != current);
927
928
929 if ((flags & WORKER_NOT_RUNNING) &&
930 !(worker->flags & WORKER_NOT_RUNNING)) {
931 atomic_dec(&pool->nr_running);
932 }
933
934 worker->flags |= flags;
935}
936
937
938
939
940
941
942
943
944
945
946
947static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
948{
949 struct worker_pool *pool = worker->pool;
950 unsigned int oflags = worker->flags;
951
952 WARN_ON_ONCE(worker->task != current);
953
954 worker->flags &= ~flags;
955
956
957
958
959
960
961 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
962 if (!(worker->flags & WORKER_NOT_RUNNING))
963 atomic_inc(&pool->nr_running);
964}
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
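/*
 * find_worker_executing_work - find a worker which is executing @work
 *
 * Walks @pool's busy_hash looking for a worker whose current work item is
 * @work.  Both the work pointer and the work function are compared because
 * a work item may be freed and recycled while still executing; a recycled
 * item with a different function must not be treated as the same work for
 * non-reentrancy purposes.  Caller must hold pool->lock.
 */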
999static struct worker *find_worker_executing_work(struct worker_pool *pool,
1000 struct work_struct *work)
1001{
1002 struct worker *worker;
1003
1004 hash_for_each_possible(pool->busy_hash, worker, hentry,
1005 (unsigned long)work)
1006 if (worker->current_work == work &&
1007 worker->current_func == work->func)
1008 return worker;
1009
1010 return NULL;
1011}
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030static void move_linked_works(struct work_struct *work, struct list_head *head,
1031 struct work_struct **nextp)
1032{
1033 struct work_struct *n;
1034
1035
1036
1037
1038
1039 list_for_each_entry_safe_from(work, n, NULL, entry) {
1040 list_move_tail(&work->entry, head);
1041 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1042 break;
1043 }
1044
1045
1046
1047
1048
1049
1050 if (nextp)
1051 *nextp = n;
1052}
1053
1054
1055
1056
1057
1058
1059
1060
1061static void get_pwq(struct pool_workqueue *pwq)
1062{
1063 lockdep_assert_held(&pwq->pool->lock);
1064 WARN_ON_ONCE(pwq->refcnt <= 0);
1065 pwq->refcnt++;
1066}
1067
1068
1069
1070
1071
1072
1073
1074
1075static void put_pwq(struct pool_workqueue *pwq)
1076{
1077 lockdep_assert_held(&pwq->pool->lock);
1078 if (likely(--pwq->refcnt))
1079 return;
1080 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1081 return;
1082
1083
1084
1085
1086
1087
1088
1089
1090 schedule_work(&pwq->unbound_release_work);
1091}
1092
1093
1094
1095
1096
1097
1098
1099static void put_pwq_unlocked(struct pool_workqueue *pwq)
1100{
1101 if (pwq) {
1102
1103
1104
1105
1106 spin_lock_irq(&pwq->pool->lock);
1107 put_pwq(pwq);
1108 spin_unlock_irq(&pwq->pool->lock);
1109 }
1110}
1111
1112static void pwq_activate_delayed_work(struct work_struct *work)
1113{
1114 struct pool_workqueue *pwq = get_work_pwq(work);
1115
1116 trace_workqueue_activate_work(work);
1117 if (list_empty(&pwq->pool->worklist))
1118 pwq->pool->watchdog_ts = jiffies;
1119 move_linked_works(work, &pwq->pool->worklist, NULL);
1120 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1121 pwq->nr_active++;
1122}
1123
1124static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
1125{
1126 struct work_struct *work = list_first_entry(&pwq->delayed_works,
1127 struct work_struct, entry);
1128
1129 pwq_activate_delayed_work(work);
1130}
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1144{
1145
1146 if (color == WORK_NO_COLOR)
1147 goto out_put;
1148
1149 pwq->nr_in_flight[color]--;
1150
1151 pwq->nr_active--;
1152 if (!list_empty(&pwq->delayed_works)) {
1153
1154 if (pwq->nr_active < pwq->max_active)
1155 pwq_activate_first_delayed(pwq);
1156 }
1157
1158
1159 if (likely(pwq->flush_color != color))
1160 goto out_put;
1161
1162
1163 if (pwq->nr_in_flight[color])
1164 goto out_put;
1165
1166
1167 pwq->flush_color = -1;
1168
1169
1170
1171
1172
1173 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1174 complete(&pwq->wq->first_flusher->done);
1175out_put:
1176 put_pwq(pwq);
1177}
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
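/*
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store the saved irq state
 *
 * Return:
 *   1       - @work was pending and was successfully stolen (timer deleted
 *             or item removed from its worklist);
 *   0       - @work was idle and PENDING is now owned by the caller;
 *   -EAGAIN - lost a race, safe to busy-retry;
 *   -ENOENT - someone else is already canceling @work; busy-retrying may
 *             livelock, so callers wait on cancel_waitq instead (see
 *             __cancel_work_timer()).
 *
 * On >= 0 return, irqs are disabled and @flags holds the previous state;
 * the caller is responsible for restoring it.
 */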
1206static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1207 unsigned long *flags)
1208{
1209 struct worker_pool *pool;
1210 struct pool_workqueue *pwq;
1211
1212 local_irq_save(*flags);
1213
1214
1215 if (is_dwork) {
1216 struct delayed_work *dwork = to_delayed_work(work);
1217
1218
1219
1220
1221
1222
1223 if (likely(del_timer(&dwork->timer)))
1224 return 1;
1225 }
1226
1227
1228 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1229 return 0;
1230
1231
1232
1233
1234
1235 pool = get_work_pool(work);
1236 if (!pool)
1237 goto fail;
1238
1239 spin_lock(&pool->lock);
1240
1241
1242
1243
1244
1245
1246
1247
1248 pwq = get_work_pwq(work);
1249 if (pwq && pwq->pool == pool) {
1250 debug_work_deactivate(work);
1251
1252
1253
1254
1255
1256
1257
1258
1259 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1260 pwq_activate_delayed_work(work);
1261
1262 list_del_init(&work->entry);
1263 pwq_dec_nr_in_flight(pwq, get_work_color(work));
1264
1265
1266 set_work_pool_and_keep_pending(work, pool->id);
1267
1268 spin_unlock(&pool->lock);
1269 return 1;
1270 }
1271 spin_unlock(&pool->lock);
1272fail:
1273 local_irq_restore(*flags);
1274 if (work_is_canceling(work))
1275 return -ENOENT;
1276 cpu_relax();
1277 return -EAGAIN;
1278}
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1294 struct list_head *head, unsigned int extra_flags)
1295{
1296 struct worker_pool *pool = pwq->pool;
1297
1298
1299 set_work_pwq(work, pwq, extra_flags);
1300 list_add_tail(&work->entry, head);
1301 get_pwq(pwq);
1302
1303
1304
1305
1306
1307
1308 smp_mb();
1309
1310 if (__need_more_worker(pool))
1311 wake_up_worker(pool);
1312}
1313
1314
1315
1316
1317
1318static bool is_chained_work(struct workqueue_struct *wq)
1319{
1320 struct worker *worker;
1321
1322 worker = current_wq_worker();
1323
1324
1325
1326
1327 return worker && worker->current_pwq->wq == wq;
1328}
1329
1330
1331
1332
1333
1334
1335static int wq_select_unbound_cpu(int cpu)
1336{
1337 static bool printed_dbg_warning;
1338 int new_cpu;
1339
1340 if (likely(!wq_debug_force_rr_cpu)) {
1341 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1342 return cpu;
1343 } else if (!printed_dbg_warning) {
1344 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1345 printed_dbg_warning = true;
1346 }
1347
1348 if (cpumask_empty(wq_unbound_cpumask))
1349 return cpu;
1350
1351 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1352 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1353 if (unlikely(new_cpu >= nr_cpu_ids)) {
1354 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1355 if (unlikely(new_cpu >= nr_cpu_ids))
1356 return cpu;
1357 }
1358 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1359
1360 return new_cpu;
1361}
1362
1363static void __queue_work(int cpu, struct workqueue_struct *wq,
1364 struct work_struct *work)
1365{
1366 struct pool_workqueue *pwq;
1367 struct worker_pool *last_pool;
1368 struct list_head *worklist;
1369 unsigned int work_flags;
1370 unsigned int req_cpu = cpu;
1371
1372
1373
1374
1375
1376
1377
1378 lockdep_assert_irqs_disabled();
1379
1380 debug_work_activate(work);
1381
1382
1383 if (unlikely(wq->flags & __WQ_DRAINING) &&
1384 WARN_ON_ONCE(!is_chained_work(wq)))
1385 return;
1386retry:
1387 if (req_cpu == WORK_CPU_UNBOUND)
1388 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1389
1390
1391 if (!(wq->flags & WQ_UNBOUND))
1392 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1393 else
1394 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1395
1396
1397
1398
1399
1400
1401 last_pool = get_work_pool(work);
1402 if (last_pool && last_pool != pwq->pool) {
1403 struct worker *worker;
1404
1405 spin_lock(&last_pool->lock);
1406
1407 worker = find_worker_executing_work(last_pool, work);
1408
1409 if (worker && worker->current_pwq->wq == wq) {
1410 pwq = worker->current_pwq;
1411 } else {
1412
1413 spin_unlock(&last_pool->lock);
1414 spin_lock(&pwq->pool->lock);
1415 }
1416 } else {
1417 spin_lock(&pwq->pool->lock);
1418 }
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (unlikely(!pwq->refcnt)) {
1429 if (wq->flags & WQ_UNBOUND) {
1430 spin_unlock(&pwq->pool->lock);
1431 cpu_relax();
1432 goto retry;
1433 }
1434
1435 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1436 wq->name, cpu);
1437 }
1438
1439
1440 trace_workqueue_queue_work(req_cpu, pwq, work);
1441
1442 if (WARN_ON(!list_empty(&work->entry))) {
1443 spin_unlock(&pwq->pool->lock);
1444 return;
1445 }
1446
1447 pwq->nr_in_flight[pwq->work_color]++;
1448 work_flags = work_color_to_flags(pwq->work_color);
1449
1450 if (likely(pwq->nr_active < pwq->max_active)) {
1451 trace_workqueue_activate_work(work);
1452 pwq->nr_active++;
1453 worklist = &pwq->pool->worklist;
1454 if (list_empty(worklist))
1455 pwq->pool->watchdog_ts = jiffies;
1456 } else {
1457 work_flags |= WORK_STRUCT_DELAYED;
1458 worklist = &pwq->delayed_works;
1459 }
1460
1461 insert_work(pwq, work, worklist, work_flags);
1462
1463 spin_unlock(&pwq->pool->lock);
1464}
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
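/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * The work is queued to a specific CPU; the caller must ensure that the
 * CPU can't go away.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */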
1477bool queue_work_on(int cpu, struct workqueue_struct *wq,
1478 struct work_struct *work)
1479{
1480 bool ret = false;
1481 unsigned long flags;
1482
1483 local_irq_save(flags);
1484
1485 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1486 __queue_work(cpu, wq, work);
1487 ret = true;
1488 }
1489
1490 local_irq_restore(flags);
1491 return ret;
1492}
1493EXPORT_SYMBOL(queue_work_on);
1494
1495void delayed_work_timer_fn(struct timer_list *t)
1496{
1497 struct delayed_work *dwork = from_timer(dwork, t, timer);
1498
1499
1500 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1501}
1502EXPORT_SYMBOL(delayed_work_timer_fn);
1503
1504static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1505 struct delayed_work *dwork, unsigned long delay)
1506{
1507 struct timer_list *timer = &dwork->timer;
1508 struct work_struct *work = &dwork->work;
1509
1510 WARN_ON_ONCE(!wq);
1511 WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1512 WARN_ON_ONCE(timer_pending(timer));
1513 WARN_ON_ONCE(!list_empty(&work->entry));
1514
1515
1516
1517
1518
1519
1520
1521 if (!delay) {
1522 __queue_work(cpu, wq, &dwork->work);
1523 return;
1524 }
1525
1526 dwork->wq = wq;
1527 dwork->cpu = cpu;
1528 timer->expires = jiffies + delay;
1529
1530 if (unlikely(cpu != WORK_CPU_UNBOUND))
1531 add_timer_on(timer, cpu);
1532 else
1533 add_timer(timer);
1534}
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
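/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Equivalent to queue_work_on() if @delay is zero.
 *
 * Return: %false if @dwork was already on a queue, %true otherwise.
 */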
1547bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1548 struct delayed_work *dwork, unsigned long delay)
1549{
1550 struct work_struct *work = &dwork->work;
1551 bool ret = false;
1552 unsigned long flags;
1553
1554
1555 local_irq_save(flags);
1556
1557 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1558 __queue_delayed_work(cpu, wq, dwork, delay);
1559 ret = true;
1560 }
1561
1562 local_irq_restore(flags);
1563 return ret;
1564}
1565EXPORT_SYMBOL(queue_delayed_work_on);
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
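/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, this is equivalent to queue_delayed_work_on();
 * otherwise its timer is modified to expire after @delay.  Safe to call
 * from any context, including IRQ handlers.
 *
 * Return: %false if @dwork was idle and queued, %true if it was pending
 * and its timer was modified.
 */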
1585bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1586 struct delayed_work *dwork, unsigned long delay)
1587{
1588 unsigned long flags;
1589 int ret;
1590
1591 do {
1592 ret = try_to_grab_pending(&dwork->work, true, &flags);
1593 } while (unlikely(ret == -EAGAIN));
1594
1595 if (likely(ret >= 0)) {
1596 __queue_delayed_work(cpu, wq, dwork, delay);
1597 local_irq_restore(flags);
1598 }
1599
1600
1601 return ret;
1602}
1603EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1604
1605static void rcu_work_rcufn(struct rcu_head *rcu)
1606{
1607 struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1608
1609
1610 local_irq_disable();
1611 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1612 local_irq_enable();
1613}
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1626{
1627 struct work_struct *work = &rwork->work;
1628
1629 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1630 rwork->wq = wq;
1631 call_rcu(&rwork->rcu, rcu_work_rcufn);
1632 return true;
1633 }
1634
1635 return false;
1636}
1637EXPORT_SYMBOL(queue_rcu_work);
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649static void worker_enter_idle(struct worker *worker)
1650{
1651 struct worker_pool *pool = worker->pool;
1652
1653 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1654 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1655 (worker->hentry.next || worker->hentry.pprev)))
1656 return;
1657
1658
1659 worker->flags |= WORKER_IDLE;
1660 pool->nr_idle++;
1661 worker->last_active = jiffies;
1662
1663
1664 list_add(&worker->entry, &pool->idle_list);
1665
1666 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1667 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1668
1669
1670
1671
1672
1673
1674
1675 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1676 pool->nr_workers == pool->nr_idle &&
1677 atomic_read(&pool->nr_running));
1678}
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689static void worker_leave_idle(struct worker *worker)
1690{
1691 struct worker_pool *pool = worker->pool;
1692
1693 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1694 return;
1695 worker_clr_flags(worker, WORKER_IDLE);
1696 pool->nr_idle--;
1697 list_del_init(&worker->entry);
1698}
1699
1700static struct worker *alloc_worker(int node)
1701{
1702 struct worker *worker;
1703
1704 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1705 if (worker) {
1706 INIT_LIST_HEAD(&worker->entry);
1707 INIT_LIST_HEAD(&worker->scheduled);
1708 INIT_LIST_HEAD(&worker->node);
1709
1710 worker->flags = WORKER_PREP;
1711 }
1712 return worker;
1713}
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724static void worker_attach_to_pool(struct worker *worker,
1725 struct worker_pool *pool)
1726{
1727 mutex_lock(&pool->attach_mutex);
1728
1729
1730
1731
1732
1733 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1734
1735
1736
1737
1738
1739
1740 if (pool->flags & POOL_DISASSOCIATED)
1741 worker->flags |= WORKER_UNBOUND;
1742
1743 list_add_tail(&worker->node, &pool->workers);
1744
1745 mutex_unlock(&pool->attach_mutex);
1746}
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757static void worker_detach_from_pool(struct worker *worker,
1758 struct worker_pool *pool)
1759{
1760 struct completion *detach_completion = NULL;
1761
1762 mutex_lock(&pool->attach_mutex);
1763 list_del(&worker->node);
1764 if (list_empty(&pool->workers))
1765 detach_completion = pool->detach_completion;
1766 mutex_unlock(&pool->attach_mutex);
1767
1768
1769 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1770
1771 if (detach_completion)
1772 complete(detach_completion);
1773}
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
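/**
 * create_worker - create a new workqueue worker
 * @pool: pool the new worker will belong to
 *
 * Allocates an ID and a struct worker, spawns the kworker thread bound to
 * @pool's cpumask, attaches it to the pool and starts it in the idle
 * state.  Might sleep; called without any lock held.
 *
 * Return: pointer to the new worker, or %NULL on failure.
 */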
1787static struct worker *create_worker(struct worker_pool *pool)
1788{
1789 struct worker *worker = NULL;
1790 int id = -1;
1791 char id_buf[16];
1792
1793
1794 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1795 if (id < 0)
1796 goto fail;
1797
1798 worker = alloc_worker(pool->node);
1799 if (!worker)
1800 goto fail;
1801
1802 worker->pool = pool;
1803 worker->id = id;
1804
1805 if (pool->cpu >= 0)
1806 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1807 pool->attrs->nice < 0 ? "H" : "");
1808 else
1809 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1810
1811 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1812 "kworker/%s", id_buf);
1813 if (IS_ERR(worker->task))
1814 goto fail;
1815
1816 set_user_nice(worker->task, pool->attrs->nice);
1817 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1818
1819
1820 worker_attach_to_pool(worker, pool);
1821
1822
1823 spin_lock_irq(&pool->lock);
1824 worker->pool->nr_workers++;
1825 worker_enter_idle(worker);
1826 wake_up_process(worker->task);
1827 spin_unlock_irq(&pool->lock);
1828
1829 return worker;
1830
1831fail:
1832 if (id >= 0)
1833 ida_simple_remove(&pool->worker_ida, id);
1834 kfree(worker);
1835 return NULL;
1836}
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848static void destroy_worker(struct worker *worker)
1849{
1850 struct worker_pool *pool = worker->pool;
1851
1852 lockdep_assert_held(&pool->lock);
1853
1854
1855 if (WARN_ON(worker->current_work) ||
1856 WARN_ON(!list_empty(&worker->scheduled)) ||
1857 WARN_ON(!(worker->flags & WORKER_IDLE)))
1858 return;
1859
1860 pool->nr_workers--;
1861 pool->nr_idle--;
1862
1863 list_del_init(&worker->entry);
1864 worker->flags |= WORKER_DIE;
1865 wake_up_process(worker->task);
1866}
1867
1868static void idle_worker_timeout(struct timer_list *t)
1869{
1870 struct worker_pool *pool = from_timer(pool, t, idle_timer);
1871
1872 spin_lock_irq(&pool->lock);
1873
1874 while (too_many_workers(pool)) {
1875 struct worker *worker;
1876 unsigned long expires;
1877
1878
1879 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1880 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1881
1882 if (time_before(jiffies, expires)) {
1883 mod_timer(&pool->idle_timer, expires);
1884 break;
1885 }
1886
1887 destroy_worker(worker);
1888 }
1889
1890 spin_unlock_irq(&pool->lock);
1891}
1892
1893static void send_mayday(struct work_struct *work)
1894{
1895 struct pool_workqueue *pwq = get_work_pwq(work);
1896 struct workqueue_struct *wq = pwq->wq;
1897
1898 lockdep_assert_held(&wq_mayday_lock);
1899
1900 if (!wq->rescuer)
1901 return;
1902
1903
1904 if (list_empty(&pwq->mayday_node)) {
1905
1906
1907
1908
1909
1910 get_pwq(pwq);
1911 list_add_tail(&pwq->mayday_node, &wq->maydays);
1912 wake_up_process(wq->rescuer->task);
1913 }
1914}
1915
1916static void pool_mayday_timeout(struct timer_list *t)
1917{
1918 struct worker_pool *pool = from_timer(pool, t, mayday_timer);
1919 struct work_struct *work;
1920
1921 spin_lock_irq(&pool->lock);
1922 spin_lock(&wq_mayday_lock);
1923
1924 if (need_to_create_worker(pool)) {
1925
1926
1927
1928
1929
1930
1931 list_for_each_entry(work, &pool->worklist, entry)
1932 send_mayday(work);
1933 }
1934
1935 spin_unlock(&wq_mayday_lock);
1936 spin_unlock_irq(&pool->lock);
1937
1938 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1939}
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959static void maybe_create_worker(struct worker_pool *pool)
1960__releases(&pool->lock)
1961__acquires(&pool->lock)
1962{
1963restart:
1964 spin_unlock_irq(&pool->lock);
1965
1966
1967 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1968
1969 while (true) {
1970 if (create_worker(pool) || !need_to_create_worker(pool))
1971 break;
1972
1973 schedule_timeout_interruptible(CREATE_COOLDOWN);
1974
1975 if (!need_to_create_worker(pool))
1976 break;
1977 }
1978
1979 del_timer_sync(&pool->mayday_timer);
1980 spin_lock_irq(&pool->lock);
1981
1982
1983
1984
1985
1986 if (need_to_create_worker(pool))
1987 goto restart;
1988}
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
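/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role if nobody else holds it and make sure the pool
 * has enough workers via maybe_create_worker().  Only one worker per pool
 * may manage at a time (POOL_MANAGER_ACTIVE); the role is released and
 * wq_manager_wait woken before returning.
 *
 * Return: %false if no management was needed (another worker is already
 * managing), %true otherwise.  pool->lock may have been dropped and
 * regained, so the caller must re-verify the conditions it checked.
 */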
2012static bool manage_workers(struct worker *worker)
2013{
2014 struct worker_pool *pool = worker->pool;
2015
2016 if (pool->flags & POOL_MANAGER_ACTIVE)
2017 return false;
2018
2019 pool->flags |= POOL_MANAGER_ACTIVE;
2020 pool->manager = worker;
2021
2022 maybe_create_worker(pool);
2023
2024 pool->manager = NULL;
2025 pool->flags &= ~POOL_MANAGER_ACTIVE;
2026 wake_up(&wq_manager_wait);
2027 return true;
2028}
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
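/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Executes @work: the item is dequeued, the worker is hashed into
 * busy_hash as its executor, PENDING is cleared, pool->lock is dropped and
 * the work function is invoked with irqs enabled.  CPU_INTENSIVE works are
 * excluded from concurrency management while they run.  Leaked locks and
 * preempt counts are reported after the callback returns.
 *
 * CONTEXT: spin_lock_irq(pool->lock) which is released and regrabbed.
 */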
2044static void process_one_work(struct worker *worker, struct work_struct *work)
2045__releases(&pool->lock)
2046__acquires(&pool->lock)
2047{
2048 struct pool_workqueue *pwq = get_work_pwq(work);
2049 struct worker_pool *pool = worker->pool;
2050 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2051 int work_color;
2052 struct worker *collision;
2053#ifdef CONFIG_LOCKDEP
2054
2055
2056
2057
2058
2059
2060
2061 struct lockdep_map lockdep_map;
2062
2063 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2064#endif
2065
2066 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2067 raw_smp_processor_id() != pool->cpu);
2068
2069
2070
2071
2072
2073
2074
2075 collision = find_worker_executing_work(pool, work);
2076 if (unlikely(collision)) {
2077 move_linked_works(work, &collision->scheduled, NULL);
2078 return;
2079 }
2080
2081
2082 debug_work_deactivate(work);
2083 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2084 worker->current_work = work;
2085 worker->current_func = work->func;
2086 worker->current_pwq = pwq;
2087 work_color = get_work_color(work);
2088
2089 list_del_init(&work->entry);
2090
2091
2092
2093
2094
2095
2096
2097 if (unlikely(cpu_intensive))
2098 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2099
2100
2101
2102
2103
2104
2105
2106
2107 if (need_more_worker(pool))
2108 wake_up_worker(pool);
2109
2110
2111
2112
2113
2114
2115
2116 set_work_pool_and_clear_pending(work, pool->id);
2117
2118 spin_unlock_irq(&pool->lock);
2119
2120 lock_map_acquire(&pwq->wq->lockdep_map);
2121 lock_map_acquire(&lockdep_map);
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143 lockdep_invariant_state(true);
2144 trace_workqueue_execute_start(work);
2145 worker->current_func(work);
2146
2147
2148
2149
2150 trace_workqueue_execute_end(work);
2151 lock_map_release(&lockdep_map);
2152 lock_map_release(&pwq->wq->lockdep_map);
2153
2154 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2155 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2156 " last function: %pf\n",
2157 current->comm, preempt_count(), task_pid_nr(current),
2158 worker->current_func);
2159 debug_show_held_locks(current);
2160 dump_stack();
2161 }
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171 cond_resched();
2172
2173 spin_lock_irq(&pool->lock);
2174
2175
2176 if (unlikely(cpu_intensive))
2177 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2178
2179
2180 hash_del(&worker->hentry);
2181 worker->current_work = NULL;
2182 worker->current_func = NULL;
2183 worker->current_pwq = NULL;
2184 worker->desc_valid = false;
2185 pwq_dec_nr_in_flight(pwq, work_color);
2186}
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200static void process_scheduled_works(struct worker *worker)
2201{
2202 while (!list_empty(&worker->scheduled)) {
2203 struct work_struct *work = list_first_entry(&worker->scheduled,
2204 struct work_struct, entry);
2205 process_one_work(worker, work);
2206 }
2207}
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
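/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * Main loop of every kworker.  The worker leaves the idle list, creates
 * more workers if needed (manage_workers()), processes work items from
 * pool->worklist while keep_working() holds, then returns to the idle list
 * and sleeps.  A worker flagged WORKER_DIE detaches from its pool and
 * frees itself.
 */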
2221static int worker_thread(void *__worker)
2222{
2223 struct worker *worker = __worker;
2224 struct worker_pool *pool = worker->pool;
2225
2226
2227 worker->task->flags |= PF_WQ_WORKER;
2228woke_up:
2229 spin_lock_irq(&pool->lock);
2230
2231
2232 if (unlikely(worker->flags & WORKER_DIE)) {
2233 spin_unlock_irq(&pool->lock);
2234 WARN_ON_ONCE(!list_empty(&worker->entry));
2235 worker->task->flags &= ~PF_WQ_WORKER;
2236
2237 set_task_comm(worker->task, "kworker/dying");
2238 ida_simple_remove(&pool->worker_ida, worker->id);
2239 worker_detach_from_pool(worker, pool);
2240 kfree(worker);
2241 return 0;
2242 }
2243
2244 worker_leave_idle(worker);
2245recheck:
2246
2247 if (!need_more_worker(pool))
2248 goto sleep;
2249
2250
2251 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2252 goto recheck;
2253
2254
2255
2256
2257
2258
2259 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2260
2261
2262
2263
2264
2265
2266
2267
2268 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2269
2270 do {
2271 struct work_struct *work =
2272 list_first_entry(&pool->worklist,
2273 struct work_struct, entry);
2274
2275 pool->watchdog_ts = jiffies;
2276
2277 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2278
2279 process_one_work(worker, work);
2280 if (unlikely(!list_empty(&worker->scheduled)))
2281 process_scheduled_works(worker);
2282 } else {
2283 move_linked_works(work, &worker->scheduled, NULL);
2284 process_scheduled_works(worker);
2285 }
2286 } while (keep_working(pool));
2287
2288 worker_set_flags(worker, WORKER_PREP);
2289sleep:
2290
2291
2292
2293
2294
2295
2296
2297 worker_enter_idle(worker);
2298 __set_current_state(TASK_IDLE);
2299 spin_unlock_irq(&pool->lock);
2300 schedule();
2301 goto woke_up;
2302}
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
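/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * Workqueues with WQ_MEM_RECLAIM have one rescuer.  When a pool cannot
 * create a new worker (e.g. under memory pressure) and work items of such
 * a workqueue are stuck, the mayday timer queues the pwq on wq->maydays
 * and wakes the rescuer, which attaches to the pool and processes the
 * stuck items itself, guaranteeing forward progress.
 */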
2325static int rescuer_thread(void *__rescuer)
2326{
2327 struct worker *rescuer = __rescuer;
2328 struct workqueue_struct *wq = rescuer->rescue_wq;
2329 struct list_head *scheduled = &rescuer->scheduled;
2330 bool should_stop;
2331
2332 set_user_nice(current, RESCUER_NICE_LEVEL);
2333
2334
2335
2336
2337
2338 rescuer->task->flags |= PF_WQ_WORKER;
2339repeat:
2340 set_current_state(TASK_IDLE);
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350 should_stop = kthread_should_stop();
2351
2352
2353 spin_lock_irq(&wq_mayday_lock);
2354
2355 while (!list_empty(&wq->maydays)) {
2356 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2357 struct pool_workqueue, mayday_node);
2358 struct worker_pool *pool = pwq->pool;
2359 struct work_struct *work, *n;
2360 bool first = true;
2361
2362 __set_current_state(TASK_RUNNING);
2363 list_del_init(&pwq->mayday_node);
2364
2365 spin_unlock_irq(&wq_mayday_lock);
2366
2367 worker_attach_to_pool(rescuer, pool);
2368
2369 spin_lock_irq(&pool->lock);
2370 rescuer->pool = pool;
2371
2372
2373
2374
2375
2376 WARN_ON_ONCE(!list_empty(scheduled));
2377 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2378 if (get_work_pwq(work) == pwq) {
2379 if (first)
2380 pool->watchdog_ts = jiffies;
2381 move_linked_works(work, scheduled, &n);
2382 }
2383 first = false;
2384 }
2385
2386 if (!list_empty(scheduled)) {
2387 process_scheduled_works(rescuer);
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398 if (need_to_create_worker(pool)) {
2399 spin_lock(&wq_mayday_lock);
2400 get_pwq(pwq);
2401 list_move_tail(&pwq->mayday_node, &wq->maydays);
2402 spin_unlock(&wq_mayday_lock);
2403 }
2404 }
2405
2406
2407
2408
2409
2410 put_pwq(pwq);
2411
2412
2413
2414
2415
2416
2417 if (need_more_worker(pool))
2418 wake_up_worker(pool);
2419
2420 rescuer->pool = NULL;
2421 spin_unlock_irq(&pool->lock);
2422
2423 worker_detach_from_pool(rescuer, pool);
2424
2425 spin_lock_irq(&wq_mayday_lock);
2426 }
2427
2428 spin_unlock_irq(&wq_mayday_lock);
2429
2430 if (should_stop) {
2431 __set_current_state(TASK_RUNNING);
2432 rescuer->task->flags &= ~PF_WQ_WORKER;
2433 return 0;
2434 }
2435
2436
2437 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2438 schedule();
2439 goto repeat;
2440}
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453static void check_flush_dependency(struct workqueue_struct *target_wq,
2454 struct work_struct *target_work)
2455{
2456 work_func_t target_func = target_work ? target_work->func : NULL;
2457 struct worker *worker;
2458
2459 if (target_wq->flags & WQ_MEM_RECLAIM)
2460 return;
2461
2462 worker = current_wq_worker();
2463
2464 WARN_ONCE(current->flags & PF_MEMALLOC,
2465 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2466 current->pid, current->comm, target_wq->name, target_func);
2467 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2468 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2469 "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2470 worker->current_pwq->wq->name, worker->current_func,
2471 target_wq->name, target_func);
2472}
2473
2474struct wq_barrier {
2475 struct work_struct work;
2476 struct completion done;
2477 struct task_struct *task;
2478};
2479
2480static void wq_barrier_func(struct work_struct *work)
2481{
2482 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2483 complete(&barr->done);
2484}
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
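/*
 * insert_wq_barrier - insert a barrier work after @target
 *
 * A barrier work is queued right after @target (or at the head of the
 * executing worker's ->scheduled list) so that its completion fires only
 * once @target has finished.  It carries WORK_NO_COLOR so it does not
 * participate in flush accounting, and WORK_STRUCT_LINKED chains it to
 * @target while @target is still queued.  Caller holds pool->lock; the
 * barrier lives on the flusher's stack, hence the ONSTACK initializers.
 */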
2510static void insert_wq_barrier(struct pool_workqueue *pwq,
2511 struct wq_barrier *barr,
2512 struct work_struct *target, struct worker *worker)
2513{
2514 struct list_head *head;
2515 unsigned int linked = 0;
2516
2517
2518
2519
2520
2521
2522
2523 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2524 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2525
2526 init_completion_map(&barr->done, &target->lockdep_map);
2527
2528 barr->task = current;
2529
2530
2531
2532
2533
2534 if (worker)
2535 head = worker->scheduled.next;
2536 else {
2537 unsigned long *bits = work_data_bits(target);
2538
2539 head = target->entry.next;
2540
2541 linked = *bits & WORK_STRUCT_LINKED;
2542 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2543 }
2544
2545 debug_work_activate(&barr->work);
2546 insert_work(pwq, &barr->work, head,
2547 work_color_to_flags(WORK_NO_COLOR) | linked);
2548}
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2582 int flush_color, int work_color)
2583{
2584 bool wait = false;
2585 struct pool_workqueue *pwq;
2586
2587 if (flush_color >= 0) {
2588 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2589 atomic_set(&wq->nr_pwqs_to_flush, 1);
2590 }
2591
2592 for_each_pwq(pwq, wq) {
2593 struct worker_pool *pool = pwq->pool;
2594
2595 spin_lock_irq(&pool->lock);
2596
2597 if (flush_color >= 0) {
2598 WARN_ON_ONCE(pwq->flush_color != -1);
2599
2600 if (pwq->nr_in_flight[flush_color]) {
2601 pwq->flush_color = flush_color;
2602 atomic_inc(&wq->nr_pwqs_to_flush);
2603 wait = true;
2604 }
2605 }
2606
2607 if (work_color >= 0) {
2608 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2609 pwq->work_color = work_color;
2610 }
2611
2612 spin_unlock_irq(&pool->lock);
2613 }
2614
2615 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2616 complete(&wq->first_flusher->done);
2617
2618 return wait;
2619}
2620
2621
2622
2623
2624
2625
2626
2627
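/**
 * flush_workqueue - ensure that any scheduled work has run to completion
 * @wq: workqueue to flush
 *
 * Sleeps until all work items which were queued on entry have finished
 * execution, without being livelocked by new incoming ones.  Flushes are
 * color-coded: each flusher grabs the current work color and waits for all
 * pwqs to drain the in-flight works of that color, with overflow handling
 * when every color is in use.
 */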
2628void flush_workqueue(struct workqueue_struct *wq)
2629{
2630 struct wq_flusher this_flusher = {
2631 .list = LIST_HEAD_INIT(this_flusher.list),
2632 .flush_color = -1,
2633 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
2634 };
2635 int next_color;
2636
2637 if (WARN_ON(!wq_online))
2638 return;
2639
2640 mutex_lock(&wq->mutex);
2641
2642
2643
2644
2645 next_color = work_next_color(wq->work_color);
2646
2647 if (next_color != wq->flush_color) {
2648
2649
2650
2651
2652
2653 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2654 this_flusher.flush_color = wq->work_color;
2655 wq->work_color = next_color;
2656
2657 if (!wq->first_flusher) {
2658
2659 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2660
2661 wq->first_flusher = &this_flusher;
2662
2663 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2664 wq->work_color)) {
2665
2666 wq->flush_color = next_color;
2667 wq->first_flusher = NULL;
2668 goto out_unlock;
2669 }
2670 } else {
2671
2672 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2673 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2674 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2675 }
2676 } else {
2677
2678
2679
2680
2681
2682 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2683 }
2684
2685 check_flush_dependency(wq, NULL);
2686
2687 mutex_unlock(&wq->mutex);
2688
2689 wait_for_completion(&this_flusher.done);
2690
2691
2692
2693
2694
2695
2696
2697 if (wq->first_flusher != &this_flusher)
2698 return;
2699
2700 mutex_lock(&wq->mutex);
2701
2702
2703 if (wq->first_flusher != &this_flusher)
2704 goto out_unlock;
2705
2706 wq->first_flusher = NULL;
2707
2708 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2709 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2710
2711 while (true) {
2712 struct wq_flusher *next, *tmp;
2713
2714
2715 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2716 if (next->flush_color != wq->flush_color)
2717 break;
2718 list_del_init(&next->list);
2719 complete(&next->done);
2720 }
2721
2722 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2723 wq->flush_color != work_next_color(wq->work_color));
2724
2725
2726 wq->flush_color = work_next_color(wq->flush_color);
2727
2728
2729 if (!list_empty(&wq->flusher_overflow)) {
2730
2731
2732
2733
2734
2735
2736 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2737 tmp->flush_color = wq->work_color;
2738
2739 wq->work_color = work_next_color(wq->work_color);
2740
2741 list_splice_tail_init(&wq->flusher_overflow,
2742 &wq->flusher_queue);
2743 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2744 }
2745
2746 if (list_empty(&wq->flusher_queue)) {
2747 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2748 break;
2749 }
2750
2751
2752
2753
2754
2755 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2756 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2757
2758 list_del_init(&next->list);
2759 wq->first_flusher = next;
2760
2761 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2762 break;
2763
2764
2765
2766
2767
2768 wq->first_flusher = NULL;
2769 }
2770
2771out_unlock:
2772 mutex_unlock(&wq->mutex);
2773}
2774EXPORT_SYMBOL(flush_workqueue);
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787void drain_workqueue(struct workqueue_struct *wq)
2788{
2789 unsigned int flush_cnt = 0;
2790 struct pool_workqueue *pwq;
2791
2792
2793
2794
2795
2796
2797 mutex_lock(&wq->mutex);
2798 if (!wq->nr_drainers++)
2799 wq->flags |= __WQ_DRAINING;
2800 mutex_unlock(&wq->mutex);
2801reflush:
2802 flush_workqueue(wq);
2803
2804 mutex_lock(&wq->mutex);
2805
2806 for_each_pwq(pwq, wq) {
2807 bool drained;
2808
2809 spin_lock_irq(&pwq->pool->lock);
2810 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2811 spin_unlock_irq(&pwq->pool->lock);
2812
2813 if (drained)
2814 continue;
2815
2816 if (++flush_cnt == 10 ||
2817 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2818 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
2819 wq->name, flush_cnt);
2820
2821 mutex_unlock(&wq->mutex);
2822 goto reflush;
2823 }
2824
2825 if (!--wq->nr_drainers)
2826 wq->flags &= ~__WQ_DRAINING;
2827 mutex_unlock(&wq->mutex);
2828}
2829EXPORT_SYMBOL_GPL(drain_workqueue);
2830
2831static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2832{
2833 struct worker *worker = NULL;
2834 struct worker_pool *pool;
2835 struct pool_workqueue *pwq;
2836
2837 might_sleep();
2838
2839 local_irq_disable();
2840 pool = get_work_pool(work);
2841 if (!pool) {
2842 local_irq_enable();
2843 return false;
2844 }
2845
2846 spin_lock(&pool->lock);
2847
2848 pwq = get_work_pwq(work);
2849 if (pwq) {
2850 if (unlikely(pwq->pool != pool))
2851 goto already_gone;
2852 } else {
2853 worker = find_worker_executing_work(pool, work);
2854 if (!worker)
2855 goto already_gone;
2856 pwq = worker->current_pwq;
2857 }
2858
2859 check_flush_dependency(pwq->wq, work);
2860
2861 insert_wq_barrier(pwq, barr, work, worker);
2862 spin_unlock_irq(&pool->lock);
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
2874 lock_map_acquire(&pwq->wq->lockdep_map);
2875 lock_map_release(&pwq->wq->lockdep_map);
2876 }
2877
2878 return true;
2879already_gone:
2880 spin_unlock_irq(&pool->lock);
2881 return false;
2882}
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
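/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return: %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */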
2895bool flush_work(struct work_struct *work)
2896{
2897 struct wq_barrier barr;
2898
2899 if (WARN_ON(!wq_online))
2900 return false;
2901
2902 if (start_flush_work(work, &barr)) {
2903 wait_for_completion(&barr.done);
2904 destroy_work_on_stack(&barr.work);
2905 return true;
2906 } else {
2907 return false;
2908 }
2909}
2910EXPORT_SYMBOL_GPL(flush_work);
2911
2912struct cwt_wait {
2913 wait_queue_entry_t wait;
2914 struct work_struct *work;
2915};
2916
2917static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
2918{
2919 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
2920
2921 if (cwait->work != key)
2922 return 0;
2923 return autoremove_wake_function(wait, mode, sync, key);
2924}
2925
2926static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
2927{
2928 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
2929 unsigned long flags;
2930 int ret;
2931
2932 do {
2933 ret = try_to_grab_pending(work, is_dwork, &flags);
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950 if (unlikely(ret == -ENOENT)) {
2951 struct cwt_wait cwait;
2952
2953 init_wait(&cwait.wait);
2954 cwait.wait.func = cwt_wakefn;
2955 cwait.work = work;
2956
2957 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
2958 TASK_UNINTERRUPTIBLE);
2959 if (work_is_canceling(work))
2960 schedule();
2961 finish_wait(&cancel_waitq, &cwait.wait);
2962 }
2963 } while (unlikely(ret < 0));
2964
2965
2966 mark_work_canceling(work);
2967 local_irq_restore(flags);
2968
2969
2970
2971
2972
2973 if (wq_online)
2974 flush_work(work);
2975
2976 clear_work_data(work);
2977
2978
2979
2980
2981
2982
2983 smp_mb();
2984 if (waitqueue_active(&cancel_waitq))
2985 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
2986
2987 return ret;
2988}
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
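/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function can be
 * used even if the work re-queues itself or migrates to another workqueue.
 * On return, @work is guaranteed to be not pending or executing on any
 * CPU.  The caller must ensure the workqueue on which @work was last
 * queued can't be destroyed before this function returns.  Might sleep.
 *
 * Return: %true if @work was pending, %false otherwise.
 */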
3008bool cancel_work_sync(struct work_struct *work)
3009{
3010 return __cancel_work_timer(work, false);
3011}
3012EXPORT_SYMBOL_GPL(cancel_work_sync);
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026bool flush_delayed_work(struct delayed_work *dwork)
3027{
3028 local_irq_disable();
3029 if (del_timer_sync(&dwork->timer))
3030 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3031 local_irq_enable();
3032 return flush_work(&dwork->work);
3033}
3034EXPORT_SYMBOL(flush_delayed_work);
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044bool flush_rcu_work(struct rcu_work *rwork)
3045{
3046 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3047 rcu_barrier();
3048 flush_work(&rwork->work);
3049 return true;
3050 } else {
3051 return flush_work(&rwork->work);
3052 }
3053}
3054EXPORT_SYMBOL(flush_rcu_work);
3055
3056static bool __cancel_work(struct work_struct *work, bool is_dwork)
3057{
3058 unsigned long flags;
3059 int ret;
3060
3061 do {
3062 ret = try_to_grab_pending(work, is_dwork, &flags);
3063 } while (unlikely(ret == -EAGAIN));
3064
3065 if (unlikely(ret < 0))
3066 return false;
3067
3068 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3069 local_irq_restore(flags);
3070 return ret;
3071}
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
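/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.
 *
 * Return: %true if @dwork was pending and canceled; %false if it wasn't
 * pending.  Note that the work callback function may still be running on
 * return; explicitly flush or use cancel_delayed_work_sync() to wait on
 * it.
 */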
3089bool cancel_delayed_work(struct delayed_work *dwork)
3090{
3091 return __cancel_work(&dwork->work, true);
3092}
3093EXPORT_SYMBOL(cancel_delayed_work);
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104bool cancel_delayed_work_sync(struct delayed_work *dwork)
3105{
3106 return __cancel_work_timer(&dwork->work, true);
3107}
3108EXPORT_SYMBOL(cancel_delayed_work_sync);
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121int schedule_on_each_cpu(work_func_t func)
3122{
3123 int cpu;
3124 struct work_struct __percpu *works;
3125
3126 works = alloc_percpu(struct work_struct);
3127 if (!works)
3128 return -ENOMEM;
3129
3130 get_online_cpus();
3131
3132 for_each_online_cpu(cpu) {
3133 struct work_struct *work = per_cpu_ptr(works, cpu);
3134
3135 INIT_WORK(work, func);
3136 schedule_work_on(cpu, work);
3137 }
3138
3139 for_each_online_cpu(cpu)
3140 flush_work(per_cpu_ptr(works, cpu));
3141
3142 put_online_cpus();
3143 free_percpu(works);
3144 return 0;
3145}
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3160{
3161 if (!in_interrupt()) {
3162 fn(&ew->work);
3163 return 0;
3164 }
3165
3166 INIT_WORK(&ew->work, fn);
3167 schedule_work(&ew->work);
3168
3169 return 1;
3170}
3171EXPORT_SYMBOL_GPL(execute_in_process_context);
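/*
 * Usage sketch (illustrative; "struct foo" with an embedded execute_work
 * member @ew is hypothetical): callers which may run in interrupt context
 * can use this to run cleanup in process context without caring which path
 * was taken:
 *
 *	static void foo_release(struct work_struct *work)
 *	{
 *		struct foo *foo = container_of(work, struct foo, ew.work);
 *
 *		kfree(foo);
 *	}
 *	...
 *	execute_in_process_context(foo_release, &foo->ew);
 */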
3172
/**
 * free_workqueue_attrs - free a workqueue_attrs
 * @attrs: workqueue_attrs to free
 *
 * Undo alloc_workqueue_attrs().
 */
3179void free_workqueue_attrs(struct workqueue_attrs *attrs)
3180{
3181 if (attrs) {
3182 free_cpumask_var(attrs->cpumask);
3183 kfree(attrs);
3184 }
3185}
3186
/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new workqueue_attrs, initialize with default settings and
 * return it.
 *
 * Return: The allocated new workqueue_attrs or %NULL on failure.
 */
3196struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3197{
3198 struct workqueue_attrs *attrs;
3199
3200 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3201 if (!attrs)
3202 goto fail;
3203 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3204 goto fail;
3205
3206 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3207 return attrs;
3208fail:
3209 free_workqueue_attrs(attrs);
3210 return NULL;
3211}
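/*
 * Typical alloc/modify/apply/free pattern (illustrative; "my_wq" is a
 * hypothetical WQ_UNBOUND workqueue created elsewhere):
 *
 *	struct workqueue_attrs *attrs;
 *	int ret;
 *
 *	attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *	if (!attrs)
 *		return -ENOMEM;
 *	attrs->nice = -5;
 *	cpumask_copy(attrs->cpumask, cpumask_of_node(0));
 *	ret = apply_workqueue_attrs(my_wq, attrs);
 *	free_workqueue_attrs(attrs);
 */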
3212
3213static void copy_workqueue_attrs(struct workqueue_attrs *to,
3214 const struct workqueue_attrs *from)
3215{
3216 to->nice = from->nice;
3217 cpumask_copy(to->cpumask, from->cpumask);
3218
3219
3220
3221
3222
3223 to->no_numa = from->no_numa;
3224}
3225
/* hash value of the content of @attr */
3227static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3228{
3229 u32 hash = 0;
3230
3231 hash = jhash_1word(attrs->nice, hash);
3232 hash = jhash(cpumask_bits(attrs->cpumask),
3233 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3234 return hash;
3235}
3236
/* content equality test */
3238static bool wqattrs_equal(const struct workqueue_attrs *a,
3239 const struct workqueue_attrs *b)
3240{
3241 if (a->nice != b->nice)
3242 return false;
3243 if (!cpumask_equal(a->cpumask, b->cpumask))
3244 return false;
3245 return true;
3246}
3247
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
3258static int init_worker_pool(struct worker_pool *pool)
3259{
3260 spin_lock_init(&pool->lock);
3261 pool->id = -1;
3262 pool->cpu = -1;
3263 pool->node = NUMA_NO_NODE;
3264 pool->flags |= POOL_DISASSOCIATED;
3265 pool->watchdog_ts = jiffies;
3266 INIT_LIST_HEAD(&pool->worklist);
3267 INIT_LIST_HEAD(&pool->idle_list);
3268 hash_init(pool->busy_hash);
3269
3270 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
3271
3272 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
3273
3274 mutex_init(&pool->attach_mutex);
3275 INIT_LIST_HEAD(&pool->workers);
3276
3277 ida_init(&pool->worker_ida);
3278 INIT_HLIST_NODE(&pool->hash_node);
3279 pool->refcnt = 1;
3280
3281
3282 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3283 if (!pool->attrs)
3284 return -ENOMEM;
3285 return 0;
3286}
3287
3288static void rcu_free_wq(struct rcu_head *rcu)
3289{
3290 struct workqueue_struct *wq =
3291 container_of(rcu, struct workqueue_struct, rcu);
3292
3293 if (!(wq->flags & WQ_UNBOUND))
3294 free_percpu(wq->cpu_pwqs);
3295 else
3296 free_workqueue_attrs(wq->unbound_attrs);
3297
3298 kfree(wq->rescuer);
3299 kfree(wq);
3300}
3301
3302static void rcu_free_pool(struct rcu_head *rcu)
3303{
3304 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3305
3306 ida_destroy(&pool->worker_ida);
3307 free_workqueue_attrs(pool->attrs);
3308 kfree(pool);
3309}
3310
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * partially, any of get_unbound_pool()'s steps.
 *
 * Should be called with wq_pool_mutex held.
 */
3322static void put_unbound_pool(struct worker_pool *pool)
3323{
3324 DECLARE_COMPLETION_ONSTACK(detach_completion);
3325 struct worker *worker;
3326
3327 lockdep_assert_held(&wq_pool_mutex);
3328
3329 if (--pool->refcnt)
3330 return;
3331
3332
3333 if (WARN_ON(!(pool->cpu < 0)) ||
3334 WARN_ON(!list_empty(&pool->worklist)))
3335 return;
3336
3337
3338 if (pool->id >= 0)
3339 idr_remove(&worker_pool_idr, pool->id);
3340 hash_del(&pool->hash_node);
3341
3342
3343
3344
3345
3346
3347 spin_lock_irq(&pool->lock);
3348 wait_event_lock_irq(wq_manager_wait,
3349 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3350 pool->flags |= POOL_MANAGER_ACTIVE;
3351
3352 while ((worker = first_idle_worker(pool)))
3353 destroy_worker(worker);
3354 WARN_ON(pool->nr_workers || pool->nr_idle);
3355 spin_unlock_irq(&pool->lock);
3356
3357 mutex_lock(&pool->attach_mutex);
3358 if (!list_empty(&pool->workers))
3359 pool->detach_completion = &detach_completion;
3360 mutex_unlock(&pool->attach_mutex);
3361
3362 if (pool->detach_completion)
3363 wait_for_completion(pool->detach_completion);
3364
3365
3366 del_timer_sync(&pool->idle_timer);
3367 del_timer_sync(&pool->mayday_timer);
3368
3369
3370 call_rcu_sched(&pool->rcu, rcu_free_pool);
3371}
3372
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it's used; otherwise, this function attempts to create a
 * new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
3387static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3388{
3389 u32 hash = wqattrs_hash(attrs);
3390 struct worker_pool *pool;
3391 int node;
3392 int target_node = NUMA_NO_NODE;
3393
3394 lockdep_assert_held(&wq_pool_mutex);
3395
3396
3397 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3398 if (wqattrs_equal(pool->attrs, attrs)) {
3399 pool->refcnt++;
3400 return pool;
3401 }
3402 }
3403
3404
3405 if (wq_numa_enabled) {
3406 for_each_node(node) {
3407 if (cpumask_subset(attrs->cpumask,
3408 wq_numa_possible_cpumask[node])) {
3409 target_node = node;
3410 break;
3411 }
3412 }
3413 }
3414
3415
3416 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3417 if (!pool || init_worker_pool(pool) < 0)
3418 goto fail;
3419
3420 lockdep_set_subclass(&pool->lock, 1);
3421 copy_workqueue_attrs(pool->attrs, attrs);
3422 pool->node = target_node;
3423
3424
3425
3426
3427
3428 pool->attrs->no_numa = false;
3429
3430 if (worker_pool_assign_id(pool) < 0)
3431 goto fail;
3432
3433
3434 if (wq_online && !create_worker(pool))
3435 goto fail;
3436
3437
3438 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3439
3440 return pool;
3441fail:
3442 if (pool)
3443 put_unbound_pool(pool);
3444 return NULL;
3445}
3446
3447static void rcu_free_pwq(struct rcu_head *rcu)
3448{
3449 kmem_cache_free(pwq_cache,
3450 container_of(rcu, struct pool_workqueue, rcu));
3451}
3452
3453
3454
3455
3456
3457static void pwq_unbound_release_workfn(struct work_struct *work)
3458{
3459 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3460 unbound_release_work);
3461 struct workqueue_struct *wq = pwq->wq;
3462 struct worker_pool *pool = pwq->pool;
3463 bool is_last;
3464
3465 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3466 return;
3467
3468 mutex_lock(&wq->mutex);
3469 list_del_rcu(&pwq->pwqs_node);
3470 is_last = list_empty(&wq->pwqs);
3471 mutex_unlock(&wq->mutex);
3472
3473 mutex_lock(&wq_pool_mutex);
3474 put_unbound_pool(pool);
3475 mutex_unlock(&wq_pool_mutex);
3476
3477 call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3478
3479
3480
3481
3482
3483 if (is_last)
3484 call_rcu_sched(&wq->rcu, rcu_free_wq);
3485}
3486
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
3495static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3496{
3497 struct workqueue_struct *wq = pwq->wq;
3498 bool freezable = wq->flags & WQ_FREEZABLE;
3499 unsigned long flags;
3500
3501
3502 lockdep_assert_held(&wq->mutex);
3503
3504
3505 if (!freezable && pwq->max_active == wq->saved_max_active)
3506 return;
3507
3508
3509 spin_lock_irqsave(&pwq->pool->lock, flags);
3510
3511
3512
3513
3514
3515
3516 if (!freezable || !workqueue_freezing) {
3517 pwq->max_active = wq->saved_max_active;
3518
3519 while (!list_empty(&pwq->delayed_works) &&
3520 pwq->nr_active < pwq->max_active)
3521 pwq_activate_first_delayed(pwq);
3522
3523
3524
3525
3526
3527 wake_up_worker(pwq->pool);
3528 } else {
3529 pwq->max_active = 0;
3530 }
3531
3532 spin_unlock_irqrestore(&pwq->pool->lock, flags);
3533}
3534
3535
3536static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3537 struct worker_pool *pool)
3538{
3539 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3540
3541 memset(pwq, 0, sizeof(*pwq));
3542
3543 pwq->pool = pool;
3544 pwq->wq = wq;
3545 pwq->flush_color = -1;
3546 pwq->refcnt = 1;
3547 INIT_LIST_HEAD(&pwq->delayed_works);
3548 INIT_LIST_HEAD(&pwq->pwqs_node);
3549 INIT_LIST_HEAD(&pwq->mayday_node);
3550 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3551}
3552
3553
3554static void link_pwq(struct pool_workqueue *pwq)
3555{
3556 struct workqueue_struct *wq = pwq->wq;
3557
3558 lockdep_assert_held(&wq->mutex);
3559
3560
3561 if (!list_empty(&pwq->pwqs_node))
3562 return;
3563
3564
3565 pwq->work_color = wq->work_color;
3566
3567
3568 pwq_adjust_max_active(pwq);
3569
3570
3571 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3572}
3573
3574
3575static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3576 const struct workqueue_attrs *attrs)
3577{
3578 struct worker_pool *pool;
3579 struct pool_workqueue *pwq;
3580
3581 lockdep_assert_held(&wq_pool_mutex);
3582
3583 pool = get_unbound_pool(attrs);
3584 if (!pool)
3585 return NULL;
3586
3587 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3588 if (!pwq) {
3589 put_unbound_pool(pool);
3590 return NULL;
3591 }
3592
3593 init_pwq(pwq, wq, pool);
3594 return pwq;
3595}
3596
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
3619static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3620 int cpu_going_down, cpumask_t *cpumask)
3621{
3622 if (!wq_numa_enabled || attrs->no_numa)
3623 goto use_dfl;
3624
3625
3626 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3627 if (cpu_going_down >= 0)
3628 cpumask_clear_cpu(cpu_going_down, cpumask);
3629
3630 if (cpumask_empty(cpumask))
3631 goto use_dfl;
3632
3633
3634 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3635
3636 if (cpumask_empty(cpumask)) {
3637 pr_warn_once("WARNING: workqueue cpumask: online intersect > "
3638 "possible intersect\n");
3639 return false;
3640 }
3641
3642 return !cpumask_equal(cpumask, attrs->cpumask);
3643
3644use_dfl:
3645 cpumask_copy(cpumask, attrs->cpumask);
3646 return false;
3647}
3648
3649
3650static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3651 int node,
3652 struct pool_workqueue *pwq)
3653{
3654 struct pool_workqueue *old_pwq;
3655
3656 lockdep_assert_held(&wq_pool_mutex);
3657 lockdep_assert_held(&wq->mutex);
3658
3659
3660 link_pwq(pwq);
3661
3662 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3663 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3664 return old_pwq;
3665}
3666
3667
3668struct apply_wqattrs_ctx {
3669 struct workqueue_struct *wq;
3670 struct workqueue_attrs *attrs;
3671 struct list_head list;
3672 struct pool_workqueue *dfl_pwq;
3673 struct pool_workqueue *pwq_tbl[];
3674};
3675
3676
3677static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3678{
3679 if (ctx) {
3680 int node;
3681
3682 for_each_node(node)
3683 put_pwq_unlocked(ctx->pwq_tbl[node]);
3684 put_pwq_unlocked(ctx->dfl_pwq);
3685
3686 free_workqueue_attrs(ctx->attrs);
3687
3688 kfree(ctx);
3689 }
3690}
3691
3692
3693static struct apply_wqattrs_ctx *
3694apply_wqattrs_prepare(struct workqueue_struct *wq,
3695 const struct workqueue_attrs *attrs)
3696{
3697 struct apply_wqattrs_ctx *ctx;
3698 struct workqueue_attrs *new_attrs, *tmp_attrs;
3699 int node;
3700
3701 lockdep_assert_held(&wq_pool_mutex);
3702
3703 ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3704 GFP_KERNEL);
3705
3706 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3707 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3708 if (!ctx || !new_attrs || !tmp_attrs)
3709 goto out_free;
3710
3711
3712
3713
3714
3715
3716 copy_workqueue_attrs(new_attrs, attrs);
3717 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3718 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3719 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3720
3721
3722
3723
3724
3725
3726 copy_workqueue_attrs(tmp_attrs, new_attrs);
3727
3728
3729
3730
3731
3732
3733 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3734 if (!ctx->dfl_pwq)
3735 goto out_free;
3736
3737 for_each_node(node) {
3738 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3739 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3740 if (!ctx->pwq_tbl[node])
3741 goto out_free;
3742 } else {
3743 ctx->dfl_pwq->refcnt++;
3744 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3745 }
3746 }
3747
3748
3749 copy_workqueue_attrs(new_attrs, attrs);
3750 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3751 ctx->attrs = new_attrs;
3752
3753 ctx->wq = wq;
3754 free_workqueue_attrs(tmp_attrs);
3755 return ctx;
3756
3757out_free:
3758 free_workqueue_attrs(tmp_attrs);
3759 free_workqueue_attrs(new_attrs);
3760 apply_wqattrs_cleanup(ctx);
3761 return NULL;
3762}
3763
3764
3765static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3766{
3767 int node;
3768
3769
3770 mutex_lock(&ctx->wq->mutex);
3771
3772 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3773
3774
3775 for_each_node(node)
3776 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3777 ctx->pwq_tbl[node]);
3778
3779
3780 link_pwq(ctx->dfl_pwq);
3781 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3782
3783 mutex_unlock(&ctx->wq->mutex);
3784}
3785
3786static void apply_wqattrs_lock(void)
3787{
3788
3789 get_online_cpus();
3790 mutex_lock(&wq_pool_mutex);
3791}
3792
3793static void apply_wqattrs_unlock(void)
3794{
3795 mutex_unlock(&wq_pool_mutex);
3796 put_online_cpus();
3797}
3798
3799static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3800 const struct workqueue_attrs *attrs)
3801{
3802 struct apply_wqattrs_ctx *ctx;
3803
3804
3805 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3806 return -EINVAL;
3807
3808
3809 if (!list_empty(&wq->pwqs)) {
3810 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
3811 return -EINVAL;
3812
3813 wq->flags &= ~__WQ_ORDERED;
3814 }
3815
3816 ctx = apply_wqattrs_prepare(wq, attrs);
3817 if (!ctx)
3818 return -ENOMEM;
3819
3820
3821 apply_wqattrs_commit(ctx);
3822 apply_wqattrs_cleanup(ctx);
3823
3824 return 0;
3825}
3826
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node they were issued on.  Older pwqs are released as in-flight
 * work items finish.  Note that a work item which repeatedly requeues
 * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
3843int apply_workqueue_attrs(struct workqueue_struct *wq,
3844 const struct workqueue_attrs *attrs)
3845{
3846 int ret;
3847
3848 apply_wqattrs_lock();
3849 ret = apply_workqueue_attrs_locked(wq, attrs);
3850 apply_wqattrs_unlock();
3851
3852 return ret;
3853}
3854EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3879 bool online)
3880{
3881 int node = cpu_to_node(cpu);
3882 int cpu_off = online ? -1 : cpu;
3883 struct pool_workqueue *old_pwq = NULL, *pwq;
3884 struct workqueue_attrs *target_attrs;
3885 cpumask_t *cpumask;
3886
3887 lockdep_assert_held(&wq_pool_mutex);
3888
3889 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
3890 wq->unbound_attrs->no_numa)
3891 return;
3892
3893
3894
3895
3896
3897
3898 target_attrs = wq_update_unbound_numa_attrs_buf;
3899 cpumask = target_attrs->cpumask;
3900
3901 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3902 pwq = unbound_pwq_by_node(wq, node);
3903
3904
3905
3906
3907
3908
3909
3910 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
3911 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3912 return;
3913 } else {
3914 goto use_dfl_pwq;
3915 }
3916
3917
3918 pwq = alloc_unbound_pwq(wq, target_attrs);
3919 if (!pwq) {
3920 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3921 wq->name);
3922 goto use_dfl_pwq;
3923 }
3924
3925
3926 mutex_lock(&wq->mutex);
3927 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3928 goto out_unlock;
3929
3930use_dfl_pwq:
3931 mutex_lock(&wq->mutex);
3932 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3933 get_pwq(wq->dfl_pwq);
3934 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3935 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3936out_unlock:
3937 mutex_unlock(&wq->mutex);
3938 put_pwq_unlocked(old_pwq);
3939}
3940
3941static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3942{
3943 bool highpri = wq->flags & WQ_HIGHPRI;
3944 int cpu, ret;
3945
3946 if (!(wq->flags & WQ_UNBOUND)) {
3947 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3948 if (!wq->cpu_pwqs)
3949 return -ENOMEM;
3950
3951 for_each_possible_cpu(cpu) {
3952 struct pool_workqueue *pwq =
3953 per_cpu_ptr(wq->cpu_pwqs, cpu);
3954 struct worker_pool *cpu_pools =
3955 per_cpu(cpu_worker_pools, cpu);
3956
3957 init_pwq(pwq, wq, &cpu_pools[highpri]);
3958
3959 mutex_lock(&wq->mutex);
3960 link_pwq(pwq);
3961 mutex_unlock(&wq->mutex);
3962 }
3963 return 0;
3964 } else if (wq->flags & __WQ_ORDERED) {
3965 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
3966
3967 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3968 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3969 "ordering guarantee broken for workqueue %s\n", wq->name);
3970 return ret;
3971 } else {
3972 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3973 }
3974}
3975
3976static int wq_clamp_max_active(int max_active, unsigned int flags,
3977 const char *name)
3978{
3979 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
3980
3981 if (max_active < 1 || max_active > lim)
3982 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
3983 max_active, name, 1, lim);
3984
3985 return clamp_val(max_active, 1, lim);
3986}
3987
3988
3989
3990
3991
3992static int init_rescuer(struct workqueue_struct *wq)
3993{
3994 struct worker *rescuer;
3995 int ret;
3996
3997 if (!(wq->flags & WQ_MEM_RECLAIM))
3998 return 0;
3999
4000 rescuer = alloc_worker(NUMA_NO_NODE);
4001 if (!rescuer)
4002 return -ENOMEM;
4003
4004 rescuer->rescue_wq = wq;
4005 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4006 ret = PTR_ERR_OR_ZERO(rescuer->task);
4007 if (ret) {
4008 kfree(rescuer);
4009 return ret;
4010 }
4011
4012 wq->rescuer = rescuer;
4013 kthread_bind_mask(rescuer->task, cpu_possible_mask);
4014 wake_up_process(rescuer->task);
4015
4016 return 0;
4017}
4018
4019struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
4020 unsigned int flags,
4021 int max_active,
4022 struct lock_class_key *key,
4023 const char *lock_name, ...)
4024{
4025 size_t tbl_size = 0;
4026 va_list args;
4027 struct workqueue_struct *wq;
4028 struct pool_workqueue *pwq;
4029
4030
4031
4032
4033
4034
4035
4036
4037 if ((flags & WQ_UNBOUND) && max_active == 1)
4038 flags |= __WQ_ORDERED;
4039
4040
4041 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4042 flags |= WQ_UNBOUND;
4043
4044
4045 if (flags & WQ_UNBOUND)
4046 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4047
4048 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4049 if (!wq)
4050 return NULL;
4051
4052 if (flags & WQ_UNBOUND) {
4053 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4054 if (!wq->unbound_attrs)
4055 goto err_free_wq;
4056 }
4057
4058 va_start(args, lock_name);
4059 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4060 va_end(args);
4061
4062 max_active = max_active ?: WQ_DFL_ACTIVE;
4063 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4064
4065
4066 wq->flags = flags;
4067 wq->saved_max_active = max_active;
4068 mutex_init(&wq->mutex);
4069 atomic_set(&wq->nr_pwqs_to_flush, 0);
4070 INIT_LIST_HEAD(&wq->pwqs);
4071 INIT_LIST_HEAD(&wq->flusher_queue);
4072 INIT_LIST_HEAD(&wq->flusher_overflow);
4073 INIT_LIST_HEAD(&wq->maydays);
4074
4075 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4076 INIT_LIST_HEAD(&wq->list);
4077
4078 if (alloc_and_link_pwqs(wq) < 0)
4079 goto err_free_wq;
4080
4081 if (wq_online && init_rescuer(wq) < 0)
4082 goto err_destroy;
4083
4084 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4085 goto err_destroy;
4086
4087
4088
4089
4090
4091
4092 mutex_lock(&wq_pool_mutex);
4093
4094 mutex_lock(&wq->mutex);
4095 for_each_pwq(pwq, wq)
4096 pwq_adjust_max_active(pwq);
4097 mutex_unlock(&wq->mutex);
4098
4099 list_add_tail_rcu(&wq->list, &workqueues);
4100
4101 mutex_unlock(&wq_pool_mutex);
4102
4103 return wq;
4104
4105err_free_wq:
4106 free_workqueue_attrs(wq->unbound_attrs);
4107 kfree(wq);
4108 return NULL;
4109err_destroy:
4110 destroy_workqueue(wq);
4111 return NULL;
4112}
4113EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
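/*
 * Callers normally reach __alloc_workqueue_key() through the
 * alloc_workqueue()/alloc_ordered_workqueue() wrappers, which supply the
 * lockdep key and class name.  Illustrative sketch ("foo_wq" is
 * hypothetical):
 *
 *	struct workqueue_struct *foo_wq;
 *
 *	foo_wq = alloc_workqueue("foo_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *	if (!foo_wq)
 *		return -ENOMEM;
 *	...
 *	destroy_workqueue(foo_wq);
 */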
4114
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue.  All work currently pending will be done first.
 */
4121void destroy_workqueue(struct workqueue_struct *wq)
4122{
4123 struct pool_workqueue *pwq;
4124 int node;
4125
4126
4127 drain_workqueue(wq);
4128
4129
4130 mutex_lock(&wq->mutex);
4131 for_each_pwq(pwq, wq) {
4132 int i;
4133
4134 for (i = 0; i < WORK_NR_COLORS; i++) {
4135 if (WARN_ON(pwq->nr_in_flight[i])) {
4136 mutex_unlock(&wq->mutex);
4137 show_workqueue_state();
4138 return;
4139 }
4140 }
4141
4142 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4143 WARN_ON(pwq->nr_active) ||
4144 WARN_ON(!list_empty(&pwq->delayed_works))) {
4145 mutex_unlock(&wq->mutex);
4146 show_workqueue_state();
4147 return;
4148 }
4149 }
4150 mutex_unlock(&wq->mutex);
4151
4152
4153
4154
4155
4156 mutex_lock(&wq_pool_mutex);
4157 list_del_rcu(&wq->list);
4158 mutex_unlock(&wq_pool_mutex);
4159
4160 workqueue_sysfs_unregister(wq);
4161
4162 if (wq->rescuer)
4163 kthread_stop(wq->rescuer->task);
4164
4165 if (!(wq->flags & WQ_UNBOUND)) {
4166
4167
4168
4169
4170 call_rcu_sched(&wq->rcu, rcu_free_wq);
4171 } else {
4172
4173
4174
4175
4176
4177 for_each_node(node) {
4178 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4179 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4180 put_pwq_unlocked(pwq);
4181 }
4182
4183
4184
4185
4186
4187 pwq = wq->dfl_pwq;
4188 wq->dfl_pwq = NULL;
4189 put_pwq_unlocked(pwq);
4190 }
4191}
4192EXPORT_SYMBOL_GPL(destroy_workqueue);
4193
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value.
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
4204void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4205{
4206 struct pool_workqueue *pwq;
4207
4208
4209 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4210 return;
4211
4212 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4213
4214 mutex_lock(&wq->mutex);
4215
4216 wq->flags &= ~__WQ_ORDERED;
4217 wq->saved_max_active = max_active;
4218
4219 for_each_pwq(pwq, wq)
4220 pwq_adjust_max_active(pwq);
4221
4222 mutex_unlock(&wq->mutex);
4223}
4224EXPORT_SYMBOL_GPL(workqueue_set_max_active);
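/*
 * Illustrative use ("foo_wq" is hypothetical): throttle a workqueue to a
 * single in-flight work item per pwq at runtime, e.g. while a device is in
 * a degraded state, and restore the default later:
 *
 *	workqueue_set_max_active(foo_wq, 1);
 *	...
 *	workqueue_set_max_active(foo_wq, WQ_DFL_ACTIVE);
 */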
4225
/**
 * current_work - retrieve %current task's work struct
 *
 * Determine if %current task is a workqueue worker and what it's working on.
 * Useful to find out the context that the %current task is running in.
 *
 * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
 */
4234struct work_struct *current_work(void)
4235{
4236 struct worker *worker = current_wq_worker();
4237
4238 return worker ? worker->current_work : NULL;
4239}
4240EXPORT_SYMBOL(current_work);
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250bool current_is_workqueue_rescuer(void)
4251{
4252 struct worker *worker = current_wq_worker();
4253
4254 return worker && worker->rescue_wq;
4255}
4256
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 *
 * Both per-cpu and unbound workqueues may be associated with multiple
 * pool_workqueues which have separate congested states.  A workqueue
 * being congested on one CPU doesn't mean that it is also congested on
 * other CPUs / NUMA nodes.
 *
 * Return:
 * %true if congested, %false otherwise.
 */
4275bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4276{
4277 struct pool_workqueue *pwq;
4278 bool ret;
4279
4280 rcu_read_lock_sched();
4281
4282 if (cpu == WORK_CPU_UNBOUND)
4283 cpu = smp_processor_id();
4284
4285 if (!(wq->flags & WQ_UNBOUND))
4286 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4287 else
4288 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4289
4290 ret = !list_empty(&pwq->delayed_works);
4291 rcu_read_unlock_sched();
4292
4293 return ret;
4294}
4295EXPORT_SYMBOL_GPL(workqueue_congested);
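/*
 * Illustrative use ("foo_wq" and ->optional_work are hypothetical): the
 * result is only an advisory hint, e.g. for deciding whether to defer
 * optional background work:
 *
 *	if (!workqueue_congested(WORK_CPU_UNBOUND, foo_wq))
 *		queue_work(foo_wq, &foo->optional_work);
 */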
4296
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
4308unsigned int work_busy(struct work_struct *work)
4309{
4310 struct worker_pool *pool;
4311 unsigned long flags;
4312 unsigned int ret = 0;
4313
4314 if (work_pending(work))
4315 ret |= WORK_BUSY_PENDING;
4316
4317 local_irq_save(flags);
4318 pool = get_work_pool(work);
4319 if (pool) {
4320 spin_lock(&pool->lock);
4321 if (find_worker_executing_work(pool, work))
4322 ret |= WORK_BUSY_RUNNING;
4323 spin_unlock(&pool->lock);
4324 }
4325 local_irq_restore(flags);
4326
4327 return ret;
4328}
4329EXPORT_SYMBOL_GPL(work_busy);
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341void set_worker_desc(const char *fmt, ...)
4342{
4343 struct worker *worker = current_wq_worker();
4344 va_list args;
4345
4346 if (worker) {
4347 va_start(args, fmt);
4348 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4349 va_end(args);
4350 worker->desc_valid = true;
4351 }
4352}
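/*
 * Illustrative use from within a work function (foo_workfn() and foo->dev
 * are hypothetical): the description set here shows up in
 * print_worker_info() output, e.g. in task dumps of the executing kworker:
 *
 *	static void foo_workfn(struct work_struct *work)
 *	{
 *		set_worker_desc("flush-%s", dev_name(foo->dev));
 *		...
 *	}
 */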
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367void print_worker_info(const char *log_lvl, struct task_struct *task)
4368{
4369 work_func_t *fn = NULL;
4370 char name[WQ_NAME_LEN] = { };
4371 char desc[WORKER_DESC_LEN] = { };
4372 struct pool_workqueue *pwq = NULL;
4373 struct workqueue_struct *wq = NULL;
4374 bool desc_valid = false;
4375 struct worker *worker;
4376
4377 if (!(task->flags & PF_WQ_WORKER))
4378 return;
4379
4380
4381
4382
4383
4384 worker = kthread_probe_data(task);
4385
4386
4387
4388
4389
4390 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4391 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4392 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4393 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4394
4395
4396 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4397 if (desc_valid)
4398 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4399
4400 if (fn || name[0] || desc[0]) {
4401 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4402 if (desc[0])
4403 pr_cont(" (%s)", desc);
4404 pr_cont("\n");
4405 }
4406}
4407
4408static void pr_cont_pool_info(struct worker_pool *pool)
4409{
4410 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4411 if (pool->node != NUMA_NO_NODE)
4412 pr_cont(" node=%d", pool->node);
4413 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4414}
4415
4416static void pr_cont_work(bool comma, struct work_struct *work)
4417{
4418 if (work->func == wq_barrier_func) {
4419 struct wq_barrier *barr;
4420
4421 barr = container_of(work, struct wq_barrier, work);
4422
4423 pr_cont("%s BAR(%d)", comma ? "," : "",
4424 task_pid_nr(barr->task));
4425 } else {
4426 pr_cont("%s %pf", comma ? "," : "", work->func);
4427 }
4428}
4429
4430static void show_pwq(struct pool_workqueue *pwq)
4431{
4432 struct worker_pool *pool = pwq->pool;
4433 struct work_struct *work;
4434 struct worker *worker;
4435 bool has_in_flight = false, has_pending = false;
4436 int bkt;
4437
4438 pr_info(" pwq %d:", pool->id);
4439 pr_cont_pool_info(pool);
4440
4441 pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
4442 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4443
4444 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4445 if (worker->current_pwq == pwq) {
4446 has_in_flight = true;
4447 break;
4448 }
4449 }
4450 if (has_in_flight) {
4451 bool comma = false;
4452
4453 pr_info(" in-flight:");
4454 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4455 if (worker->current_pwq != pwq)
4456 continue;
4457
4458 pr_cont("%s %d%s:%pf", comma ? "," : "",
4459 task_pid_nr(worker->task),
4460 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4461 worker->current_func);
4462 list_for_each_entry(work, &worker->scheduled, entry)
4463 pr_cont_work(false, work);
4464 comma = true;
4465 }
4466 pr_cont("\n");
4467 }
4468
4469 list_for_each_entry(work, &pool->worklist, entry) {
4470 if (get_work_pwq(work) == pwq) {
4471 has_pending = true;
4472 break;
4473 }
4474 }
4475 if (has_pending) {
4476 bool comma = false;
4477
4478 pr_info(" pending:");
4479 list_for_each_entry(work, &pool->worklist, entry) {
4480 if (get_work_pwq(work) != pwq)
4481 continue;
4482
4483 pr_cont_work(comma, work);
4484 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4485 }
4486 pr_cont("\n");
4487 }
4488
4489 if (!list_empty(&pwq->delayed_works)) {
4490 bool comma = false;
4491
4492 pr_info(" delayed:");
4493 list_for_each_entry(work, &pwq->delayed_works, entry) {
4494 pr_cont_work(comma, work);
4495 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4496 }
4497 pr_cont("\n");
4498 }
4499}
4500
/**
 * show_workqueue_state - dump workqueue state
 *
 * Called from a sysrq handler and prints out all busy workqueues and
 * worker pools.
 */
4507void show_workqueue_state(void)
4508{
4509 struct workqueue_struct *wq;
4510 struct worker_pool *pool;
4511 unsigned long flags;
4512 int pi;
4513
4514 rcu_read_lock_sched();
4515
4516 pr_info("Showing busy workqueues and worker pools:\n");
4517
4518 list_for_each_entry_rcu(wq, &workqueues, list) {
4519 struct pool_workqueue *pwq;
4520 bool idle = true;
4521
4522 for_each_pwq(pwq, wq) {
4523 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4524 idle = false;
4525 break;
4526 }
4527 }
4528 if (idle)
4529 continue;
4530
4531 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4532
4533 for_each_pwq(pwq, wq) {
4534 spin_lock_irqsave(&pwq->pool->lock, flags);
4535 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4536 show_pwq(pwq);
4537 spin_unlock_irqrestore(&pwq->pool->lock, flags);
4538
4539
4540
4541
4542
4543 touch_nmi_watchdog();
4544 }
4545 }
4546
4547 for_each_pool(pool, pi) {
4548 struct worker *worker;
4549 bool first = true;
4550
4551 spin_lock_irqsave(&pool->lock, flags);
4552 if (pool->nr_workers == pool->nr_idle)
4553 goto next_pool;
4554
4555 pr_info("pool %d:", pool->id);
4556 pr_cont_pool_info(pool);
4557 pr_cont(" hung=%us workers=%d",
4558 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4559 pool->nr_workers);
4560 if (pool->manager)
4561 pr_cont(" manager: %d",
4562 task_pid_nr(pool->manager->task));
4563 list_for_each_entry(worker, &pool->idle_list, entry) {
4564 pr_cont(" %s%d", first ? "idle: " : "",
4565 task_pid_nr(worker->task));
4566 first = false;
4567 }
4568 pr_cont("\n");
4569 next_pool:
4570 spin_unlock_irqrestore(&pool->lock, flags);
4571
4572
4573
4574
4575
4576 touch_nmi_watchdog();
4577 }
4578
4579 rcu_read_unlock_sched();
4580}
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597static void unbind_workers(int cpu)
4598{
4599 struct worker_pool *pool;
4600 struct worker *worker;
4601
4602 for_each_cpu_worker_pool(pool, cpu) {
4603 mutex_lock(&pool->attach_mutex);
4604 spin_lock_irq(&pool->lock);
4605
4606
4607
4608
4609
4610
4611
4612
4613 for_each_pool_worker(worker, pool)
4614 worker->flags |= WORKER_UNBOUND;
4615
4616 pool->flags |= POOL_DISASSOCIATED;
4617
4618 spin_unlock_irq(&pool->lock);
4619 mutex_unlock(&pool->attach_mutex);
4620
4621
4622
4623
4624
4625
4626
4627 schedule();
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637 atomic_set(&pool->nr_running, 0);
4638
4639
4640
4641
4642
4643
4644 spin_lock_irq(&pool->lock);
4645 wake_up_worker(pool);
4646 spin_unlock_irq(&pool->lock);
4647 }
4648}
4649
4650
4651
4652
4653
4654
4655
4656static void rebind_workers(struct worker_pool *pool)
4657{
4658 struct worker *worker;
4659
4660 lockdep_assert_held(&pool->attach_mutex);
4661
4662
4663
4664
4665
4666
4667
4668
4669 for_each_pool_worker(worker, pool)
4670 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4671 pool->attrs->cpumask) < 0);
4672
4673 spin_lock_irq(&pool->lock);
4674
4675 pool->flags &= ~POOL_DISASSOCIATED;
4676
4677 for_each_pool_worker(worker, pool) {
4678 unsigned int worker_flags = worker->flags;
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688 if (worker_flags & WORKER_IDLE)
4689 wake_up_process(worker->task);
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4707 worker_flags |= WORKER_REBOUND;
4708 worker_flags &= ~WORKER_UNBOUND;
4709 WRITE_ONCE(worker->flags, worker_flags);
4710 }
4711
4712 spin_unlock_irq(&pool->lock);
4713}
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4726{
4727 static cpumask_t cpumask;
4728 struct worker *worker;
4729
4730 lockdep_assert_held(&pool->attach_mutex);
4731
4732
4733 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4734 return;
4735
4736 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4737
4738
4739 for_each_pool_worker(worker, pool)
4740 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
4741}
4742
4743int workqueue_prepare_cpu(unsigned int cpu)
4744{
4745 struct worker_pool *pool;
4746
4747 for_each_cpu_worker_pool(pool, cpu) {
4748 if (pool->nr_workers)
4749 continue;
4750 if (!create_worker(pool))
4751 return -ENOMEM;
4752 }
4753 return 0;
4754}
4755
4756int workqueue_online_cpu(unsigned int cpu)
4757{
4758 struct worker_pool *pool;
4759 struct workqueue_struct *wq;
4760 int pi;
4761
4762 mutex_lock(&wq_pool_mutex);
4763
4764 for_each_pool(pool, pi) {
4765 mutex_lock(&pool->attach_mutex);
4766
4767 if (pool->cpu == cpu)
4768 rebind_workers(pool);
4769 else if (pool->cpu < 0)
4770 restore_unbound_workers_cpumask(pool, cpu);
4771
4772 mutex_unlock(&pool->attach_mutex);
4773 }
4774
4775
4776 list_for_each_entry(wq, &workqueues, list)
4777 wq_update_unbound_numa(wq, cpu, true);
4778
4779 mutex_unlock(&wq_pool_mutex);
4780 return 0;
4781}
4782
4783int workqueue_offline_cpu(unsigned int cpu)
4784{
4785 struct workqueue_struct *wq;
4786
4787
4788 if (WARN_ON(cpu != smp_processor_id()))
4789 return -1;
4790
4791 unbind_workers(cpu);
4792
4793
4794 mutex_lock(&wq_pool_mutex);
4795 list_for_each_entry(wq, &workqueues, list)
4796 wq_update_unbound_numa(wq, cpu, false);
4797 mutex_unlock(&wq_pool_mutex);
4798
4799 return 0;
4800}
4801
4802#ifdef CONFIG_SMP
4803
4804struct work_for_cpu {
4805 struct work_struct work;
4806 long (*fn)(void *);
4807 void *arg;
4808 long ret;
4809};
4810
4811static void work_for_cpu_fn(struct work_struct *work)
4812{
4813 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4814
4815 wfc->ret = wfc->fn(wfc->arg);
4816}
4817
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
4829long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4830{
4831 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4832
4833 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4834 schedule_work_on(cpu, &wfc.work);
4835 flush_work(&wfc.work);
4836 destroy_work_on_stack(&wfc.work);
4837 return wfc.ret;
4838}
4839EXPORT_SYMBOL_GPL(work_on_cpu);
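/*
 * Illustrative use (read_foo_reg_fn() and do_read_foo_reg() are made-up
 * helpers): run a short function on a specific CPU from process context and
 * collect its return value:
 *
 *	static long read_foo_reg_fn(void *arg)
 *	{
 *		return do_read_foo_reg((unsigned long)arg);
 *	}
 *
 *	ret = work_on_cpu(2, read_foo_reg_fn, (void *)reg);
 *
 * The caller must ensure CPU 2 stays online; see work_on_cpu_safe() below
 * for a variant which checks under get_online_cpus().
 */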
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
4853{
4854 long ret = -ENODEV;
4855
4856 get_online_cpus();
4857 if (cpu_online(cpu))
4858 ret = work_on_cpu(cpu, fn, arg);
4859 put_online_cpus();
4860 return ret;
4861}
4862EXPORT_SYMBOL_GPL(work_on_cpu_safe);
4863#endif
4864
4865#ifdef CONFIG_FREEZER
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877void freeze_workqueues_begin(void)
4878{
4879 struct workqueue_struct *wq;
4880 struct pool_workqueue *pwq;
4881
4882 mutex_lock(&wq_pool_mutex);
4883
4884 WARN_ON_ONCE(workqueue_freezing);
4885 workqueue_freezing = true;
4886
4887 list_for_each_entry(wq, &workqueues, list) {
4888 mutex_lock(&wq->mutex);
4889 for_each_pwq(pwq, wq)
4890 pwq_adjust_max_active(pwq);
4891 mutex_unlock(&wq->mutex);
4892 }
4893
4894 mutex_unlock(&wq_pool_mutex);
4895}
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910bool freeze_workqueues_busy(void)
4911{
4912 bool busy = false;
4913 struct workqueue_struct *wq;
4914 struct pool_workqueue *pwq;
4915
4916 mutex_lock(&wq_pool_mutex);
4917
4918 WARN_ON_ONCE(!workqueue_freezing);
4919
4920 list_for_each_entry(wq, &workqueues, list) {
4921 if (!(wq->flags & WQ_FREEZABLE))
4922 continue;
4923
4924
4925
4926
4927 rcu_read_lock_sched();
4928 for_each_pwq(pwq, wq) {
4929 WARN_ON_ONCE(pwq->nr_active < 0);
4930 if (pwq->nr_active) {
4931 busy = true;
4932 rcu_read_unlock_sched();
4933 goto out_unlock;
4934 }
4935 }
4936 rcu_read_unlock_sched();
4937 }
4938out_unlock:
4939 mutex_unlock(&wq_pool_mutex);
4940 return busy;
4941}
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952void thaw_workqueues(void)
4953{
4954 struct workqueue_struct *wq;
4955 struct pool_workqueue *pwq;
4956
4957 mutex_lock(&wq_pool_mutex);
4958
4959 if (!workqueue_freezing)
4960 goto out_unlock;
4961
4962 workqueue_freezing = false;
4963
4964
4965 list_for_each_entry(wq, &workqueues, list) {
4966 mutex_lock(&wq->mutex);
4967 for_each_pwq(pwq, wq)
4968 pwq_adjust_max_active(pwq);
4969 mutex_unlock(&wq->mutex);
4970 }
4971
4972out_unlock:
4973 mutex_unlock(&wq_pool_mutex);
4974}
4975#endif
4976
4977static int workqueue_apply_unbound_cpumask(void)
4978{
4979 LIST_HEAD(ctxs);
4980 int ret = 0;
4981 struct workqueue_struct *wq;
4982 struct apply_wqattrs_ctx *ctx, *n;
4983
4984 lockdep_assert_held(&wq_pool_mutex);
4985
4986 list_for_each_entry(wq, &workqueues, list) {
4987 if (!(wq->flags & WQ_UNBOUND))
4988 continue;
4989
4990 if (wq->flags & __WQ_ORDERED)
4991 continue;
4992
4993 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
4994 if (!ctx) {
4995 ret = -ENOMEM;
4996 break;
4997 }
4998
4999 list_add_tail(&ctx->list, &ctxs);
5000 }
5001
5002 list_for_each_entry_safe(ctx, n, &ctxs, list) {
5003 if (!ret)
5004 apply_wqattrs_commit(ctx);
5005 apply_wqattrs_cleanup(ctx);
5006 }
5007
5008 return ret;
5009}
5010
/**
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueues cpumask is a global cpumask that limits the
 * affinity of all unbound workqueues.  This function checks @cpumask and
 * applies it to all unbound workqueues, updating all of their pwqs.
 *
 * Return:	0	- success
 *		-EINVAL	- @cpumask doesn't include any possible CPU
 *		-ENOMEM	- failed to allocate memory for attrs or pwqs
 */
5023int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5024{
5025 int ret = -EINVAL;
5026 cpumask_var_t saved_cpumask;
5027
5028 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
5029 return -ENOMEM;
5030
5031
5032
5033
5034
5035 cpumask_and(cpumask, cpumask, cpu_possible_mask);
5036 if (!cpumask_empty(cpumask)) {
5037 apply_wqattrs_lock();
5038
5039
5040 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
5041
5042
5043 cpumask_copy(wq_unbound_cpumask, cpumask);
5044 ret = workqueue_apply_unbound_cpumask();
5045
5046
5047 if (ret < 0)
5048 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
5049
5050 apply_wqattrs_unlock();
5051 }
5052
5053 free_cpumask_var(saved_cpumask);
5054 return ret;
5055}
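/*
 * Besides this kernel-internal entry point, the same mask is exposed
 * read/write through sysfs (see wq_unbound_cpumask_store() below), so the
 * restriction can also be applied from userspace, e.g. (illustrative,
 * assuming CONFIG_SYSFS and the attribute registered by wq_sysfs_init()):
 *
 *	# echo 0-3 > /sys/devices/virtual/workqueue/cpumask
 */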
5056
5057#ifdef CONFIG_SYSFS
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073struct wq_device {
5074 struct workqueue_struct *wq;
5075 struct device dev;
5076};
5077
5078static struct workqueue_struct *dev_to_wq(struct device *dev)
5079{
5080 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5081
5082 return wq_dev->wq;
5083}
5084
5085static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5086 char *buf)
5087{
5088 struct workqueue_struct *wq = dev_to_wq(dev);
5089
5090 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5091}
5092static DEVICE_ATTR_RO(per_cpu);
5093
5094static ssize_t max_active_show(struct device *dev,
5095 struct device_attribute *attr, char *buf)
5096{
5097 struct workqueue_struct *wq = dev_to_wq(dev);
5098
5099 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5100}
5101
5102static ssize_t max_active_store(struct device *dev,
5103 struct device_attribute *attr, const char *buf,
5104 size_t count)
5105{
5106 struct workqueue_struct *wq = dev_to_wq(dev);
5107 int val;
5108
5109 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5110 return -EINVAL;
5111
5112 workqueue_set_max_active(wq, val);
5113 return count;
5114}
5115static DEVICE_ATTR_RW(max_active);
5116
5117static struct attribute *wq_sysfs_attrs[] = {
5118 &dev_attr_per_cpu.attr,
5119 &dev_attr_max_active.attr,
5120 NULL,
5121};
5122ATTRIBUTE_GROUPS(wq_sysfs);
5123
5124static ssize_t wq_pool_ids_show(struct device *dev,
5125 struct device_attribute *attr, char *buf)
5126{
5127 struct workqueue_struct *wq = dev_to_wq(dev);
5128 const char *delim = "";
5129 int node, written = 0;
5130
5131 rcu_read_lock_sched();
5132 for_each_node(node) {
5133 written += scnprintf(buf + written, PAGE_SIZE - written,
5134 "%s%d:%d", delim, node,
5135 unbound_pwq_by_node(wq, node)->pool->id);
5136 delim = " ";
5137 }
5138 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5139 rcu_read_unlock_sched();
5140
5141 return written;
5142}
5143
5144static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5145 char *buf)
5146{
5147 struct workqueue_struct *wq = dev_to_wq(dev);
5148 int written;
5149
5150 mutex_lock(&wq->mutex);
5151 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5152 mutex_unlock(&wq->mutex);
5153
5154 return written;
5155}
5156
5157
5158static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5159{
5160 struct workqueue_attrs *attrs;
5161
5162 lockdep_assert_held(&wq_pool_mutex);
5163
5164 attrs = alloc_workqueue_attrs(GFP_KERNEL);
5165 if (!attrs)
5166 return NULL;
5167
5168 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5169 return attrs;
5170}
5171
5172static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5173 const char *buf, size_t count)
5174{
5175 struct workqueue_struct *wq = dev_to_wq(dev);
5176 struct workqueue_attrs *attrs;
5177 int ret = -ENOMEM;
5178
5179 apply_wqattrs_lock();
5180
5181 attrs = wq_sysfs_prep_attrs(wq);
5182 if (!attrs)
5183 goto out_unlock;
5184
5185 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5186 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5187 ret = apply_workqueue_attrs_locked(wq, attrs);
5188 else
5189 ret = -EINVAL;
5190
5191out_unlock:
5192 apply_wqattrs_unlock();
5193 free_workqueue_attrs(attrs);
5194 return ret ?: count;
5195}
5196
5197static ssize_t wq_cpumask_show(struct device *dev,
5198 struct device_attribute *attr, char *buf)
5199{
5200 struct workqueue_struct *wq = dev_to_wq(dev);
5201 int written;
5202
5203 mutex_lock(&wq->mutex);
5204 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5205 cpumask_pr_args(wq->unbound_attrs->cpumask));
5206 mutex_unlock(&wq->mutex);
5207 return written;
5208}
5209
5210static ssize_t wq_cpumask_store(struct device *dev,
5211 struct device_attribute *attr,
5212 const char *buf, size_t count)
5213{
5214 struct workqueue_struct *wq = dev_to_wq(dev);
5215 struct workqueue_attrs *attrs;
5216 int ret = -ENOMEM;
5217
5218 apply_wqattrs_lock();
5219
5220 attrs = wq_sysfs_prep_attrs(wq);
5221 if (!attrs)
5222 goto out_unlock;
5223
5224 ret = cpumask_parse(buf, attrs->cpumask);
5225 if (!ret)
5226 ret = apply_workqueue_attrs_locked(wq, attrs);
5227
5228out_unlock:
5229 apply_wqattrs_unlock();
5230 free_workqueue_attrs(attrs);
5231 return ret ?: count;
5232}
5233
5234static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5235 char *buf)
5236{
5237 struct workqueue_struct *wq = dev_to_wq(dev);
5238 int written;
5239
5240 mutex_lock(&wq->mutex);
5241 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5242 !wq->unbound_attrs->no_numa);
5243 mutex_unlock(&wq->mutex);
5244
5245 return written;
5246}
5247
5248static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5249 const char *buf, size_t count)
5250{
5251 struct workqueue_struct *wq = dev_to_wq(dev);
5252 struct workqueue_attrs *attrs;
5253 int v, ret = -ENOMEM;
5254
5255 apply_wqattrs_lock();
5256
5257 attrs = wq_sysfs_prep_attrs(wq);
5258 if (!attrs)
5259 goto out_unlock;
5260
5261 ret = -EINVAL;
5262 if (sscanf(buf, "%d", &v) == 1) {
5263 attrs->no_numa = !v;
5264 ret = apply_workqueue_attrs_locked(wq, attrs);
5265 }
5266
5267out_unlock:
5268 apply_wqattrs_unlock();
5269 free_workqueue_attrs(attrs);
5270 return ret ?: count;
5271}
5272
5273static struct device_attribute wq_sysfs_unbound_attrs[] = {
5274 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5275 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5276 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5277 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5278 __ATTR_NULL,
5279};
5280
5281static struct bus_type wq_subsys = {
5282 .name = "workqueue",
5283 .dev_groups = wq_sysfs_groups,
5284};
5285
5286static ssize_t wq_unbound_cpumask_show(struct device *dev,
5287 struct device_attribute *attr, char *buf)
5288{
5289 int written;
5290
5291 mutex_lock(&wq_pool_mutex);
5292 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5293 cpumask_pr_args(wq_unbound_cpumask));
5294 mutex_unlock(&wq_pool_mutex);
5295
5296 return written;
5297}
5298
5299static ssize_t wq_unbound_cpumask_store(struct device *dev,
5300 struct device_attribute *attr, const char *buf, size_t count)
5301{
5302 cpumask_var_t cpumask;
5303 int ret;
5304
5305 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5306 return -ENOMEM;
5307
5308 ret = cpumask_parse(buf, cpumask);
5309 if (!ret)
5310 ret = workqueue_set_unbound_cpumask(cpumask);
5311
5312 free_cpumask_var(cpumask);
5313 return ret ? ret : count;
5314}
5315
5316static struct device_attribute wq_sysfs_cpumask_attr =
5317 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5318 wq_unbound_cpumask_store);
5319
5320static int __init wq_sysfs_init(void)
5321{
5322 int err;
5323
5324 err = subsys_virtual_register(&wq_subsys, NULL);
5325 if (err)
5326 return err;
5327
5328 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5329}
5330core_initcall(wq_sysfs_init);
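/*
 * Once wq_sysfs_init() has run, workqueues created with WQ_SYSFS (or
 * registered later via workqueue_sysfs_register()) appear under
 * /sys/devices/virtual/workqueue/<wq-name>/ with the attributes defined
 * above, e.g. (illustrative listing for a hypothetical unbound workqueue
 * named "foo_wq"):
 *
 *	/sys/devices/virtual/workqueue/foo_wq/per_cpu
 *	/sys/devices/virtual/workqueue/foo_wq/max_active
 *	/sys/devices/virtual/workqueue/foo_wq/nice
 *	/sys/devices/virtual/workqueue/foo_wq/cpumask
 */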
5331
5332static void wq_device_release(struct device *dev)
5333{
5334 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5335
5336 kfree(wq_dev);
5337}
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354int workqueue_sysfs_register(struct workqueue_struct *wq)
5355{
5356 struct wq_device *wq_dev;
5357 int ret;
5358
5359
5360
5361
5362
5363
5364 if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
5365 return -EINVAL;
5366
5367 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5368 if (!wq_dev)
5369 return -ENOMEM;
5370
5371 wq_dev->wq = wq;
5372 wq_dev->dev.bus = &wq_subsys;
5373 wq_dev->dev.release = wq_device_release;
5374 dev_set_name(&wq_dev->dev, "%s", wq->name);
5375
5376
5377
5378
5379
5380 dev_set_uevent_suppress(&wq_dev->dev, true);
5381
5382 ret = device_register(&wq_dev->dev);
5383 if (ret) {
5384 put_device(&wq_dev->dev);
5385 wq->wq_dev = NULL;
5386 return ret;
5387 }
5388
5389 if (wq->flags & WQ_UNBOUND) {
5390 struct device_attribute *attr;
5391
5392 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5393 ret = device_create_file(&wq_dev->dev, attr);
5394 if (ret) {
5395 device_unregister(&wq_dev->dev);
5396 wq->wq_dev = NULL;
5397 return ret;
5398 }
5399 }
5400 }
5401
5402 dev_set_uevent_suppress(&wq_dev->dev, false);
5403 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5404 return 0;
5405}
5406
5407
5408
5409
5410
5411
5412
5413static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5414{
5415 struct wq_device *wq_dev = wq->wq_dev;
5416
5417 if (!wq->wq_dev)
5418 return;
5419
5420 wq->wq_dev = NULL;
5421 device_unregister(&wq_dev->dev);
5422}
5423#else
5424static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5425#endif
5426
/*
 * Workqueue watchdog.
 *
 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
 * flush dependency, a concurrency managed work item which stays RUNNING
 * indefinitely.  Workqueue stalls can be very difficult to debug as the
 * usual warning mechanisms don't trigger and internal workqueue state is
 * largely opaque.
 *
 * Workqueue watchdog monitors all worker pools periodically and dumps
 * state if some pools failed to make forward progress for the configured
 * threshold duration, as tracked by each pool's watchdog timestamp.
 *
 * This mechanism is controlled through the kernel parameter
 * "workqueue.watchdog_thresh" which can be updated at runtime through the
 * corresponding sysfs parameter file.
 */
5444#ifdef CONFIG_WQ_WATCHDOG
5445
5446static unsigned long wq_watchdog_thresh = 30;
5447static struct timer_list wq_watchdog_timer;
5448
5449static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5450static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5451
5452static void wq_watchdog_reset_touched(void)
5453{
5454 int cpu;
5455
5456 wq_watchdog_touched = jiffies;
5457 for_each_possible_cpu(cpu)
5458 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5459}
5460
5461static void wq_watchdog_timer_fn(struct timer_list *unused)
5462{
5463 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5464 bool lockup_detected = false;
5465 struct worker_pool *pool;
5466 int pi;
5467
5468 if (!thresh)
5469 return;
5470
5471 rcu_read_lock();
5472
5473 for_each_pool(pool, pi) {
5474 unsigned long pool_ts, touched, ts;
5475
5476 if (list_empty(&pool->worklist))
5477 continue;
5478
5479
5480 pool_ts = READ_ONCE(pool->watchdog_ts);
5481 touched = READ_ONCE(wq_watchdog_touched);
5482
5483 if (time_after(pool_ts, touched))
5484 ts = pool_ts;
5485 else
5486 ts = touched;
5487
5488 if (pool->cpu >= 0) {
5489 unsigned long cpu_touched =
5490 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5491 pool->cpu));
5492 if (time_after(cpu_touched, ts))
5493 ts = cpu_touched;
5494 }
5495
5496
5497 if (time_after(jiffies, ts + thresh)) {
5498 lockup_detected = true;
5499 pr_emerg("BUG: workqueue lockup - pool");
5500 pr_cont_pool_info(pool);
5501 pr_cont(" stuck for %us!\n",
5502 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5503 }
5504 }
5505
5506 rcu_read_unlock();
5507
5508 if (lockup_detected)
5509 show_workqueue_state();
5510
5511 wq_watchdog_reset_touched();
5512 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5513}
5514
5515void wq_watchdog_touch(int cpu)
5516{
5517 if (cpu >= 0)
5518 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5519 else
5520 wq_watchdog_touched = jiffies;
5521}
5522
5523static void wq_watchdog_set_thresh(unsigned long thresh)
5524{
5525 wq_watchdog_thresh = 0;
5526 del_timer_sync(&wq_watchdog_timer);
5527
5528 if (thresh) {
5529 wq_watchdog_thresh = thresh;
5530 wq_watchdog_reset_touched();
5531 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5532 }
5533}
5534
5535static int wq_watchdog_param_set_thresh(const char *val,
5536 const struct kernel_param *kp)
5537{
5538 unsigned long thresh;
5539 int ret;
5540
5541 ret = kstrtoul(val, 0, &thresh);
5542 if (ret)
5543 return ret;
5544
5545 if (system_wq)
5546 wq_watchdog_set_thresh(thresh);
5547 else
5548 wq_watchdog_thresh = thresh;
5549
5550 return 0;
5551}
5552
5553static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5554 .set = wq_watchdog_param_set_thresh,
5555 .get = param_get_ulong,
5556};
5557
5558module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5559 0644);
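/*
 * With the "workqueue." prefix of this built-in file, the parameter is
 * exposed as workqueue.watchdog_thresh and can be set on the kernel
 * command line or at runtime, e.g. (illustrative):
 *
 *	workqueue.watchdog_thresh=60		(boot parameter, seconds)
 *	# echo 0 > /sys/module/workqueue/parameters/watchdog_thresh
 *
 * Writing 0 disables the watchdog; non-zero values rearm it via
 * wq_watchdog_set_thresh().
 */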
5560
5561static void wq_watchdog_init(void)
5562{
5563 timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
5564 wq_watchdog_set_thresh(wq_watchdog_thresh);
5565}
5566
5567#else
5568
5569static inline void wq_watchdog_init(void) { }
5570
5571#endif
5572
5573static void __init wq_numa_init(void)
5574{
5575 cpumask_var_t *tbl;
5576 int node, cpu;
5577
5578 if (num_possible_nodes() <= 1)
5579 return;
5580
5581 if (wq_disable_numa) {
5582 pr_info("workqueue: NUMA affinity support disabled\n");
5583 return;
5584 }
5585
5586 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5587 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5588
5589
5590
5591
5592
5593
5594 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
5595 BUG_ON(!tbl);
5596
5597 for_each_node(node)
5598 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5599 node_online(node) ? node : NUMA_NO_NODE));
5600
5601 for_each_possible_cpu(cpu) {
5602 node = cpu_to_node(cpu);
5603 if (WARN_ON(node == NUMA_NO_NODE)) {
5604 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5605
5606 return;
5607 }
5608 cpumask_set_cpu(cpu, tbl[node]);
5609 }
5610
5611 wq_numa_possible_cpumask = tbl;
5612 wq_numa_enabled = true;
5613}
5614
/**
 * workqueue_init_early - early init for workqueue subsystem
 *
 * This is the first half of two-staged workqueue subsystem initialization
 * and invoked as soon as the bare basics - memory allocation, cpumasks and
 * idr are up.  It sets up all the data structures and system workqueues
 * and allows early boot code to create workqueues and queue/cancel work
 * items.  Actual work item execution starts only after kthreads can be
 * created and scheduled right before early initcalls.
 */
5625int __init workqueue_init_early(void)
5626{
5627 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
5628 int hk_flags = HK_FLAG_DOMAIN | HK_FLAG_WQ;
5629 int i, cpu;
5630
5631 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5632
5633 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5634 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(hk_flags));
5635
5636 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5637
5638
5639 for_each_possible_cpu(cpu) {
5640 struct worker_pool *pool;
5641
5642 i = 0;
5643 for_each_cpu_worker_pool(pool, cpu) {
5644 BUG_ON(init_worker_pool(pool));
5645 pool->cpu = cpu;
5646 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
5647 pool->attrs->nice = std_nice[i++];
5648 pool->node = cpu_to_node(cpu);
5649
5650
5651 mutex_lock(&wq_pool_mutex);
5652 BUG_ON(worker_pool_assign_id(pool));
5653 mutex_unlock(&wq_pool_mutex);
5654 }
5655 }
5656
5657
5658 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5659 struct workqueue_attrs *attrs;
5660
5661 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5662 attrs->nice = std_nice[i];
5663 unbound_std_wq_attrs[i] = attrs;
5664
5665
5666
5667
5668
5669
5670 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5671 attrs->nice = std_nice[i];
5672 attrs->no_numa = true;
5673 ordered_wq_attrs[i] = attrs;
5674 }
5675
5676 system_wq = alloc_workqueue("events", 0, 0);
5677 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5678 system_long_wq = alloc_workqueue("events_long", 0, 0);
5679 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5680 WQ_UNBOUND_MAX_ACTIVE);
5681 system_freezable_wq = alloc_workqueue("events_freezable",
5682 WQ_FREEZABLE, 0);
5683 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5684 WQ_POWER_EFFICIENT, 0);
5685 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5686 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5687 0);
5688 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5689 !system_unbound_wq || !system_freezable_wq ||
5690 !system_power_efficient_wq ||
5691 !system_freezable_power_efficient_wq);
5692
5693 return 0;
5694}
5695
/**
 * workqueue_init - bring workqueue subsystem fully online
 *
 * This is the latter half of two-staged workqueue subsystem initialization
 * and invoked as soon as kthreads can be created and scheduled.
 * Workqueues have been created and work items queued on them, but there
 * are no kworkers executing the work items yet.  Populate the worker pools
 * with the initial workers and enable future kworker creations.
 */
5705int __init workqueue_init(void)
5706{
5707 struct workqueue_struct *wq;
5708 struct worker_pool *pool;
5709 int cpu, bkt;
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720 wq_numa_init();
5721
5722 mutex_lock(&wq_pool_mutex);
5723
5724 for_each_possible_cpu(cpu) {
5725 for_each_cpu_worker_pool(pool, cpu) {
5726 pool->node = cpu_to_node(cpu);
5727 }
5728 }
5729
5730 list_for_each_entry(wq, &workqueues, list) {
5731 wq_update_unbound_numa(wq, smp_processor_id(), true);
5732 WARN(init_rescuer(wq),
5733 "workqueue: failed to create early rescuer for %s",
5734 wq->name);
5735 }
5736
5737 mutex_unlock(&wq_pool_mutex);
5738
5739
5740 for_each_online_cpu(cpu) {
5741 for_each_cpu_worker_pool(pool, cpu) {
5742 pool->flags &= ~POOL_DISASSOCIATED;
5743 BUG_ON(!create_worker(pool));
5744 }
5745 }
5746
5747 hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
5748 BUG_ON(!create_worker(pool));
5749
5750 wq_online = true;
5751 wq_watchdog_init();
5752
5753 return 0;
5754}
5755