/*
 * kernel/workqueue.c - generic async execution with shared worker pools
 *
 * Core concurrency-managed workqueue implementation: per-cpu and unbound
 * worker pools shared by all workqueues, plus the pool_workqueue glue
 * that links each workqueue to the pools executing its work items.
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>

#include "workqueue_internal.h"

enum {
	/* worker_pool flags */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* worker is dying */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescuers are nice level MIN_NICE so that they are preferred over
	 * CPU hogs; ditto for the workers of highpri workqueues.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

/*
 * Most fields below are protected by pool->lock; pool identity and
 * membership fields are additionally guarded by wq_pool_mutex,
 * pool->attach_mutex or sched-RCU as noted on the individual fields.
 */

struct worker_pool {
	spinlock_t		lock;		/* protects the pool */
	int			cpu;		/* the associated cpu, -1 if unbound */
	int			node;		/* the associated node ID */
	int			id;		/* pool ID */
	unsigned int		flags;		/* POOL_* flags */

	unsigned long		watchdog_ts;	/* watchdog timestamp */

	struct list_head	worklist;	/* list of pending works */
	int			nr_workers;	/* total number of workers */

	/* nr_idle includes the ones off idle_list for rebinding */
	int			nr_idle;	/* currently idle workers */

	struct list_head	idle_list;	/* list of idle workers */
	struct timer_list	idle_timer;	/* worker idle timeout */
	struct timer_list	mayday_timer;	/* SOS timer for workers */

	/* a worker is either on busy_hash or idle_list, or the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
						/* hash of busy workers */

	/* see manage_workers() and put_unbound_pool() for the manager rules */
	struct mutex		manager_arb;	/* manager arbitration */
	struct worker		*manager;	/* purely informational */
	struct mutex		attach_mutex;	/* protects worker attach/detach */
	struct list_head	workers;	/* attached workers */
	struct completion	*detach_completion; /* all workers detached */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* worker attributes */
	struct hlist_node	hash_node;	/* unbound_pool_hash node */
	int			refcnt;		/* refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t		nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is sched-RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool workqueue.  While a work item is queued, the pwq pointer is
 * stored in the upper bits of work->data (see set_work_pwq()), which is
 * why pwqs are aligned to 1 << WORK_STRUCT_FLAG_BITS.
 */
struct pool_workqueue {
	struct worker_pool	*pool;		/* the associated pool */
	struct workqueue_struct *wq;		/* the owning workqueue */
	int			work_color;	/* current color */
	int			flush_color;	/* flushing color */
	int			refcnt;		/* reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* nr of in_flight works */
	int			nr_active;	/* nr of active works */
	int			max_active;	/* max active works */
	struct list_head	delayed_works;	/* delayed works */
	struct list_head	pwqs_node;	/* node on wq->pwqs */
	struct list_head	mayday_node;	/* node on wq->maydays */

	/*
	 * Release of an unbound pwq is punted to system_wq; see put_pwq()
	 * for details.
	 */
	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* list of flushers */
	int			flush_color;	/* flush color waiting for */
	struct completion	done;		/* flush completion */
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pools through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head	pwqs;		/* all pwqs of this wq */
	struct list_head	list;		/* list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* current work color */
	int			flush_color;	/* current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* first flusher */
	struct list_head	flusher_queue;	/* flush waiters */
	struct list_head	flusher_overflow; /* flush overflow list */

	struct list_head	maydays;	/* pwqs requesting rescue */
	struct worker		*rescuer;	/* rescue worker */

	int			nr_drainers;	/* drain in progress */
	int			saved_max_active; /* saved pwq max_active */

	struct workqueue_attrs	*unbound_attrs;	/* only for unbound wqs */
	struct pool_workqueue	*dfl_pwq;	/* only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* workqueue name */

	/*
	 * Destruction of workqueue_struct is sched-RCU protected to allow
	 * walking the workqueues list without grabbing wq_pool_mutex.
	 */
	struct rcu_head		rcu;

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
					/* possible CPUs of each node */

/* module param: disable NUMA affinity for unbound workqueues */
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* module param: make WQ_POWER_EFFICIENT workqueues unbound when enabled */
static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* scratch attrs used when updating unbound NUMA affinity */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */

static LIST_HEAD(workqueues);		/* list of all workqueues */
static bool workqueue_freezing;		/* the workqueues are freezing */

/* allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;

/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);

/*
 * Local execution of unbound work items is no longer guaranteed.  The
 * following always forces round-robin CPU selection on unbound work items
 * to uncover usages which depend on it.
 */
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
				     cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* idr of all pools */

/* hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* attrs used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* attrs used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex),			\
			 "sched RCU or wq->mutex should be held")

#define assert_rcu_or_wq_mutex_or_pool_mutex(wq)			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() &&			\
			 !lockdep_is_held(&wq->mutex) &&		\
			 !lockdep_is_held(&wq_pool_mutex),		\
			 "sched RCU, wq->mutex or wq_pool_mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or sched RCU read
 * locked; the embedded if/else exists only for the lockdep assertion.
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with @pool->attach_mutex held.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or sched RCU read locked.
 */
#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

static bool work_is_static_object(void *addr)
{
	struct work_struct *work = addr;

	return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.debug_hint	= work_debug_hint,
	.is_static_object = work_is_static_object,
	.fixup_init	= work_fixup_init,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	destroy_timer_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

/**
 * worker_pool_assign_id - allocate ID and assign it to @pool
 * @pool: the pool pointer of interest
 *
 * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
 * successfully, -errno on failure.
 */
static int worker_pool_assign_id(struct worker_pool *pool)
{
	int ret;

	lockdep_assert_held(&wq_pool_mutex);

	ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
			GFP_KERNEL);
	if (ret >= 0) {
		pool->id = ret;
		return 0;
	}
	return ret;
}

/**
 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
 * @wq: the target workqueue
 * @node: the node ID
 *
 * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
 * read locked.  If the pwq needs to be used beyond the locking in effect,
 * the caller is responsible for guaranteeing that the pwq stays online.
 *
 * Return: The unbound pool_workqueue for @node.
 */
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex_or_pool_mutex(wq);

	/*
	 * @node can be NUMA_NO_NODE, e.g. when the CPU a delayed item was
	 * queued for has gone offline in the meantime; fall back to the
	 * default pwq in that case.
	 */
	if (unlikely(node == NUMA_NO_NODE))
		return wq->dfl_pwq;

	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
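/*
 * While a work item is queued, %WORK_STRUCT_PWQ is set in work->data and
 * the rest of the word points to the pool_workqueue.  Once execution
 * starts, the pwq pointer is replaced by the pool ID shifted by
 * WORK_OFFQ_POOL_SHIFT so that get_work_pool() can still locate the pool
 * the item last ran on.  The setters below may only be called while the
 * caller owns the work item, i.e. while PENDING is set.
 */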
611static inline void set_work_data(struct work_struct *work, unsigned long data,
612 unsigned long flags)
613{
614 WARN_ON_ONCE(!work_pending(work));
615 atomic_long_set(&work->data, data | flags | work_static(work));
616}
617
618static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
619 unsigned long extra_flags)
620{
621 set_work_data(work, (unsigned long)pwq,
622 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
623}
624
625static void set_work_pool_and_keep_pending(struct work_struct *work,
626 int pool_id)
627{
628 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
629 WORK_STRUCT_PENDING);
630}
631
632static void set_work_pool_and_clear_pending(struct work_struct *work,
633 int pool_id)
634{
635
636
637
638
639
640
641 smp_wmb();
642 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671 smp_mb();
672}
673
674static void clear_work_data(struct work_struct *work)
675{
676 smp_wmb();
677 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
678}
679
680static struct pool_workqueue *get_work_pwq(struct work_struct *work)
681{
682 unsigned long data = atomic_long_read(&work->data);
683
684 if (data & WORK_STRUCT_PWQ)
685 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
686 else
687 return NULL;
688}
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
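/**
 * get_work_pool - return the worker_pool a given work was associated with
 * @work: the work item of interest
 *
 * Pools are created and destroyed under wq_pool_mutex, and allow read
 * access under sched-RCU read lock.  As such, this function should be
 * called under wq_pool_mutex or with preemption disabled.
 *
 * Return: The worker_pool @work was last associated with.  %NULL if none.
 */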
705static struct worker_pool *get_work_pool(struct work_struct *work)
706{
707 unsigned long data = atomic_long_read(&work->data);
708 int pool_id;
709
710 assert_rcu_or_pool_mutex();
711
712 if (data & WORK_STRUCT_PWQ)
713 return ((struct pool_workqueue *)
714 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
715
716 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
717 if (pool_id == WORK_OFFQ_POOL_NONE)
718 return NULL;
719
720 return idr_find(&worker_pool_idr, pool_id);
721}
722
723
724
725
726
727
728
729
730static int get_work_pool_id(struct work_struct *work)
731{
732 unsigned long data = atomic_long_read(&work->data);
733
734 if (data & WORK_STRUCT_PWQ)
735 return ((struct pool_workqueue *)
736 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
737
738 return data >> WORK_OFFQ_POOL_SHIFT;
739}
740
741static void mark_work_canceling(struct work_struct *work)
742{
743 unsigned long pool_id = get_work_pool_id(work);
744
745 pool_id <<= WORK_OFFQ_POOL_SHIFT;
746 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
747}
748
749static bool work_is_canceling(struct work_struct *work)
750{
751 unsigned long data = atomic_long_read(&work->data);
752
753 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
754}
755
756
757
758
759
760
761
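/*
 * Policy functions.  These define the policies on how the global worker
 * pools are managed.  Unless noted otherwise, they are called with
 * pool->lock held.
 *
 * __need_more_worker: do we need to wake up a worker?  True whenever the
 * pool has no worker currently counted as running.
 */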
762static bool __need_more_worker(struct worker_pool *pool)
763{
764 return !atomic_read(&pool->nr_running);
765}
766
767
768
769
770
771
772
773
774
775static bool need_more_worker(struct worker_pool *pool)
776{
777 return !list_empty(&pool->worklist) && __need_more_worker(pool);
778}
779
780
781static bool may_start_working(struct worker_pool *pool)
782{
783 return pool->nr_idle;
784}
785
786
787static bool keep_working(struct worker_pool *pool)
788{
789 return !list_empty(&pool->worklist) &&
790 atomic_read(&pool->nr_running) <= 1;
791}
792
793
794static bool need_to_create_worker(struct worker_pool *pool)
795{
796 return need_more_worker(pool) && !may_start_working(pool);
797}
798
799
800static bool too_many_workers(struct worker_pool *pool)
801{
802 bool managing = mutex_is_locked(&pool->manager_arb);
803 int nr_idle = pool->nr_idle + managing;
804 int nr_busy = pool->nr_workers - nr_idle;
805
806 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
807}
808
809
810
811
812
813
814static struct worker *first_idle_worker(struct worker_pool *pool)
815{
816 if (unlikely(list_empty(&pool->idle_list)))
817 return NULL;
818
819 return list_first_entry(&pool->idle_list, struct worker, entry);
820}
821
822
823
824
825
826
827
828
829
830
831static void wake_up_worker(struct worker_pool *pool)
832{
833 struct worker *worker = first_idle_worker(pool);
834
835 if (likely(worker))
836 wake_up_process(worker->task);
837}
838
839
840
841
842
843
844
845
846
847
848
849
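/**
 * wq_worker_waking_up - a worker is waking up
 * @task: task waking up
 * @cpu: CPU @task is waking up to
 *
 * Called from the scheduler wakeup path.  If the worker counts toward
 * concurrency management, bump the pool's nr_running.
 */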
850void wq_worker_waking_up(struct task_struct *task, int cpu)
851{
852 struct worker *worker = kthread_data(task);
853
854 if (!(worker->flags & WORKER_NOT_RUNNING)) {
855 WARN_ON_ONCE(worker->pool->cpu != cpu);
856 atomic_inc(&worker->pool->nr_running);
857 }
858}
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
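/**
 * wq_worker_sleeping - a worker is going to sleep
 * @task: task going to sleep
 *
 * Called from the scheduler when a busy worker is going to sleep.  A
 * worker on the same cpu can be woken up by returning a pointer to its
 * task.
 *
 * Return: Worker task on @task's cpu which should be woken up, %NULL if
 * none.
 */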
874struct task_struct *wq_worker_sleeping(struct task_struct *task)
875{
876 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
877 struct worker_pool *pool;
878
879
880
881
882
883
884 if (worker->flags & WORKER_NOT_RUNNING)
885 return NULL;
886
887 pool = worker->pool;
888
889
890 if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
891 return NULL;
892
893
894
895
896
897
898
899
900
901
902
903
904 if (atomic_dec_and_test(&pool->nr_running) &&
905 !list_empty(&pool->worklist))
906 to_wakeup = first_idle_worker(pool);
907 return to_wakeup ? to_wakeup->task : NULL;
908}
909
910
911
912
913
914
915
916
917
918
919
920static inline void worker_set_flags(struct worker *worker, unsigned int flags)
921{
922 struct worker_pool *pool = worker->pool;
923
924 WARN_ON_ONCE(worker->task != current);
925
926
927 if ((flags & WORKER_NOT_RUNNING) &&
928 !(worker->flags & WORKER_NOT_RUNNING)) {
929 atomic_dec(&pool->nr_running);
930 }
931
932 worker->flags |= flags;
933}
934
935
936
937
938
939
940
941
942
943
944
945static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
946{
947 struct worker_pool *pool = worker->pool;
948 unsigned int oflags = worker->flags;
949
950 WARN_ON_ONCE(worker->task != current);
951
952 worker->flags &= ~flags;
953
954
955
956
957
958
959 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
960 if (!(worker->flags & WORKER_NOT_RUNNING))
961 atomic_inc(&pool->nr_running);
962}
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
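/**
 * find_worker_executing_work - find worker which is executing a work
 * @pool: pool of interest
 * @work: work to find worker for
 *
 * Look up @work in @pool->busy_hash, which is keyed by the address of
 * @work.  Matching both current_work and current_func guards against a
 * recycled work item being mistaken for the one originally queued at the
 * same address.
 *
 * Return: Pointer to the worker executing @work, or %NULL.  Called with
 * pool->lock held.
 */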
997static struct worker *find_worker_executing_work(struct worker_pool *pool,
998 struct work_struct *work)
999{
1000 struct worker *worker;
1001
1002 hash_for_each_possible(pool->busy_hash, worker, hentry,
1003 (unsigned long)work)
1004 if (worker->current_work == work &&
1005 worker->current_func == work->func)
1006 return worker;
1007
1008 return NULL;
1009}
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
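/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out parameter for nested worklist walking
 *
 * Move @work and any consecutive works linked to it (WORK_STRUCT_LINKED)
 * onto @head.  Called with pool->lock held.
 */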
1028static void move_linked_works(struct work_struct *work, struct list_head *head,
1029 struct work_struct **nextp)
1030{
1031 struct work_struct *n;
1032
1033
1034
1035
1036
1037 list_for_each_entry_safe_from(work, n, NULL, entry) {
1038 list_move_tail(&work->entry, head);
1039 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1040 break;
1041 }
1042
1043
1044
1045
1046
1047
1048 if (nextp)
1049 *nextp = n;
1050}
1051
1052
1053
1054
1055
1056
1057
1058
1059static void get_pwq(struct pool_workqueue *pwq)
1060{
1061 lockdep_assert_held(&pwq->pool->lock);
1062 WARN_ON_ONCE(pwq->refcnt <= 0);
1063 pwq->refcnt++;
1064}
1065
1066
1067
1068
1069
1070
1071
1072
1073static void put_pwq(struct pool_workqueue *pwq)
1074{
1075 lockdep_assert_held(&pwq->pool->lock);
1076 if (likely(--pwq->refcnt))
1077 return;
1078 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1079 return;
1080
1081
1082
1083
1084
1085
1086
1087
1088 schedule_work(&pwq->unbound_release_work);
1089}
1090
1091
1092
1093
1094
1095
1096
1097static void put_pwq_unlocked(struct pool_workqueue *pwq)
1098{
1099 if (pwq) {
1100
1101
1102
1103
1104 spin_lock_irq(&pwq->pool->lock);
1105 put_pwq(pwq);
1106 spin_unlock_irq(&pwq->pool->lock);
1107 }
1108}
1109
1110static void pwq_activate_delayed_work(struct work_struct *work)
1111{
1112 struct pool_workqueue *pwq = get_work_pwq(work);
1113
1114 trace_workqueue_activate_work(work);
1115 if (list_empty(&pwq->pool->worklist))
1116 pwq->pool->watchdog_ts = jiffies;
1117 move_linked_works(work, &pwq->pool->worklist, NULL);
1118 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1119 pwq->nr_active++;
1120}
1121
1122static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
1123{
1124 struct work_struct *work = list_first_entry(&pwq->delayed_works,
1125 struct work_struct, entry);
1126
1127 pwq_activate_delayed_work(work);
1128}
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
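/**
 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
 * @pwq: pwq of interest
 * @color: color of work which left the queue
 *
 * A work either has completed or is removed from the pending queue;
 * decrement nr_in_flight of its pwq, activate the first delayed work if
 * room opened up, and handle workqueue flushing.  Called with pool->lock
 * held.
 */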
1141static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1142{
1143
1144 if (color == WORK_NO_COLOR)
1145 goto out_put;
1146
1147 pwq->nr_in_flight[color]--;
1148
1149 pwq->nr_active--;
1150 if (!list_empty(&pwq->delayed_works)) {
1151
1152 if (pwq->nr_active < pwq->max_active)
1153 pwq_activate_first_delayed(pwq);
1154 }
1155
1156
1157 if (likely(pwq->flush_color != color))
1158 goto out_put;
1159
1160
1161 if (pwq->nr_in_flight[color])
1162 goto out_put;
1163
1164
1165 pwq->flush_color = -1;
1166
1167
1168
1169
1170
1171 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1172 complete(&pwq->wq->first_flusher->done);
1173out_put:
1174 put_pwq(pwq);
1175}
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
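/**
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store irq state
 *
 * Return: 1 if @work was pending and its PENDING bit was stolen, 0 if
 * @work was idle and PENDING was claimed, -EAGAIN if PENDING couldn't be
 * grabbed (the caller should retry), and -ENOENT if someone else is
 * canceling @work.  On return >= 0, irqs are disabled with the previous
 * state saved in *@flags.
 */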
1204static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1205 unsigned long *flags)
1206{
1207 struct worker_pool *pool;
1208 struct pool_workqueue *pwq;
1209
1210 local_irq_save(*flags);
1211
1212
1213 if (is_dwork) {
1214 struct delayed_work *dwork = to_delayed_work(work);
1215
1216
1217
1218
1219
1220
1221 if (likely(del_timer(&dwork->timer)))
1222 return 1;
1223 }
1224
1225
1226 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1227 return 0;
1228
1229
1230
1231
1232
1233 pool = get_work_pool(work);
1234 if (!pool)
1235 goto fail;
1236
1237 spin_lock(&pool->lock);
1238
1239
1240
1241
1242
1243
1244
1245
1246 pwq = get_work_pwq(work);
1247 if (pwq && pwq->pool == pool) {
1248 debug_work_deactivate(work);
1249
1250
1251
1252
1253
1254
1255
1256
1257 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1258 pwq_activate_delayed_work(work);
1259
1260 list_del_init(&work->entry);
1261 pwq_dec_nr_in_flight(pwq, get_work_color(work));
1262
1263
1264 set_work_pool_and_keep_pending(work, pool->id);
1265
1266 spin_unlock(&pool->lock);
1267 return 1;
1268 }
1269 spin_unlock(&pool->lock);
1270fail:
1271 local_irq_restore(*flags);
1272 if (work_is_canceling(work))
1273 return -ENOENT;
1274 cpu_relax();
1275 return -EAGAIN;
1276}
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
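/**
 * insert_work - insert a work into a pool
 * @pwq: pwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work which belongs to @pwq after @head and wake up a worker if
 * needed.  Called with pool->lock held.
 */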
1291static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1292 struct list_head *head, unsigned int extra_flags)
1293{
1294 struct worker_pool *pool = pwq->pool;
1295
1296
1297 set_work_pwq(work, pwq, extra_flags);
1298 list_add_tail(&work->entry, head);
1299 get_pwq(pwq);
1300
1301
1302
1303
1304
1305
1306 smp_mb();
1307
1308 if (__need_more_worker(pool))
1309 wake_up_worker(pool);
1310}
1311
1312
1313
1314
1315
1316static bool is_chained_work(struct workqueue_struct *wq)
1317{
1318 struct worker *worker;
1319
1320 worker = current_wq_worker();
1321
1322
1323
1324
1325 return worker && worker->current_pwq->wq == wq;
1326}
1327
1328
1329
1330
1331
1332
1333static int wq_select_unbound_cpu(int cpu)
1334{
1335 static bool printed_dbg_warning;
1336 int new_cpu;
1337
1338 if (likely(!wq_debug_force_rr_cpu)) {
1339 if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1340 return cpu;
1341 } else if (!printed_dbg_warning) {
1342 pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
1343 printed_dbg_warning = true;
1344 }
1345
1346 if (cpumask_empty(wq_unbound_cpumask))
1347 return cpu;
1348
1349 new_cpu = __this_cpu_read(wq_rr_cpu_last);
1350 new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1351 if (unlikely(new_cpu >= nr_cpu_ids)) {
1352 new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1353 if (unlikely(new_cpu >= nr_cpu_ids))
1354 return cpu;
1355 }
1356 __this_cpu_write(wq_rr_cpu_last, new_cpu);
1357
1358 return new_cpu;
1359}
1360
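/*
 * __queue_work - queue @work on @wq with irqs disabled and PENDING owned.
 * Picks the pool_workqueue (per-cpu, or unbound looked up by NUMA node),
 * keeps the work on the pool it is still executing on to preserve
 * non-reentrancy, and either activates the work or parks it on the pwq's
 * delayed list when max_active has been reached.
 */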
1361static void __queue_work(int cpu, struct workqueue_struct *wq,
1362 struct work_struct *work)
1363{
1364 struct pool_workqueue *pwq;
1365 struct worker_pool *last_pool;
1366 struct list_head *worklist;
1367 unsigned int work_flags;
1368 unsigned int req_cpu = cpu;
1369
1370
1371
1372
1373
1374
1375
1376 WARN_ON_ONCE(!irqs_disabled());
1377
1378 debug_work_activate(work);
1379
1380
1381 if (unlikely(wq->flags & __WQ_DRAINING) &&
1382 WARN_ON_ONCE(!is_chained_work(wq)))
1383 return;
1384retry:
1385 if (req_cpu == WORK_CPU_UNBOUND)
1386 cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1387
1388
1389 if (!(wq->flags & WQ_UNBOUND))
1390 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1391 else
1392 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1393
1394
1395
1396
1397
1398
1399 last_pool = get_work_pool(work);
1400 if (last_pool && last_pool != pwq->pool) {
1401 struct worker *worker;
1402
1403 spin_lock(&last_pool->lock);
1404
1405 worker = find_worker_executing_work(last_pool, work);
1406
1407 if (worker && worker->current_pwq->wq == wq) {
1408 pwq = worker->current_pwq;
1409 } else {
1410
1411 spin_unlock(&last_pool->lock);
1412 spin_lock(&pwq->pool->lock);
1413 }
1414 } else {
1415 spin_lock(&pwq->pool->lock);
1416 }
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426 if (unlikely(!pwq->refcnt)) {
1427 if (wq->flags & WQ_UNBOUND) {
1428 spin_unlock(&pwq->pool->lock);
1429 cpu_relax();
1430 goto retry;
1431 }
1432
1433 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1434 wq->name, cpu);
1435 }
1436
1437
1438 trace_workqueue_queue_work(req_cpu, pwq, work);
1439
1440 if (WARN_ON(!list_empty(&work->entry))) {
1441 spin_unlock(&pwq->pool->lock);
1442 return;
1443 }
1444
1445 pwq->nr_in_flight[pwq->work_color]++;
1446 work_flags = work_color_to_flags(pwq->work_color);
1447
1448 if (likely(pwq->nr_active < pwq->max_active)) {
1449 trace_workqueue_activate_work(work);
1450 pwq->nr_active++;
1451 worklist = &pwq->pool->worklist;
1452 if (list_empty(worklist))
1453 pwq->pool->watchdog_ts = jiffies;
1454 } else {
1455 work_flags |= WORK_STRUCT_DELAYED;
1456 worklist = &pwq->delayed_works;
1457 }
1458
1459 insert_work(pwq, work, worklist, work_flags);
1460
1461 spin_unlock(&pwq->pool->lock);
1462}
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
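/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it can't go
 * away.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */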
1475bool queue_work_on(int cpu, struct workqueue_struct *wq,
1476 struct work_struct *work)
1477{
1478 bool ret = false;
1479 unsigned long flags;
1480
1481 local_irq_save(flags);
1482
1483 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1484 __queue_work(cpu, wq, work);
1485 ret = true;
1486 }
1487
1488 local_irq_restore(flags);
1489 return ret;
1490}
1491EXPORT_SYMBOL(queue_work_on);
1492
1493void delayed_work_timer_fn(unsigned long __data)
1494{
1495 struct delayed_work *dwork = (struct delayed_work *)__data;
1496
1497
1498 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1499}
1500EXPORT_SYMBOL(delayed_work_timer_fn);
1501
1502static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1503 struct delayed_work *dwork, unsigned long delay)
1504{
1505 struct timer_list *timer = &dwork->timer;
1506 struct work_struct *work = &dwork->work;
1507
1508 WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
1509 timer->data != (unsigned long)dwork);
1510 WARN_ON_ONCE(timer_pending(timer));
1511 WARN_ON_ONCE(!list_empty(&work->entry));
1512
1513
1514
1515
1516
1517
1518
1519 if (!delay) {
1520 __queue_work(cpu, wq, &dwork->work);
1521 return;
1522 }
1523
1524 timer_stats_timer_set_start_info(&dwork->timer);
1525
1526 dwork->wq = wq;
1527 dwork->cpu = cpu;
1528 timer->expires = jiffies + delay;
1529
1530 if (unlikely(cpu != WORK_CPU_UNBOUND))
1531 add_timer_on(timer, cpu);
1532 else
1533 add_timer(timer);
1534}
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
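/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @work was already on a queue, %true otherwise.  A zero
 * @delay queues the work immediately.
 */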
1547bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1548 struct delayed_work *dwork, unsigned long delay)
1549{
1550 struct work_struct *work = &dwork->work;
1551 bool ret = false;
1552 unsigned long flags;
1553
1554
1555 local_irq_save(flags);
1556
1557 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1558 __queue_delayed_work(cpu, wq, dwork, delay);
1559 ret = true;
1560 }
1561
1562 local_irq_restore(flags);
1563 return ret;
1564}
1565EXPORT_SYMBOL(queue_delayed_work_on);
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1586 struct delayed_work *dwork, unsigned long delay)
1587{
1588 unsigned long flags;
1589 int ret;
1590
1591 do {
1592 ret = try_to_grab_pending(&dwork->work, true, &flags);
1593 } while (unlikely(ret == -EAGAIN));
1594
1595 if (likely(ret >= 0)) {
1596 __queue_delayed_work(cpu, wq, dwork, delay);
1597 local_irq_restore(flags);
1598 }
1599
1600
1601 return ret;
1602}
1603EXPORT_SYMBOL_GPL(mod_delayed_work_on);
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
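/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and the idle timer if
 * necessary.  Called with pool->lock held.
 */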
1615static void worker_enter_idle(struct worker *worker)
1616{
1617 struct worker_pool *pool = worker->pool;
1618
1619 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1620 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1621 (worker->hentry.next || worker->hentry.pprev)))
1622 return;
1623
1624
1625 worker->flags |= WORKER_IDLE;
1626 pool->nr_idle++;
1627 worker->last_active = jiffies;
1628
1629
1630 list_add(&worker->entry, &pool->idle_list);
1631
1632 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1633 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1634
1635
1636
1637
1638
1639
1640
1641 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1642 pool->nr_workers == pool->nr_idle &&
1643 atomic_read(&pool->nr_running));
1644}
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655static void worker_leave_idle(struct worker *worker)
1656{
1657 struct worker_pool *pool = worker->pool;
1658
1659 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1660 return;
1661 worker_clr_flags(worker, WORKER_IDLE);
1662 pool->nr_idle--;
1663 list_del_init(&worker->entry);
1664}
1665
1666static struct worker *alloc_worker(int node)
1667{
1668 struct worker *worker;
1669
1670 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1671 if (worker) {
1672 INIT_LIST_HEAD(&worker->entry);
1673 INIT_LIST_HEAD(&worker->scheduled);
1674 INIT_LIST_HEAD(&worker->node);
1675
1676 worker->flags = WORKER_PREP;
1677 }
1678 return worker;
1679}
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690static void worker_attach_to_pool(struct worker *worker,
1691 struct worker_pool *pool)
1692{
1693 mutex_lock(&pool->attach_mutex);
1694
1695
1696
1697
1698
1699 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1700
1701
1702
1703
1704
1705
1706 if (pool->flags & POOL_DISASSOCIATED)
1707 worker->flags |= WORKER_UNBOUND;
1708
1709 list_add_tail(&worker->node, &pool->workers);
1710
1711 mutex_unlock(&pool->attach_mutex);
1712}
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723static void worker_detach_from_pool(struct worker *worker,
1724 struct worker_pool *pool)
1725{
1726 struct completion *detach_completion = NULL;
1727
1728 mutex_lock(&pool->attach_mutex);
1729 list_del(&worker->node);
1730 if (list_empty(&pool->workers))
1731 detach_completion = pool->detach_completion;
1732 mutex_unlock(&pool->attach_mutex);
1733
1734
1735 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1736
1737 if (detach_completion)
1738 complete(detach_completion);
1739}
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
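/**
 * create_worker - create a new workqueue worker
 * @pool: pool the new worker will belong to
 *
 * Create and start a new worker which is attached to @pool.  Might sleep;
 * does GFP_KERNEL allocations.
 *
 * Return: Pointer to the newly created worker, or %NULL on failure.
 */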
1753static struct worker *create_worker(struct worker_pool *pool)
1754{
1755 struct worker *worker = NULL;
1756 int id = -1;
1757 char id_buf[16];
1758
1759
1760 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1761 if (id < 0)
1762 goto fail;
1763
1764 worker = alloc_worker(pool->node);
1765 if (!worker)
1766 goto fail;
1767
1768 worker->pool = pool;
1769 worker->id = id;
1770
1771 if (pool->cpu >= 0)
1772 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1773 pool->attrs->nice < 0 ? "H" : "");
1774 else
1775 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1776
1777 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1778 "kworker/%s", id_buf);
1779 if (IS_ERR(worker->task))
1780 goto fail;
1781
1782 set_user_nice(worker->task, pool->attrs->nice);
1783 kthread_bind_mask(worker->task, pool->attrs->cpumask);
1784
1785
1786 worker_attach_to_pool(worker, pool);
1787
1788
1789 spin_lock_irq(&pool->lock);
1790 worker->pool->nr_workers++;
1791 worker_enter_idle(worker);
1792 wake_up_process(worker->task);
1793 spin_unlock_irq(&pool->lock);
1794
1795 return worker;
1796
1797fail:
1798 if (id >= 0)
1799 ida_simple_remove(&pool->worker_ida, id);
1800 kfree(worker);
1801 return NULL;
1802}
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814static void destroy_worker(struct worker *worker)
1815{
1816 struct worker_pool *pool = worker->pool;
1817
1818 lockdep_assert_held(&pool->lock);
1819
1820
1821 if (WARN_ON(worker->current_work) ||
1822 WARN_ON(!list_empty(&worker->scheduled)) ||
1823 WARN_ON(!(worker->flags & WORKER_IDLE)))
1824 return;
1825
1826 pool->nr_workers--;
1827 pool->nr_idle--;
1828
1829 list_del_init(&worker->entry);
1830 worker->flags |= WORKER_DIE;
1831 wake_up_process(worker->task);
1832}
1833
1834static void idle_worker_timeout(unsigned long __pool)
1835{
1836 struct worker_pool *pool = (void *)__pool;
1837
1838 spin_lock_irq(&pool->lock);
1839
1840 while (too_many_workers(pool)) {
1841 struct worker *worker;
1842 unsigned long expires;
1843
1844
1845 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1846 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1847
1848 if (time_before(jiffies, expires)) {
1849 mod_timer(&pool->idle_timer, expires);
1850 break;
1851 }
1852
1853 destroy_worker(worker);
1854 }
1855
1856 spin_unlock_irq(&pool->lock);
1857}
1858
1859static void send_mayday(struct work_struct *work)
1860{
1861 struct pool_workqueue *pwq = get_work_pwq(work);
1862 struct workqueue_struct *wq = pwq->wq;
1863
1864 lockdep_assert_held(&wq_mayday_lock);
1865
1866 if (!wq->rescuer)
1867 return;
1868
1869
1870 if (list_empty(&pwq->mayday_node)) {
1871
1872
1873
1874
1875
1876 get_pwq(pwq);
1877 list_add_tail(&pwq->mayday_node, &wq->maydays);
1878 wake_up_process(wq->rescuer->task);
1879 }
1880}
1881
1882static void pool_mayday_timeout(unsigned long __pool)
1883{
1884 struct worker_pool *pool = (void *)__pool;
1885 struct work_struct *work;
1886
1887 spin_lock_irq(&pool->lock);
1888 spin_lock(&wq_mayday_lock);
1889
1890 if (need_to_create_worker(pool)) {
1891
1892
1893
1894
1895
1896
1897 list_for_each_entry(work, &pool->worklist, entry)
1898 send_mayday(work);
1899 }
1900
1901 spin_unlock(&wq_mayday_lock);
1902 spin_unlock_irq(&pool->lock);
1903
1904 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1905}
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
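/**
 * maybe_create_worker - create a new worker if necessary
 * @pool: pool to create a new worker for
 *
 * Create new workers for @pool until need_to_create_worker() is no longer
 * true.  The mayday timer is armed while this runs so that rescuers are
 * summoned if creation keeps failing.  Temporarily releases pool->lock and
 * may sleep.
 */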
1925static void maybe_create_worker(struct worker_pool *pool)
1926__releases(&pool->lock)
1927__acquires(&pool->lock)
1928{
1929restart:
1930 spin_unlock_irq(&pool->lock);
1931
1932
1933 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1934
1935 while (true) {
1936 if (create_worker(pool) || !need_to_create_worker(pool))
1937 break;
1938
1939 schedule_timeout_interruptible(CREATE_COOLDOWN);
1940
1941 if (!need_to_create_worker(pool))
1942 break;
1943 }
1944
1945 del_timer_sync(&pool->mayday_timer);
1946 spin_lock_irq(&pool->lock);
1947
1948
1949
1950
1951
1952 if (need_to_create_worker(pool))
1953 goto restart;
1954}
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
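/**
 * manage_workers - manage worker pool
 * @worker: self
 *
 * Assume the manager role and manage the worker pool @worker belongs to.
 * At most one manager exists per pool at any time; exclusion is provided
 * by manager_arb.
 *
 * Return: %false if no management was needed and the caller may start
 * processing works, %true if management was performed and previously
 * checked conditions must be re-verified.
 */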
1978static bool manage_workers(struct worker *worker)
1979{
1980 struct worker_pool *pool = worker->pool;
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992 if (!mutex_trylock(&pool->manager_arb))
1993 return false;
1994 pool->manager = worker;
1995
1996 maybe_create_worker(pool);
1997
1998 pool->manager = NULL;
1999 mutex_unlock(&pool->manager_arb);
2000 return true;
2001}
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
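/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work, including synchronization against other workers on the
 * same cpu, concurrency management, queueing and flushing.  Called with
 * pool->lock held, which is released and regrabbed around the callback
 * invocation.
 */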
2017static void process_one_work(struct worker *worker, struct work_struct *work)
2018__releases(&pool->lock)
2019__acquires(&pool->lock)
2020{
2021 struct pool_workqueue *pwq = get_work_pwq(work);
2022 struct worker_pool *pool = worker->pool;
2023 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
2024 int work_color;
2025 struct worker *collision;
2026#ifdef CONFIG_LOCKDEP
2027
2028
2029
2030
2031
2032
2033
2034 struct lockdep_map lockdep_map;
2035
2036 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2037#endif
2038
2039 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2040 raw_smp_processor_id() != pool->cpu);
2041
2042
2043
2044
2045
2046
2047
2048 collision = find_worker_executing_work(pool, work);
2049 if (unlikely(collision)) {
2050 move_linked_works(work, &collision->scheduled, NULL);
2051 return;
2052 }
2053
2054
2055 debug_work_deactivate(work);
2056 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2057 worker->current_work = work;
2058 worker->current_func = work->func;
2059 worker->current_pwq = pwq;
2060 work_color = get_work_color(work);
2061
2062 list_del_init(&work->entry);
2063
2064
2065
2066
2067
2068
2069
2070 if (unlikely(cpu_intensive))
2071 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2072
2073
2074
2075
2076
2077
2078
2079
2080 if (need_more_worker(pool))
2081 wake_up_worker(pool);
2082
2083
2084
2085
2086
2087
2088
2089 set_work_pool_and_clear_pending(work, pool->id);
2090
2091 spin_unlock_irq(&pool->lock);
2092
2093 lock_map_acquire_read(&pwq->wq->lockdep_map);
2094 lock_map_acquire(&lockdep_map);
2095 trace_workqueue_execute_start(work);
2096 worker->current_func(work);
2097
2098
2099
2100
2101 trace_workqueue_execute_end(work);
2102 lock_map_release(&lockdep_map);
2103 lock_map_release(&pwq->wq->lockdep_map);
2104
2105 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2106 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2107 " last function: %pf\n",
2108 current->comm, preempt_count(), task_pid_nr(current),
2109 worker->current_func);
2110 debug_show_held_locks(current);
2111 dump_stack();
2112 }
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122 cond_resched_rcu_qs();
2123
2124 spin_lock_irq(&pool->lock);
2125
2126
2127 if (unlikely(cpu_intensive))
2128 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2129
2130
2131 hash_del(&worker->hentry);
2132 worker->current_work = NULL;
2133 worker->current_func = NULL;
2134 worker->current_pwq = NULL;
2135 worker->desc_valid = false;
2136 pwq_dec_nr_in_flight(pwq, work_color);
2137}
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151static void process_scheduled_works(struct worker *worker)
2152{
2153 while (!list_empty(&worker->scheduled)) {
2154 struct work_struct *work = list_first_entry(&worker->scheduled,
2155 struct work_struct, entry);
2156 process_one_work(worker, work);
2157 }
2158}
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
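/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * All workers belong to a worker_pool - either a per-cpu one or a dynamic
 * unbound one.  These workers process all work items regardless of their
 * specific target workqueue.
 *
 * Return: 0
 */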
2172static int worker_thread(void *__worker)
2173{
2174 struct worker *worker = __worker;
2175 struct worker_pool *pool = worker->pool;
2176
2177
2178 worker->task->flags |= PF_WQ_WORKER;
2179woke_up:
2180 spin_lock_irq(&pool->lock);
2181
2182
2183 if (unlikely(worker->flags & WORKER_DIE)) {
2184 spin_unlock_irq(&pool->lock);
2185 WARN_ON_ONCE(!list_empty(&worker->entry));
2186 worker->task->flags &= ~PF_WQ_WORKER;
2187
2188 set_task_comm(worker->task, "kworker/dying");
2189 ida_simple_remove(&pool->worker_ida, worker->id);
2190 worker_detach_from_pool(worker, pool);
2191 kfree(worker);
2192 return 0;
2193 }
2194
2195 worker_leave_idle(worker);
2196recheck:
2197
2198 if (!need_more_worker(pool))
2199 goto sleep;
2200
2201
2202 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2203 goto recheck;
2204
2205
2206
2207
2208
2209
2210 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2211
2212
2213
2214
2215
2216
2217
2218
2219 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2220
2221 do {
2222 struct work_struct *work =
2223 list_first_entry(&pool->worklist,
2224 struct work_struct, entry);
2225
2226 pool->watchdog_ts = jiffies;
2227
2228 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2229
2230 process_one_work(worker, work);
2231 if (unlikely(!list_empty(&worker->scheduled)))
2232 process_scheduled_works(worker);
2233 } else {
2234 move_linked_works(work, &worker->scheduled, NULL);
2235 process_scheduled_works(worker);
2236 }
2237 } while (keep_working(pool));
2238
2239 worker_set_flags(worker, WORKER_PREP);
2240sleep:
2241
2242
2243
2244
2245
2246
2247
2248 worker_enter_idle(worker);
2249 __set_current_state(TASK_INTERRUPTIBLE);
2250 spin_unlock_irq(&pool->lock);
2251 schedule();
2252 goto woke_up;
2253}
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
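/**
 * rescuer_thread - the rescuer thread function
 * @__rescuer: self
 *
 * There is one rescuer per WQ_MEM_RECLAIM workqueue.  When a pool stalls
 * because new workers cannot be created under memory pressure, mayday'd
 * pwqs are handed to the rescuer so that forward progress is guaranteed.
 *
 * Return: 0
 */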
2276static int rescuer_thread(void *__rescuer)
2277{
2278 struct worker *rescuer = __rescuer;
2279 struct workqueue_struct *wq = rescuer->rescue_wq;
2280 struct list_head *scheduled = &rescuer->scheduled;
2281 bool should_stop;
2282
2283 set_user_nice(current, RESCUER_NICE_LEVEL);
2284
2285
2286
2287
2288
2289 rescuer->task->flags |= PF_WQ_WORKER;
2290repeat:
2291 set_current_state(TASK_INTERRUPTIBLE);
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301 should_stop = kthread_should_stop();
2302
2303
2304 spin_lock_irq(&wq_mayday_lock);
2305
2306 while (!list_empty(&wq->maydays)) {
2307 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2308 struct pool_workqueue, mayday_node);
2309 struct worker_pool *pool = pwq->pool;
2310 struct work_struct *work, *n;
2311 bool first = true;
2312
2313 __set_current_state(TASK_RUNNING);
2314 list_del_init(&pwq->mayday_node);
2315
2316 spin_unlock_irq(&wq_mayday_lock);
2317
2318 worker_attach_to_pool(rescuer, pool);
2319
2320 spin_lock_irq(&pool->lock);
2321 rescuer->pool = pool;
2322
2323
2324
2325
2326
2327 WARN_ON_ONCE(!list_empty(scheduled));
2328 list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2329 if (get_work_pwq(work) == pwq) {
2330 if (first)
2331 pool->watchdog_ts = jiffies;
2332 move_linked_works(work, scheduled, &n);
2333 }
2334 first = false;
2335 }
2336
2337 if (!list_empty(scheduled)) {
2338 process_scheduled_works(rescuer);
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349 if (need_to_create_worker(pool)) {
2350 spin_lock(&wq_mayday_lock);
2351 get_pwq(pwq);
2352 list_move_tail(&pwq->mayday_node, &wq->maydays);
2353 spin_unlock(&wq_mayday_lock);
2354 }
2355 }
2356
2357
2358
2359
2360
2361 put_pwq(pwq);
2362
2363
2364
2365
2366
2367
2368 if (need_more_worker(pool))
2369 wake_up_worker(pool);
2370
2371 rescuer->pool = NULL;
2372 spin_unlock_irq(&pool->lock);
2373
2374 worker_detach_from_pool(rescuer, pool);
2375
2376 spin_lock_irq(&wq_mayday_lock);
2377 }
2378
2379 spin_unlock_irq(&wq_mayday_lock);
2380
2381 if (should_stop) {
2382 __set_current_state(TASK_RUNNING);
2383 rescuer->task->flags &= ~PF_WQ_WORKER;
2384 return 0;
2385 }
2386
2387
2388 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2389 schedule();
2390 goto repeat;
2391}
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404static void check_flush_dependency(struct workqueue_struct *target_wq,
2405 struct work_struct *target_work)
2406{
2407 work_func_t target_func = target_work ? target_work->func : NULL;
2408 struct worker *worker;
2409
2410 if (target_wq->flags & WQ_MEM_RECLAIM)
2411 return;
2412
2413 worker = current_wq_worker();
2414
2415 WARN_ONCE(current->flags & PF_MEMALLOC,
2416 "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
2417 current->pid, current->comm, target_wq->name, target_func);
2418 WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2419 (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2420 "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
2421 worker->current_pwq->wq->name, worker->current_func,
2422 target_wq->name, target_func);
2423}
2424
2425struct wq_barrier {
2426 struct work_struct work;
2427 struct completion done;
2428 struct task_struct *task;
2429};
2430
2431static void wq_barrier_func(struct work_struct *work)
2432{
2433 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2434 complete(&barr->done);
2435}
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
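/**
 * insert_wq_barrier - insert a barrier work
 * @pwq: pwq to insert barrier into
 * @barr: wq_barrier to insert
 * @target: target work to attach @barr to
 * @worker: worker currently executing @target, %NULL if @target is not executing
 *
 * Link @barr to @target so that @barr completes only after @target
 * finishes execution.  Called with pool->lock held.
 */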
2461static void insert_wq_barrier(struct pool_workqueue *pwq,
2462 struct wq_barrier *barr,
2463 struct work_struct *target, struct worker *worker)
2464{
2465 struct list_head *head;
2466 unsigned int linked = 0;
2467
2468
2469
2470
2471
2472
2473
2474 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2475 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2476 init_completion(&barr->done);
2477 barr->task = current;
2478
2479
2480
2481
2482
2483 if (worker)
2484 head = worker->scheduled.next;
2485 else {
2486 unsigned long *bits = work_data_bits(target);
2487
2488 head = target->entry.next;
2489
2490 linked = *bits & WORK_STRUCT_LINKED;
2491 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2492 }
2493
2494 debug_work_activate(&barr->work);
2495 insert_work(pwq, &barr->work, head,
2496 work_color_to_flags(WORK_NO_COLOR) | linked);
2497}
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2531 int flush_color, int work_color)
2532{
2533 bool wait = false;
2534 struct pool_workqueue *pwq;
2535
2536 if (flush_color >= 0) {
2537 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2538 atomic_set(&wq->nr_pwqs_to_flush, 1);
2539 }
2540
2541 for_each_pwq(pwq, wq) {
2542 struct worker_pool *pool = pwq->pool;
2543
2544 spin_lock_irq(&pool->lock);
2545
2546 if (flush_color >= 0) {
2547 WARN_ON_ONCE(pwq->flush_color != -1);
2548
2549 if (pwq->nr_in_flight[flush_color]) {
2550 pwq->flush_color = flush_color;
2551 atomic_inc(&wq->nr_pwqs_to_flush);
2552 wait = true;
2553 }
2554 }
2555
2556 if (work_color >= 0) {
2557 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2558 pwq->work_color = work_color;
2559 }
2560
2561 spin_unlock_irq(&pool->lock);
2562 }
2563
2564 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2565 complete(&wq->first_flusher->done);
2566
2567 return wait;
2568}
2569
2570
2571
2572
2573
2574
2575
2576
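/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */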
2577void flush_workqueue(struct workqueue_struct *wq)
2578{
2579 struct wq_flusher this_flusher = {
2580 .list = LIST_HEAD_INIT(this_flusher.list),
2581 .flush_color = -1,
2582 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2583 };
2584 int next_color;
2585
2586 lock_map_acquire(&wq->lockdep_map);
2587 lock_map_release(&wq->lockdep_map);
2588
2589 mutex_lock(&wq->mutex);
2590
2591
2592
2593
2594 next_color = work_next_color(wq->work_color);
2595
2596 if (next_color != wq->flush_color) {
2597
2598
2599
2600
2601
2602 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2603 this_flusher.flush_color = wq->work_color;
2604 wq->work_color = next_color;
2605
2606 if (!wq->first_flusher) {
2607
2608 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2609
2610 wq->first_flusher = &this_flusher;
2611
2612 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2613 wq->work_color)) {
2614
2615 wq->flush_color = next_color;
2616 wq->first_flusher = NULL;
2617 goto out_unlock;
2618 }
2619 } else {
2620
2621 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2622 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2623 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2624 }
2625 } else {
2626
2627
2628
2629
2630
2631 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2632 }
2633
2634 check_flush_dependency(wq, NULL);
2635
2636 mutex_unlock(&wq->mutex);
2637
2638 wait_for_completion(&this_flusher.done);
2639
2640
2641
2642
2643
2644
2645
2646 if (wq->first_flusher != &this_flusher)
2647 return;
2648
2649 mutex_lock(&wq->mutex);
2650
2651
2652 if (wq->first_flusher != &this_flusher)
2653 goto out_unlock;
2654
2655 wq->first_flusher = NULL;
2656
2657 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2658 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2659
2660 while (true) {
2661 struct wq_flusher *next, *tmp;
2662
2663
2664 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2665 if (next->flush_color != wq->flush_color)
2666 break;
2667 list_del_init(&next->list);
2668 complete(&next->done);
2669 }
2670
2671 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2672 wq->flush_color != work_next_color(wq->work_color));
2673
2674
2675 wq->flush_color = work_next_color(wq->flush_color);
2676
2677
2678 if (!list_empty(&wq->flusher_overflow)) {
2679
2680
2681
2682
2683
2684
2685 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2686 tmp->flush_color = wq->work_color;
2687
2688 wq->work_color = work_next_color(wq->work_color);
2689
2690 list_splice_tail_init(&wq->flusher_overflow,
2691 &wq->flusher_queue);
2692 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2693 }
2694
2695 if (list_empty(&wq->flusher_queue)) {
2696 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2697 break;
2698 }
2699
2700
2701
2702
2703
2704 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2705 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2706
2707 list_del_init(&next->list);
2708 wq->first_flusher = next;
2709
2710 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2711 break;
2712
2713
2714
2715
2716
2717 wq->first_flusher = NULL;
2718 }
2719
2720out_unlock:
2721 mutex_unlock(&wq->mutex);
2722}
2723EXPORT_SYMBOL(flush_workqueue);
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736void drain_workqueue(struct workqueue_struct *wq)
2737{
2738 unsigned int flush_cnt = 0;
2739 struct pool_workqueue *pwq;
2740
2741
2742
2743
2744
2745
2746 mutex_lock(&wq->mutex);
2747 if (!wq->nr_drainers++)
2748 wq->flags |= __WQ_DRAINING;
2749 mutex_unlock(&wq->mutex);
2750reflush:
2751 flush_workqueue(wq);
2752
2753 mutex_lock(&wq->mutex);
2754
2755 for_each_pwq(pwq, wq) {
2756 bool drained;
2757
2758 spin_lock_irq(&pwq->pool->lock);
2759 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2760 spin_unlock_irq(&pwq->pool->lock);
2761
2762 if (drained)
2763 continue;
2764
2765 if (++flush_cnt == 10 ||
2766 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2767 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
2768 wq->name, flush_cnt);
2769
2770 mutex_unlock(&wq->mutex);
2771 goto reflush;
2772 }
2773
2774 if (!--wq->nr_drainers)
2775 wq->flags &= ~__WQ_DRAINING;
2776 mutex_unlock(&wq->mutex);
2777}
2778EXPORT_SYMBOL_GPL(drain_workqueue);
2779
2780static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2781{
2782 struct worker *worker = NULL;
2783 struct worker_pool *pool;
2784 struct pool_workqueue *pwq;
2785
2786 might_sleep();
2787
2788 local_irq_disable();
2789 pool = get_work_pool(work);
2790 if (!pool) {
2791 local_irq_enable();
2792 return false;
2793 }
2794
2795 spin_lock(&pool->lock);
2796
2797 pwq = get_work_pwq(work);
2798 if (pwq) {
2799 if (unlikely(pwq->pool != pool))
2800 goto already_gone;
2801 } else {
2802 worker = find_worker_executing_work(pool, work);
2803 if (!worker)
2804 goto already_gone;
2805 pwq = worker->current_pwq;
2806 }
2807
2808 check_flush_dependency(pwq->wq, work);
2809
2810 insert_wq_barrier(pwq, barr, work, worker);
2811 spin_unlock_irq(&pool->lock);
2812
2813
2814
2815
2816
2817
2818
2819 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
2820 lock_map_acquire(&pwq->wq->lockdep_map);
2821 else
2822 lock_map_acquire_read(&pwq->wq->lockdep_map);
2823 lock_map_release(&pwq->wq->lockdep_map);
2824
2825 return true;
2826already_gone:
2827 spin_unlock_irq(&pool->lock);
2828 return false;
2829}
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
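/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return: %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */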
2842bool flush_work(struct work_struct *work)
2843{
2844 struct wq_barrier barr;
2845
2846 lock_map_acquire(&work->lockdep_map);
2847 lock_map_release(&work->lockdep_map);
2848
2849 if (start_flush_work(work, &barr)) {
2850 wait_for_completion(&barr.done);
2851 destroy_work_on_stack(&barr.work);
2852 return true;
2853 } else {
2854 return false;
2855 }
2856}
2857EXPORT_SYMBOL_GPL(flush_work);
2858
2859struct cwt_wait {
2860 wait_queue_t wait;
2861 struct work_struct *work;
2862};
2863
2864static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
2865{
2866 struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
2867
2868 if (cwait->work != key)
2869 return 0;
2870 return autoremove_wake_function(wait, mode, sync, key);
2871}
2872
2873static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
2874{
2875 static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
2876 unsigned long flags;
2877 int ret;
2878
2879 do {
2880 ret = try_to_grab_pending(work, is_dwork, &flags);
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897 if (unlikely(ret == -ENOENT)) {
2898 struct cwt_wait cwait;
2899
2900 init_wait(&cwait.wait);
2901 cwait.wait.func = cwt_wakefn;
2902 cwait.work = work;
2903
2904 prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
2905 TASK_UNINTERRUPTIBLE);
2906 if (work_is_canceling(work))
2907 schedule();
2908 finish_wait(&cancel_waitq, &cwait.wait);
2909 }
2910 } while (unlikely(ret < 0));
2911
2912
2913 mark_work_canceling(work);
2914 local_irq_restore(flags);
2915
2916 flush_work(work);
2917 clear_work_data(work);
2918
2919
2920
2921
2922
2923
2924 smp_mb();
2925 if (waitqueue_active(&cancel_waitq))
2926 __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
2927
2928 return ret;
2929}
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
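/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function can be
 * used even if the work re-queues itself or migrates to another workqueue.
 * On return, @work is guaranteed to be not pending or executing on any
 * CPU.
 *
 * Return: %true if @work was pending, %false otherwise.
 */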
2949bool cancel_work_sync(struct work_struct *work)
2950{
2951 return __cancel_work_timer(work, false);
2952}
2953EXPORT_SYMBOL_GPL(cancel_work_sync);
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967bool flush_delayed_work(struct delayed_work *dwork)
2968{
2969 local_irq_disable();
2970 if (del_timer_sync(&dwork->timer))
2971 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
2972 local_irq_enable();
2973 return flush_work(&dwork->work);
2974}
2975EXPORT_SYMBOL(flush_delayed_work);
2976
2977static bool __cancel_work(struct work_struct *work, bool is_dwork)
2978{
2979 unsigned long flags;
2980 int ret;
2981
2982 do {
2983 ret = try_to_grab_pending(work, is_dwork, &flags);
2984 } while (unlikely(ret == -EAGAIN));
2985
2986 if (unlikely(ret < 0))
2987 return false;
2988
2989 set_work_pool_and_clear_pending(work, get_work_pool_id(work));
2990 local_irq_restore(flags);
2991 return ret;
2992}
2993
2994
2995
2996
2997bool cancel_work(struct work_struct *work)
2998{
2999 return __cancel_work(work, false);
3000}
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018bool cancel_delayed_work(struct delayed_work *dwork)
3019{
3020 return __cancel_work(&dwork->work, true);
3021}
3022EXPORT_SYMBOL(cancel_delayed_work);
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033bool cancel_delayed_work_sync(struct delayed_work *dwork)
3034{
3035 return __cancel_work_timer(&dwork->work, true);
3036}
3037EXPORT_SYMBOL(cancel_delayed_work_sync);
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050int schedule_on_each_cpu(work_func_t func)
3051{
3052 int cpu;
3053 struct work_struct __percpu *works;
3054
3055 works = alloc_percpu(struct work_struct);
3056 if (!works)
3057 return -ENOMEM;
3058
3059 get_online_cpus();
3060
3061 for_each_online_cpu(cpu) {
3062 struct work_struct *work = per_cpu_ptr(works, cpu);
3063
3064 INIT_WORK(work, func);
3065 schedule_work_on(cpu, work);
3066 }
3067
3068 for_each_online_cpu(cpu)
3069 flush_work(per_cpu_ptr(works, cpu));
3070
3071 put_online_cpus();
3072 free_percpu(works);
3073 return 0;
3074}
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3089{
3090 if (!in_interrupt()) {
3091 fn(&ew->work);
3092 return 0;
3093 }
3094
3095 INIT_WORK(&ew->work, fn);
3096 schedule_work(&ew->work);
3097
3098 return 1;
3099}
3100EXPORT_SYMBOL_GPL(execute_in_process_context);
3101
3102
3103
3104
3105
3106
3107
3108void free_workqueue_attrs(struct workqueue_attrs *attrs)
3109{
3110 if (attrs) {
3111 free_cpumask_var(attrs->cpumask);
3112 kfree(attrs);
3113 }
3114}
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3126{
3127 struct workqueue_attrs *attrs;
3128
3129 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3130 if (!attrs)
3131 goto fail;
3132 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3133 goto fail;
3134
3135 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3136 return attrs;
3137fail:
3138 free_workqueue_attrs(attrs);
3139 return NULL;
3140}
3141
3142static void copy_workqueue_attrs(struct workqueue_attrs *to,
3143 const struct workqueue_attrs *from)
3144{
3145 to->nice = from->nice;
3146 cpumask_copy(to->cpumask, from->cpumask);
	/*
	 * Unlike hash and equality test, this function doesn't ignore
	 * ->no_numa as it is used for both pool and wq attrs.  Instead,
	 * get_unbound_pool() explicitly clears ->no_numa after copying
	 * such an attrs.
	 */
3152 to->no_numa = from->no_numa;
3153}
3154
/* hash value of the content of @attrs */
3156static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3157{
3158 u32 hash = 0;
3159
3160 hash = jhash_1word(attrs->nice, hash);
3161 hash = jhash(cpumask_bits(attrs->cpumask),
3162 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3163 return hash;
3164}
3165
/* content equality test */
3167static bool wqattrs_equal(const struct workqueue_attrs *a,
3168 const struct workqueue_attrs *b)
3169{
3170 if (a->nice != b->nice)
3171 return false;
3172 if (!cpumask_equal(a->cpumask, b->cpumask))
3173 return false;
3174 return true;
3175}
3176
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
3187static int init_worker_pool(struct worker_pool *pool)
3188{
3189 spin_lock_init(&pool->lock);
3190 pool->id = -1;
3191 pool->cpu = -1;
3192 pool->node = NUMA_NO_NODE;
3193 pool->flags |= POOL_DISASSOCIATED;
3194 pool->watchdog_ts = jiffies;
3195 INIT_LIST_HEAD(&pool->worklist);
3196 INIT_LIST_HEAD(&pool->idle_list);
3197 hash_init(pool->busy_hash);
3198
3199 init_timer_deferrable(&pool->idle_timer);
3200 pool->idle_timer.function = idle_worker_timeout;
3201 pool->idle_timer.data = (unsigned long)pool;
3202
3203 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3204 (unsigned long)pool);
3205
3206 mutex_init(&pool->manager_arb);
3207 mutex_init(&pool->attach_mutex);
3208 INIT_LIST_HEAD(&pool->workers);
3209
3210 ida_init(&pool->worker_ida);
3211 INIT_HLIST_NODE(&pool->hash_node);
3212 pool->refcnt = 1;
3213
	/* shouldn't fail above this point */
3215 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3216 if (!pool->attrs)
3217 return -ENOMEM;
3218 return 0;
3219}
3220
3221static void rcu_free_wq(struct rcu_head *rcu)
3222{
3223 struct workqueue_struct *wq =
3224 container_of(rcu, struct workqueue_struct, rcu);
3225
3226 if (!(wq->flags & WQ_UNBOUND))
3227 free_percpu(wq->cpu_pwqs);
3228 else
3229 free_workqueue_attrs(wq->unbound_attrs);
3230
3231 kfree(wq->rescuer);
3232 kfree(wq);
3233}
3234
3235static void rcu_free_pool(struct rcu_head *rcu)
3236{
3237 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3238
3239 ida_destroy(&pool->worker_ida);
3240 free_workqueue_attrs(pool->attrs);
3241 kfree(pool);
3242}
3243
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * partially, any initialization.
 *
 * Should be called with wq_pool_mutex held.
 */
3255static void put_unbound_pool(struct worker_pool *pool)
3256{
3257 DECLARE_COMPLETION_ONSTACK(detach_completion);
3258 struct worker *worker;
3259
3260 lockdep_assert_held(&wq_pool_mutex);
3261
3262 if (--pool->refcnt)
3263 return;
3264
3265
3266 if (WARN_ON(!(pool->cpu < 0)) ||
3267 WARN_ON(!list_empty(&pool->worklist)))
3268 return;
3269
3270
3271 if (pool->id >= 0)
3272 idr_remove(&worker_pool_idr, pool->id);
3273 hash_del(&pool->hash_node);
3274
3275
3276
3277
3278
3279
3280 mutex_lock(&pool->manager_arb);
3281
3282 spin_lock_irq(&pool->lock);
3283 while ((worker = first_idle_worker(pool)))
3284 destroy_worker(worker);
3285 WARN_ON(pool->nr_workers || pool->nr_idle);
3286 spin_unlock_irq(&pool->lock);
3287
3288 mutex_lock(&pool->attach_mutex);
3289 if (!list_empty(&pool->workers))
3290 pool->detach_completion = &detach_completion;
3291 mutex_unlock(&pool->attach_mutex);
3292
3293 if (pool->detach_completion)
3294 wait_for_completion(pool->detach_completion);
3295
3296 mutex_unlock(&pool->manager_arb);
3297
3298
3299 del_timer_sync(&pool->idle_timer);
3300 del_timer_sync(&pool->mayday_timer);
3301
3302
3303 call_rcu_sched(&pool->rcu, rcu_free_pool);
3304}
3305
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
3320static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3321{
3322 u32 hash = wqattrs_hash(attrs);
3323 struct worker_pool *pool;
3324 int node;
3325 int target_node = NUMA_NO_NODE;
3326
3327 lockdep_assert_held(&wq_pool_mutex);
3328
3329
3330 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3331 if (wqattrs_equal(pool->attrs, attrs)) {
3332 pool->refcnt++;
3333 return pool;
3334 }
3335 }
3336
3337
3338 if (wq_numa_enabled) {
3339 for_each_node(node) {
3340 if (cpumask_subset(attrs->cpumask,
3341 wq_numa_possible_cpumask[node])) {
3342 target_node = node;
3343 break;
3344 }
3345 }
3346 }
3347
3348
3349 pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3350 if (!pool || init_worker_pool(pool) < 0)
3351 goto fail;
3352
3353 lockdep_set_subclass(&pool->lock, 1);
3354 copy_workqueue_attrs(pool->attrs, attrs);
3355 pool->node = target_node;
3356
3357
3358
3359
3360
3361 pool->attrs->no_numa = false;
3362
3363 if (worker_pool_assign_id(pool) < 0)
3364 goto fail;
3365
3366
3367 if (!create_worker(pool))
3368 goto fail;
3369
3370
3371 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3372
3373 return pool;
3374fail:
3375 if (pool)
3376 put_unbound_pool(pool);
3377 return NULL;
3378}
3379
3380static void rcu_free_pwq(struct rcu_head *rcu)
3381{
3382 kmem_cache_free(pwq_cache,
3383 container_of(rcu, struct pool_workqueue, rcu));
3384}
3385
/*
 * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
 * and needs to be destroyed.
 */
3390static void pwq_unbound_release_workfn(struct work_struct *work)
3391{
3392 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3393 unbound_release_work);
3394 struct workqueue_struct *wq = pwq->wq;
3395 struct worker_pool *pool = pwq->pool;
3396 bool is_last;
3397
3398 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3399 return;
3400
3401 mutex_lock(&wq->mutex);
3402 list_del_rcu(&pwq->pwqs_node);
3403 is_last = list_empty(&wq->pwqs);
3404 mutex_unlock(&wq->mutex);
3405
3406 mutex_lock(&wq_pool_mutex);
3407 put_unbound_pool(pool);
3408 mutex_unlock(&wq_pool_mutex);
3409
3410 call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3411
3412
3413
3414
3415
3416 if (is_last)
3417 call_rcu_sched(&wq->rcu, rcu_free_wq);
3418}
3419
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
3428static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3429{
3430 struct workqueue_struct *wq = pwq->wq;
3431 bool freezable = wq->flags & WQ_FREEZABLE;
3432
3433
3434 lockdep_assert_held(&wq->mutex);
3435
3436
3437 if (!freezable && pwq->max_active == wq->saved_max_active)
3438 return;
3439
3440 spin_lock_irq(&pwq->pool->lock);
3441
3442
3443
3444
3445
3446
3447 if (!freezable || !workqueue_freezing) {
3448 pwq->max_active = wq->saved_max_active;
3449
3450 while (!list_empty(&pwq->delayed_works) &&
3451 pwq->nr_active < pwq->max_active)
3452 pwq_activate_first_delayed(pwq);
3453
3454
3455
3456
3457
3458 wake_up_worker(pwq->pool);
3459 } else {
3460 pwq->max_active = 0;
3461 }
3462
3463 spin_unlock_irq(&pwq->pool->lock);
3464}
3465
/* initialize newly alloced @pwq which is associated with @wq and @pool */
3467static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3468 struct worker_pool *pool)
3469{
3470 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3471
3472 memset(pwq, 0, sizeof(*pwq));
3473
3474 pwq->pool = pool;
3475 pwq->wq = wq;
3476 pwq->flush_color = -1;
3477 pwq->refcnt = 1;
3478 INIT_LIST_HEAD(&pwq->delayed_works);
3479 INIT_LIST_HEAD(&pwq->pwqs_node);
3480 INIT_LIST_HEAD(&pwq->mayday_node);
3481 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3482}
3483
/* sync @pwq with the current state of its associated wq and link it */
3485static void link_pwq(struct pool_workqueue *pwq)
3486{
3487 struct workqueue_struct *wq = pwq->wq;
3488
3489 lockdep_assert_held(&wq->mutex);
3490
3491
3492 if (!list_empty(&pwq->pwqs_node))
3493 return;
3494
3495
3496 pwq->work_color = wq->work_color;
3497
3498
3499 pwq_adjust_max_active(pwq);
3500
3501
3502 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3503}
3504
/* obtain a pool matching @attrs and create a pwq associating the pool and @wq */
3506static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3507 const struct workqueue_attrs *attrs)
3508{
3509 struct worker_pool *pool;
3510 struct pool_workqueue *pwq;
3511
3512 lockdep_assert_held(&wq_pool_mutex);
3513
3514 pool = get_unbound_pool(attrs);
3515 if (!pool)
3516 return NULL;
3517
3518 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3519 if (!pwq) {
3520 put_unbound_pool(pool);
3521 return NULL;
3522 }
3523
3524 init_pwq(pwq, wq, pool);
3525 return pwq;
3526}
3527
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of the default pwq of the target workqueue
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
3550static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3551 int cpu_going_down, cpumask_t *cpumask)
3552{
3553 if (!wq_numa_enabled || attrs->no_numa)
3554 goto use_dfl;
3555
3556
3557 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3558 if (cpu_going_down >= 0)
3559 cpumask_clear_cpu(cpu_going_down, cpumask);
3560
3561 if (cpumask_empty(cpumask))
3562 goto use_dfl;
3563
3564
3565 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3566 return !cpumask_equal(cpumask, attrs->cpumask);
3567
3568use_dfl:
3569 cpumask_copy(cpumask, attrs->cpumask);
3570 return false;
3571}
3572
/* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
3574static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3575 int node,
3576 struct pool_workqueue *pwq)
3577{
3578 struct pool_workqueue *old_pwq;
3579
3580 lockdep_assert_held(&wq_pool_mutex);
3581 lockdep_assert_held(&wq->mutex);
3582
3583
3584 link_pwq(pwq);
3585
3586 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3587 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3588 return old_pwq;
3589}
3590
/* context to store the prepared attrs & pwqs before applying */
3592struct apply_wqattrs_ctx {
3593 struct workqueue_struct *wq;
3594 struct workqueue_attrs *attrs;
3595 struct list_head list;
3596 struct pool_workqueue *dfl_pwq;
3597 struct pool_workqueue *pwq_tbl[];
3598};
3599
/* free the resources after success or abort */
3601static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3602{
3603 if (ctx) {
3604 int node;
3605
3606 for_each_node(node)
3607 put_pwq_unlocked(ctx->pwq_tbl[node]);
3608 put_pwq_unlocked(ctx->dfl_pwq);
3609
3610 free_workqueue_attrs(ctx->attrs);
3611
3612 kfree(ctx);
3613 }
3614}
3615
/* allocate the attrs and pwqs for later installation */
3617static struct apply_wqattrs_ctx *
3618apply_wqattrs_prepare(struct workqueue_struct *wq,
3619 const struct workqueue_attrs *attrs)
3620{
3621 struct apply_wqattrs_ctx *ctx;
3622 struct workqueue_attrs *new_attrs, *tmp_attrs;
3623 int node;
3624
3625 lockdep_assert_held(&wq_pool_mutex);
3626
3627 ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3628 GFP_KERNEL);
3629
3630 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3631 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3632 if (!ctx || !new_attrs || !tmp_attrs)
3633 goto out_free;
3634
3635
3636
3637
3638
3639
3640 copy_workqueue_attrs(new_attrs, attrs);
3641 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3642 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3643 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3644
3645
3646
3647
3648
3649
3650 copy_workqueue_attrs(tmp_attrs, new_attrs);
3651
3652
3653
3654
3655
3656
3657 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3658 if (!ctx->dfl_pwq)
3659 goto out_free;
3660
3661 for_each_node(node) {
3662 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3663 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3664 if (!ctx->pwq_tbl[node])
3665 goto out_free;
3666 } else {
3667 ctx->dfl_pwq->refcnt++;
3668 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3669 }
3670 }
3671
3672
3673 copy_workqueue_attrs(new_attrs, attrs);
3674 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3675 ctx->attrs = new_attrs;
3676
3677 ctx->wq = wq;
3678 free_workqueue_attrs(tmp_attrs);
3679 return ctx;
3680
3681out_free:
3682 free_workqueue_attrs(tmp_attrs);
3683 free_workqueue_attrs(new_attrs);
3684 apply_wqattrs_cleanup(ctx);
3685 return NULL;
3686}
3687
/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
3689static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3690{
3691 int node;
3692
3693
3694 mutex_lock(&ctx->wq->mutex);
3695
3696 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3697
3698
3699 for_each_node(node)
3700 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3701 ctx->pwq_tbl[node]);
3702
3703
3704 link_pwq(ctx->dfl_pwq);
3705 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3706
3707 mutex_unlock(&ctx->wq->mutex);
3708}
3709
3710static void apply_wqattrs_lock(void)
3711{
3712
3713 get_online_cpus();
3714 mutex_lock(&wq_pool_mutex);
3715}
3716
3717static void apply_wqattrs_unlock(void)
3718{
3719 mutex_unlock(&wq_pool_mutex);
3720 put_online_cpus();
3721}
3722
3723static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3724 const struct workqueue_attrs *attrs)
3725{
3726 struct apply_wqattrs_ctx *ctx;
3727
3728
3729 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3730 return -EINVAL;
3731
3732
3733 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3734 return -EINVAL;
3735
3736 ctx = apply_wqattrs_prepare(wq, attrs);
3737 if (!ctx)
3738 return -ENOMEM;
3739
3740
3741 apply_wqattrs_commit(ctx);
3742 apply_wqattrs_cleanup(ctx);
3743
3744 return 0;
3745}
3746
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node they were issued on.  Older pwqs are released as in-flight
 * work items finish.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
3763int apply_workqueue_attrs(struct workqueue_struct *wq,
3764 const struct workqueue_attrs *attrs)
3765{
3766 int ret;
3767
3768 apply_wqattrs_lock();
3769 ret = apply_workqueue_attrs_locked(wq, attrs);
3770 apply_wqattrs_unlock();
3771
3772 return ret;
3773}
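
/*
 * Example (illustrative sketch; "my_wq" and "my_cpumask" are hypothetical
 * and the caller is assumed to be in-kernel code with access to this
 * interface): retune an unbound workqueue to nicer workers confined to a
 * subset of CPUs.
 *
 *	struct workqueue_attrs *attrs;
 *	int ret = -ENOMEM;
 *
 *	attrs = alloc_workqueue_attrs(GFP_KERNEL);
 *	if (attrs) {
 *		attrs->nice = 10;
 *		cpumask_copy(attrs->cpumask, my_cpumask);
 *		ret = apply_workqueue_attrs(my_wq, attrs);
 *		free_workqueue_attrs(attrs);
 *	}
 *
 * Only newly queued work items see the new attributes; in-flight items
 * finish on their old pwqs.
 */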
3774
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from CPU hotplug callbacks.  @cpu is being
 * hot[un]plugged; update the NUMA affinity of @wq accordingly by installing
 * or dropping a node-specific pwq.
 *
 * If NUMA affinity can't be adjusted due to memory allocation failure, it
 * falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * the offline callback.
 */
3797static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3798 bool online)
3799{
3800 int node = cpu_to_node(cpu);
3801 int cpu_off = online ? -1 : cpu;
3802 struct pool_workqueue *old_pwq = NULL, *pwq;
3803 struct workqueue_attrs *target_attrs;
3804 cpumask_t *cpumask;
3805
3806 lockdep_assert_held(&wq_pool_mutex);
3807
3808 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
3809 wq->unbound_attrs->no_numa)
3810 return;
3811
3812
3813
3814
3815
3816
3817 target_attrs = wq_update_unbound_numa_attrs_buf;
3818 cpumask = target_attrs->cpumask;
3819
3820 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3821 pwq = unbound_pwq_by_node(wq, node);
3822
3823
3824
3825
3826
3827
3828
3829 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
3830 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3831 return;
3832 } else {
3833 goto use_dfl_pwq;
3834 }
3835
3836
3837 pwq = alloc_unbound_pwq(wq, target_attrs);
3838 if (!pwq) {
3839 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3840 wq->name);
3841 goto use_dfl_pwq;
3842 }
3843
3844
3845 mutex_lock(&wq->mutex);
3846 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3847 goto out_unlock;
3848
3849use_dfl_pwq:
3850 mutex_lock(&wq->mutex);
3851 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3852 get_pwq(wq->dfl_pwq);
3853 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3854 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3855out_unlock:
3856 mutex_unlock(&wq->mutex);
3857 put_pwq_unlocked(old_pwq);
3858}
3859
3860static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3861{
3862 bool highpri = wq->flags & WQ_HIGHPRI;
3863 int cpu, ret;
3864
3865 if (!(wq->flags & WQ_UNBOUND)) {
3866 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3867 if (!wq->cpu_pwqs)
3868 return -ENOMEM;
3869
3870 for_each_possible_cpu(cpu) {
3871 struct pool_workqueue *pwq =
3872 per_cpu_ptr(wq->cpu_pwqs, cpu);
3873 struct worker_pool *cpu_pools =
3874 per_cpu(cpu_worker_pools, cpu);
3875
3876 init_pwq(pwq, wq, &cpu_pools[highpri]);
3877
3878 mutex_lock(&wq->mutex);
3879 link_pwq(pwq);
3880 mutex_unlock(&wq->mutex);
3881 }
3882 return 0;
3883 } else if (wq->flags & __WQ_ORDERED) {
3884 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
3885
3886 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3887 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3888 "ordering guarantee broken for workqueue %s\n", wq->name);
3889 return ret;
3890 } else {
3891 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3892 }
3893}
3894
3895static int wq_clamp_max_active(int max_active, unsigned int flags,
3896 const char *name)
3897{
3898 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
3899
3900 if (max_active < 1 || max_active > lim)
3901 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
3902 max_active, name, 1, lim);
3903
3904 return clamp_val(max_active, 1, lim);
3905}
3906
3907struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3908 unsigned int flags,
3909 int max_active,
3910 struct lock_class_key *key,
3911 const char *lock_name, ...)
3912{
3913 size_t tbl_size = 0;
3914 va_list args;
3915 struct workqueue_struct *wq;
3916 struct pool_workqueue *pwq;
3917
3918
3919 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
3920 flags |= WQ_UNBOUND;
3921
3922
3923 if (flags & WQ_UNBOUND)
3924 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
3925
3926 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
3927 if (!wq)
3928 return NULL;
3929
3930 if (flags & WQ_UNBOUND) {
3931 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3932 if (!wq->unbound_attrs)
3933 goto err_free_wq;
3934 }
3935
3936 va_start(args, lock_name);
3937 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
3938 va_end(args);
3939
3940 max_active = max_active ?: WQ_DFL_ACTIVE;
3941 max_active = wq_clamp_max_active(max_active, flags, wq->name);
3942
3943
3944 wq->flags = flags;
3945 wq->saved_max_active = max_active;
3946 mutex_init(&wq->mutex);
3947 atomic_set(&wq->nr_pwqs_to_flush, 0);
3948 INIT_LIST_HEAD(&wq->pwqs);
3949 INIT_LIST_HEAD(&wq->flusher_queue);
3950 INIT_LIST_HEAD(&wq->flusher_overflow);
3951 INIT_LIST_HEAD(&wq->maydays);
3952
3953 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3954 INIT_LIST_HEAD(&wq->list);
3955
3956 if (alloc_and_link_pwqs(wq) < 0)
3957 goto err_free_wq;
3958
3959
3960
3961
3962
3963 if (flags & WQ_MEM_RECLAIM) {
3964 struct worker *rescuer;
3965
3966 rescuer = alloc_worker(NUMA_NO_NODE);
3967 if (!rescuer)
3968 goto err_destroy;
3969
3970 rescuer->rescue_wq = wq;
3971 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
3972 wq->name);
3973 if (IS_ERR(rescuer->task)) {
3974 kfree(rescuer);
3975 goto err_destroy;
3976 }
3977
3978 wq->rescuer = rescuer;
3979 kthread_bind_mask(rescuer->task, cpu_possible_mask);
3980 wake_up_process(rescuer->task);
3981 }
3982
3983 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
3984 goto err_destroy;
3985
3986
3987
3988
3989
3990
3991 mutex_lock(&wq_pool_mutex);
3992
3993 mutex_lock(&wq->mutex);
3994 for_each_pwq(pwq, wq)
3995 pwq_adjust_max_active(pwq);
3996 mutex_unlock(&wq->mutex);
3997
3998 list_add_tail_rcu(&wq->list, &workqueues);
3999
4000 mutex_unlock(&wq_pool_mutex);
4001
4002 return wq;
4003
4004err_free_wq:
4005 free_workqueue_attrs(wq->unbound_attrs);
4006 kfree(wq);
4007 return NULL;
4008err_destroy:
4009 destroy_workqueue(wq);
4010 return NULL;
4011}
4012EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
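
/*
 * Example (illustrative sketch; "my_wq" and the chosen flags are assumptions
 * of the example): callers normally reach this function through the
 * alloc_workqueue() wrapper.
 *
 *	struct workqueue_struct *my_wq;
 *
 *	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
 *	if (!my_wq)
 *		return -ENOMEM;
 *
 * WQ_MEM_RECLAIM sets up a rescuer thread so the queue can make forward
 * progress under memory pressure; a max_active of 0 selects the default
 * (WQ_DFL_ACTIVE).
 */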
4013
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue.  All work currently pending will be done first.
 */
4020void destroy_workqueue(struct workqueue_struct *wq)
4021{
4022 struct pool_workqueue *pwq;
4023 int node;
4024
4025
4026 drain_workqueue(wq);
4027
4028
4029 mutex_lock(&wq->mutex);
4030 for_each_pwq(pwq, wq) {
4031 int i;
4032
4033 for (i = 0; i < WORK_NR_COLORS; i++) {
4034 if (WARN_ON(pwq->nr_in_flight[i])) {
4035 mutex_unlock(&wq->mutex);
4036 return;
4037 }
4038 }
4039
4040 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4041 WARN_ON(pwq->nr_active) ||
4042 WARN_ON(!list_empty(&pwq->delayed_works))) {
4043 mutex_unlock(&wq->mutex);
4044 return;
4045 }
4046 }
4047 mutex_unlock(&wq->mutex);
4048
4049
4050
4051
4052
4053 mutex_lock(&wq_pool_mutex);
4054 list_del_rcu(&wq->list);
4055 mutex_unlock(&wq_pool_mutex);
4056
4057 workqueue_sysfs_unregister(wq);
4058
4059 if (wq->rescuer)
4060 kthread_stop(wq->rescuer->task);
4061
4062 if (!(wq->flags & WQ_UNBOUND)) {
4063
4064
4065
4066
4067 call_rcu_sched(&wq->rcu, rcu_free_wq);
4068 } else {
4069
4070
4071
4072
4073
4074 for_each_node(node) {
4075 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4076 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4077 put_pwq_unlocked(pwq);
4078 }
4079
4080
4081
4082
4083
4084 pwq = wq->dfl_pwq;
4085 wq->dfl_pwq = NULL;
4086 put_pwq_unlocked(pwq);
4087 }
4088}
4089EXPORT_SYMBOL_GPL(destroy_workqueue);
4090
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value.
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
4101void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4102{
4103 struct pool_workqueue *pwq;
4104
4105
4106 if (WARN_ON(wq->flags & __WQ_ORDERED))
4107 return;
4108
4109 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4110
4111 mutex_lock(&wq->mutex);
4112
4113 wq->saved_max_active = max_active;
4114
4115 for_each_pwq(pwq, wq)
4116 pwq_adjust_max_active(pwq);
4117
4118 mutex_unlock(&wq->mutex);
4119}
4120EXPORT_SYMBOL_GPL(workqueue_set_max_active);
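
/*
 * Example (illustrative sketch; my_wq and the value 16 are hypothetical):
 * raise the concurrency limit of a non-ordered workqueue at runtime, e.g.
 * once the number of devices sharing it is known.
 *
 *	workqueue_set_max_active(my_wq, 16);
 *
 * Work items beyond max_active sit on the pwq's delayed list until a slot
 * frees up, so lowering the value throttles rather than drops work.
 */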
4121
/**
 * current_is_workqueue_rescuer - is %current workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
 * work functions to determine whether it's being run off the rescuer task.
 *
 * Return: %true if %current is a workqueue rescuer. %false otherwise.
 */
4130bool current_is_workqueue_rescuer(void)
4131{
4132 struct worker *worker = current_wq_worker();
4133
4134 return worker && worker->rescue_wq;
4135}
4136
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is
 * no synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean that the workqueue
 * is also congested on other CPUs / NUMA nodes.
 *
 * Return:
 * %true if congested, %false otherwise.
 */
4155bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4156{
4157 struct pool_workqueue *pwq;
4158 bool ret;
4159
4160 rcu_read_lock_sched();
4161
4162 if (cpu == WORK_CPU_UNBOUND)
4163 cpu = smp_processor_id();
4164
4165 if (!(wq->flags & WQ_UNBOUND))
4166 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4167 else
4168 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4169
4170 ret = !list_empty(&pwq->delayed_works);
4171 rcu_read_unlock_sched();
4172
4173 return ret;
4174}
4175EXPORT_SYMBOL_GPL(workqueue_congested);
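
/*
 * Example (illustrative sketch; my_wq, my_work and the drop policy are
 * hypothetical): a producer may use the advisory congestion state to shed
 * optional work.
 *
 *	if (!workqueue_congested(WORK_CPU_UNBOUND, my_wq))
 *		queue_work(my_wq, &my_work);
 *	else
 *		my_dropped_count++;
 *
 * The result is unsynchronized and can change right after the call, so it
 * is only usable as a hint.
 */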
4176
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is
 * unreliable and only useful as advisory hints or for debugging.
 *
 * Return:
 * OR'd bitmask of WORK_BUSY_* bits.
 */
4188unsigned int work_busy(struct work_struct *work)
4189{
4190 struct worker_pool *pool;
4191 unsigned long flags;
4192 unsigned int ret = 0;
4193
4194 if (work_pending(work))
4195 ret |= WORK_BUSY_PENDING;
4196
4197 local_irq_save(flags);
4198 pool = get_work_pool(work);
4199 if (pool) {
4200 spin_lock(&pool->lock);
4201 if (find_worker_executing_work(pool, work))
4202 ret |= WORK_BUSY_RUNNING;
4203 spin_unlock(&pool->lock);
4204 }
4205 local_irq_restore(flags);
4206
4207 return ret;
4208}
4209EXPORT_SYMBOL_GPL(work_busy);
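
/*
 * Example (illustrative sketch; my_work is hypothetical and the debug dump
 * is just an assumed use case): work_busy() lets a debug path report what a
 * work item is currently doing.
 *
 *	unsigned int busy = work_busy(&my_work);
 *
 *	pr_debug("my_work:%s%s\n",
 *		 busy & WORK_BUSY_PENDING ? " pending" : "",
 *		 busy & WORK_BUSY_RUNNING ? " running" : "");
 */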
4210
/**
 * set_worker_desc - set description for the current work item
 * @fmt: printf-style format string
 * @...: arguments for the format string
 *
 * This function can be called by a running work function to describe what
 * the work item is about.  If the worker task gets dumped, this
 * information will be printed out together to help debugging.  The
 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
 */
4221void set_worker_desc(const char *fmt, ...)
4222{
4223 struct worker *worker = current_wq_worker();
4224 va_list args;
4225
4226 if (worker) {
4227 va_start(args, fmt);
4228 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4229 va_end(args);
4230 worker->desc_valid = true;
4231 }
4232}
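
/*
 * Example (illustrative sketch; struct my_req, my_io_work_fn() and
 * my_submit_and_wait() are hypothetical): a work function can tag the
 * executing worker so that print_worker_info() output identifies what it
 * was doing when the task got dumped.
 *
 *	static void my_io_work_fn(struct work_struct *work)
 *	{
 *		struct my_req *req = container_of(work, struct my_req, work);
 *
 *		set_worker_desc("my_io req=%d", req->id);
 *		my_submit_and_wait(req);
 *	}
 */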
4233
/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * name of the workqueue being serviced and worker description set with
 * set_worker_desc() by the currently executing work item.
 *
 * This function can be safely called on any task as long as the
 * task_struct itself is accessible.  While safe, this function isn't
 * synchronized and may print out mixups or garbages of limited length.
 */
4247void print_worker_info(const char *log_lvl, struct task_struct *task)
4248{
4249 work_func_t *fn = NULL;
4250 char name[WQ_NAME_LEN] = { };
4251 char desc[WORKER_DESC_LEN] = { };
4252 struct pool_workqueue *pwq = NULL;
4253 struct workqueue_struct *wq = NULL;
4254 bool desc_valid = false;
4255 struct worker *worker;
4256
4257 if (!(task->flags & PF_WQ_WORKER))
4258 return;
4259
	/*
	 * This function is called without any synchronization and @task
	 * could be in any state.  Be careful with dereferences.
	 */
4264 worker = kthread_probe_data(task);
4265
	/*
	 * Carefully copy the associated workqueue's workfn and name.  Keep
	 * the original last '\0' in case the original contains garbage.
	 */
4270 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4271 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4272 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4273 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4274
	/* copy worker description */
4276 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4277 if (desc_valid)
4278 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4279
4280 if (fn || name[0] || desc[0]) {
4281 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4282 if (desc[0])
4283 pr_cont(" (%s)", desc);
4284 pr_cont("\n");
4285 }
4286}
4287
4288static void pr_cont_pool_info(struct worker_pool *pool)
4289{
4290 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
4291 if (pool->node != NUMA_NO_NODE)
4292 pr_cont(" node=%d", pool->node);
4293 pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
4294}
4295
4296static void pr_cont_work(bool comma, struct work_struct *work)
4297{
4298 if (work->func == wq_barrier_func) {
4299 struct wq_barrier *barr;
4300
4301 barr = container_of(work, struct wq_barrier, work);
4302
4303 pr_cont("%s BAR(%d)", comma ? "," : "",
4304 task_pid_nr(barr->task));
4305 } else {
4306 pr_cont("%s %pf", comma ? "," : "", work->func);
4307 }
4308}
4309
4310static void show_pwq(struct pool_workqueue *pwq)
4311{
4312 struct worker_pool *pool = pwq->pool;
4313 struct work_struct *work;
4314 struct worker *worker;
4315 bool has_in_flight = false, has_pending = false;
4316 int bkt;
4317
4318 pr_info(" pwq %d:", pool->id);
4319 pr_cont_pool_info(pool);
4320
4321 pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active,
4322 !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
4323
4324 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4325 if (worker->current_pwq == pwq) {
4326 has_in_flight = true;
4327 break;
4328 }
4329 }
4330 if (has_in_flight) {
4331 bool comma = false;
4332
4333 pr_info(" in-flight:");
4334 hash_for_each(pool->busy_hash, bkt, worker, hentry) {
4335 if (worker->current_pwq != pwq)
4336 continue;
4337
4338 pr_cont("%s %d%s:%pf", comma ? "," : "",
4339 task_pid_nr(worker->task),
4340 worker == pwq->wq->rescuer ? "(RESCUER)" : "",
4341 worker->current_func);
4342 list_for_each_entry(work, &worker->scheduled, entry)
4343 pr_cont_work(false, work);
4344 comma = true;
4345 }
4346 pr_cont("\n");
4347 }
4348
4349 list_for_each_entry(work, &pool->worklist, entry) {
4350 if (get_work_pwq(work) == pwq) {
4351 has_pending = true;
4352 break;
4353 }
4354 }
4355 if (has_pending) {
4356 bool comma = false;
4357
4358 pr_info(" pending:");
4359 list_for_each_entry(work, &pool->worklist, entry) {
4360 if (get_work_pwq(work) != pwq)
4361 continue;
4362
4363 pr_cont_work(comma, work);
4364 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4365 }
4366 pr_cont("\n");
4367 }
4368
4369 if (!list_empty(&pwq->delayed_works)) {
4370 bool comma = false;
4371
4372 pr_info(" delayed:");
4373 list_for_each_entry(work, &pwq->delayed_works, entry) {
4374 pr_cont_work(comma, work);
4375 comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
4376 }
4377 pr_cont("\n");
4378 }
4379}
4380
/**
 * show_workqueue_state - dump workqueue state
 *
 * Print out all busy workqueues and worker pools.  Used from debugging
 * dump paths such as the sysrq handlers.
 */
4387void show_workqueue_state(void)
4388{
4389 struct workqueue_struct *wq;
4390 struct worker_pool *pool;
4391 unsigned long flags;
4392 int pi;
4393
4394 rcu_read_lock_sched();
4395
4396 pr_info("Showing busy workqueues and worker pools:\n");
4397
4398 list_for_each_entry_rcu(wq, &workqueues, list) {
4399 struct pool_workqueue *pwq;
4400 bool idle = true;
4401
4402 for_each_pwq(pwq, wq) {
4403 if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
4404 idle = false;
4405 break;
4406 }
4407 }
4408 if (idle)
4409 continue;
4410
4411 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
4412
4413 for_each_pwq(pwq, wq) {
4414 spin_lock_irqsave(&pwq->pool->lock, flags);
4415 if (pwq->nr_active || !list_empty(&pwq->delayed_works))
4416 show_pwq(pwq);
4417 spin_unlock_irqrestore(&pwq->pool->lock, flags);
4418 }
4419 }
4420
4421 for_each_pool(pool, pi) {
4422 struct worker *worker;
4423 bool first = true;
4424
4425 spin_lock_irqsave(&pool->lock, flags);
4426 if (pool->nr_workers == pool->nr_idle)
4427 goto next_pool;
4428
4429 pr_info("pool %d:", pool->id);
4430 pr_cont_pool_info(pool);
4431 pr_cont(" hung=%us workers=%d",
4432 jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
4433 pool->nr_workers);
4434 if (pool->manager)
4435 pr_cont(" manager: %d",
4436 task_pid_nr(pool->manager->task));
4437 list_for_each_entry(worker, &pool->idle_list, entry) {
4438 pr_cont(" %s%d", first ? "idle: " : "",
4439 task_pid_nr(worker->task));
4440 first = false;
4441 }
4442 pr_cont("\n");
4443 next_pool:
4444 spin_unlock_irqrestore(&pool->lock, flags);
4445 }
4446
4447 rcu_read_unlock_sched();
4448}
4449
/*
 * CPU hotplug.
 *
 * There are two challenges in supporting CPU hotplug.  Firstly, there
 * are a lot of assumptions on strong associations among work, pwq and
 * pool which make migrating pending and scheduled works very
 * difficult to implement without impacting hot paths.  Secondly,
 * worker pools serve mix of short, long and very long running works making
 * blocked draining impractical.
 *
 * This is solved by allowing the pools to be disassociated from the CPU
 * running as an unbound one and allowing it to be reattached later if the
 * cpu comes back online.
 */

4465static void wq_unbind_fn(struct work_struct *work)
4466{
4467 int cpu = smp_processor_id();
4468 struct worker_pool *pool;
4469 struct worker *worker;
4470
4471 for_each_cpu_worker_pool(pool, cpu) {
4472 mutex_lock(&pool->attach_mutex);
4473 spin_lock_irq(&pool->lock);
4474
4475
4476
4477
4478
4479
4480
4481
4482 for_each_pool_worker(worker, pool)
4483 worker->flags |= WORKER_UNBOUND;
4484
4485 pool->flags |= POOL_DISASSOCIATED;
4486
4487 spin_unlock_irq(&pool->lock);
4488 mutex_unlock(&pool->attach_mutex);
4489
4490
4491
4492
4493
4494
4495
4496 schedule();
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506 atomic_set(&pool->nr_running, 0);
4507
4508
4509
4510
4511
4512
4513 spin_lock_irq(&pool->lock);
4514 wake_up_worker(pool);
4515 spin_unlock_irq(&pool->lock);
4516 }
4517}
4518
/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Rebind all workers to the CPU.
 */
4525static void rebind_workers(struct worker_pool *pool)
4526{
4527 struct worker *worker;
4528
4529 lockdep_assert_held(&pool->attach_mutex);
4530
4531
4532
4533
4534
4535
4536
4537
4538 for_each_pool_worker(worker, pool)
4539 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4540 pool->attrs->cpumask) < 0);
4541
4542 spin_lock_irq(&pool->lock);
4543
4544
4545
4546
4547
4548
4549 if (!(pool->flags & POOL_DISASSOCIATED)) {
4550 spin_unlock_irq(&pool->lock);
4551 return;
4552 }
4553
4554 pool->flags &= ~POOL_DISASSOCIATED;
4555
4556 for_each_pool_worker(worker, pool) {
4557 unsigned int worker_flags = worker->flags;
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567 if (worker_flags & WORKER_IDLE)
4568 wake_up_process(worker->task);
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4586 worker_flags |= WORKER_REBOUND;
4587 worker_flags &= ~WORKER_UNBOUND;
4588 ACCESS_ONCE(worker->flags) = worker_flags;
4589 }
4590
4591 spin_unlock_irq(&pool->lock);
4592}
4593
/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs.  When a worker of such pool get scheduled, the scheduler resets
 * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
 * online CPU before, restore the cpus_allowed of all its workers.
 */
4604static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4605{
4606 static cpumask_t cpumask;
4607 struct worker *worker;
4608
4609 lockdep_assert_held(&pool->attach_mutex);
4610
4611
4612 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4613 return;
4614
4615 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4616
4617
4618 for_each_pool_worker(worker, pool)
4619 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
4620}
4621
4622int workqueue_prepare_cpu(unsigned int cpu)
4623{
4624 struct worker_pool *pool;
4625
4626 for_each_cpu_worker_pool(pool, cpu) {
4627 if (pool->nr_workers)
4628 continue;
4629 if (!create_worker(pool))
4630 return -ENOMEM;
4631 }
4632 return 0;
4633}
4634
4635int workqueue_online_cpu(unsigned int cpu)
4636{
4637 struct worker_pool *pool;
4638 struct workqueue_struct *wq;
4639 int pi;
4640
4641 mutex_lock(&wq_pool_mutex);
4642
4643 for_each_pool(pool, pi) {
4644 mutex_lock(&pool->attach_mutex);
4645
4646 if (pool->cpu == cpu)
4647 rebind_workers(pool);
4648 else if (pool->cpu < 0)
4649 restore_unbound_workers_cpumask(pool, cpu);
4650
4651 mutex_unlock(&pool->attach_mutex);
4652 }
4653
4654
4655 list_for_each_entry(wq, &workqueues, list)
4656 wq_update_unbound_numa(wq, cpu, true);
4657
4658 mutex_unlock(&wq_pool_mutex);
4659 return 0;
4660}
4661
4662int workqueue_offline_cpu(unsigned int cpu)
4663{
4664 struct work_struct unbind_work;
4665 struct workqueue_struct *wq;
4666
4667
4668 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
4669 queue_work_on(cpu, system_highpri_wq, &unbind_work);
4670
4671
4672 mutex_lock(&wq_pool_mutex);
4673 list_for_each_entry(wq, &workqueues, list)
4674 wq_update_unbound_numa(wq, cpu, false);
4675 mutex_unlock(&wq_pool_mutex);
4676
4677
4678 flush_work(&unbind_work);
4679 destroy_work_on_stack(&unbind_work);
4680 return 0;
4681}
4682
4683#ifdef CONFIG_SMP
4684
4685struct work_for_cpu {
4686 struct work_struct work;
4687 long (*fn)(void *);
4688 void *arg;
4689 long ret;
4690};
4691
4692static void work_for_cpu_fn(struct work_struct *work)
4693{
4694 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4695
4696 wfc->ret = wfc->fn(wfc->arg);
4697}
4698
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 *
 * Return: The value @fn returns.
 */
4710long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4711{
4712 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4713
4714 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4715 schedule_work_on(cpu, &wfc.work);
4716 flush_work(&wfc.work);
4717 destroy_work_on_stack(&wfc.work);
4718 return wfc.ret;
4719}
4720EXPORT_SYMBOL_GPL(work_on_cpu);
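
/*
 * Example (illustrative sketch; the choice of CPU 2 is arbitrary and the
 * caller is assumed to keep it online, e.g. via get_online_cpus()): run a
 * short function on a specific CPU and collect its return value without
 * declaring a dedicated work item.
 *
 *	static long my_where_am_i(void *unused)
 *	{
 *		return raw_smp_processor_id();
 *	}
 *
 *	long cpu = work_on_cpu(2, my_where_am_i, NULL);
 *
 * The call blocks until the function has finished on the target CPU.
 */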
4721#endif
4722
4723#ifdef CONFIG_FREEZER
4724
/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
 * workqueues will queue new works to their delayed_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4735void freeze_workqueues_begin(void)
4736{
4737 struct workqueue_struct *wq;
4738 struct pool_workqueue *pwq;
4739
4740 mutex_lock(&wq_pool_mutex);
4741
4742 WARN_ON_ONCE(workqueue_freezing);
4743 workqueue_freezing = true;
4744
4745 list_for_each_entry(wq, &workqueues, list) {
4746 mutex_lock(&wq->mutex);
4747 for_each_pwq(pwq, wq)
4748 pwq_adjust_max_active(pwq);
4749 mutex_unlock(&wq->mutex);
4750 }
4751
4752 mutex_unlock(&wq_pool_mutex);
4753}
4754
/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex.
 *
 * Return:
 * %true if some freezable workqueues are still busy.  %false if freezing
 * is complete.
 */
4768bool freeze_workqueues_busy(void)
4769{
4770 bool busy = false;
4771 struct workqueue_struct *wq;
4772 struct pool_workqueue *pwq;
4773
4774 mutex_lock(&wq_pool_mutex);
4775
4776 WARN_ON_ONCE(!workqueue_freezing);
4777
4778 list_for_each_entry(wq, &workqueues, list) {
4779 if (!(wq->flags & WQ_FREEZABLE))
4780 continue;
4781
4782
4783
4784
4785 rcu_read_lock_sched();
4786 for_each_pwq(pwq, wq) {
4787 WARN_ON_ONCE(pwq->nr_active < 0);
4788 if (pwq->nr_active) {
4789 busy = true;
4790 rcu_read_unlock_sched();
4791 goto out_unlock;
4792 }
4793 }
4794 rcu_read_unlock_sched();
4795 }
4796out_unlock:
4797 mutex_unlock(&wq_pool_mutex);
4798 return busy;
4799}
4800
/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected
 * frozen works are transferred to their respective pool worklists.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4810void thaw_workqueues(void)
4811{
4812 struct workqueue_struct *wq;
4813 struct pool_workqueue *pwq;
4814
4815 mutex_lock(&wq_pool_mutex);
4816
4817 if (!workqueue_freezing)
4818 goto out_unlock;
4819
4820 workqueue_freezing = false;
4821
4822
4823 list_for_each_entry(wq, &workqueues, list) {
4824 mutex_lock(&wq->mutex);
4825 for_each_pwq(pwq, wq)
4826 pwq_adjust_max_active(pwq);
4827 mutex_unlock(&wq->mutex);
4828 }
4829
4830out_unlock:
4831 mutex_unlock(&wq_pool_mutex);
4832}
4833#endif
4834
4835static int workqueue_apply_unbound_cpumask(void)
4836{
4837 LIST_HEAD(ctxs);
4838 int ret = 0;
4839 struct workqueue_struct *wq;
4840 struct apply_wqattrs_ctx *ctx, *n;
4841
4842 lockdep_assert_held(&wq_pool_mutex);
4843
4844 list_for_each_entry(wq, &workqueues, list) {
4845 if (!(wq->flags & WQ_UNBOUND))
4846 continue;
4847
4848 if (wq->flags & __WQ_ORDERED)
4849 continue;
4850
4851 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
4852 if (!ctx) {
4853 ret = -ENOMEM;
4854 break;
4855 }
4856
4857 list_add_tail(&ctx->list, &ctxs);
4858 }
4859
4860 list_for_each_entry_safe(ctx, n, &ctxs, list) {
4861 if (!ret)
4862 apply_wqattrs_commit(ctx);
4863 apply_wqattrs_cleanup(ctx);
4864 }
4865
4866 return ret;
4867}
4868
/**
 * workqueue_set_unbound_cpumask - set the low-level unbound cpumask
 * @cpumask: the cpumask to set
 *
 * The low-level workqueues cpumask is a global cpumask that limits
 * the affinity of all unbound workqueues.  This function checks @cpumask
 * and applies it to all unbound workqueues, updating all of their pwqs.
 *
 * Return:	0	- success
 *		-EINVAL	- invalid @cpumask
 *		-ENOMEM	- failed to allocate memory for attrs or pwqs
 */
4881int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4882{
4883 int ret = -EINVAL;
4884 cpumask_var_t saved_cpumask;
4885
4886 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4887 return -ENOMEM;
4888
4889 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4890 if (!cpumask_empty(cpumask)) {
4891 apply_wqattrs_lock();
4892
4893
4894 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
4895
4896
4897 cpumask_copy(wq_unbound_cpumask, cpumask);
4898 ret = workqueue_apply_unbound_cpumask();
4899
4900
4901 if (ret < 0)
4902 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
4903
4904 apply_wqattrs_unlock();
4905 }
4906
4907 free_cpumask_var(saved_cpumask);
4908 return ret;
4909}
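
/*
 * Example (illustrative sketch; confining unbound work to CPUs 0-3 is just
 * an assumed policy): in-kernel callers build a cpumask and hand it over;
 * userspace reaches the same setting through
 * /sys/devices/virtual/workqueue/cpumask.
 *
 *	cpumask_var_t mask;
 *	int ret;
 *
 *	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(0, mask);
 *	cpumask_set_cpu(1, mask);
 *	cpumask_set_cpu(2, mask);
 *	cpumask_set_cpu(3, mask);
 *	ret = workqueue_set_unbound_cpumask(mask);
 *	free_cpumask_var(mask);
 */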
4910
4911#ifdef CONFIG_SYSFS
4912
/*
 * Workqueues with WQ_SYSFS flag set are visible to userland via
 * /sys/bus/workqueue/devices/WQ_NAME.  All visible workqueues have the
 * following attributes.
 *
 *  per_cpu	RO bool	: whether the workqueue is per-cpu or unbound
 *  max_active	RW int	: maximum number of in-flight work items
 *
 * Unbound workqueues have the following extra attributes.
 *
 *  pool_ids	RO int	: the associated pool IDs for each node
 *  nice	RW int	: nice value of the workers
 *  cpumask	RW mask	: bitmask of allowed CPUs for the workers
 *  numa	RW bool	: whether NUMA affinity is enabled
 */

4926struct wq_device {
4927 struct workqueue_struct *wq;
4928 struct device dev;
4929};
4930
4931static struct workqueue_struct *dev_to_wq(struct device *dev)
4932{
4933 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
4934
4935 return wq_dev->wq;
4936}
4937
4938static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
4939 char *buf)
4940{
4941 struct workqueue_struct *wq = dev_to_wq(dev);
4942
4943 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
4944}
4945static DEVICE_ATTR_RO(per_cpu);
4946
4947static ssize_t max_active_show(struct device *dev,
4948 struct device_attribute *attr, char *buf)
4949{
4950 struct workqueue_struct *wq = dev_to_wq(dev);
4951
4952 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
4953}
4954
4955static ssize_t max_active_store(struct device *dev,
4956 struct device_attribute *attr, const char *buf,
4957 size_t count)
4958{
4959 struct workqueue_struct *wq = dev_to_wq(dev);
4960 int val;
4961
4962 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
4963 return -EINVAL;
4964
4965 workqueue_set_max_active(wq, val);
4966 return count;
4967}
4968static DEVICE_ATTR_RW(max_active);
4969
4970static struct attribute *wq_sysfs_attrs[] = {
4971 &dev_attr_per_cpu.attr,
4972 &dev_attr_max_active.attr,
4973 NULL,
4974};
4975ATTRIBUTE_GROUPS(wq_sysfs);
4976
4977static ssize_t wq_pool_ids_show(struct device *dev,
4978 struct device_attribute *attr, char *buf)
4979{
4980 struct workqueue_struct *wq = dev_to_wq(dev);
4981 const char *delim = "";
4982 int node, written = 0;
4983
4984 rcu_read_lock_sched();
4985 for_each_node(node) {
4986 written += scnprintf(buf + written, PAGE_SIZE - written,
4987 "%s%d:%d", delim, node,
4988 unbound_pwq_by_node(wq, node)->pool->id);
4989 delim = " ";
4990 }
4991 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
4992 rcu_read_unlock_sched();
4993
4994 return written;
4995}
4996
4997static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
4998 char *buf)
4999{
5000 struct workqueue_struct *wq = dev_to_wq(dev);
5001 int written;
5002
5003 mutex_lock(&wq->mutex);
5004 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5005 mutex_unlock(&wq->mutex);
5006
5007 return written;
5008}
5009
/* prepare workqueue_attrs for sysfs store operations */
5011static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5012{
5013 struct workqueue_attrs *attrs;
5014
5015 lockdep_assert_held(&wq_pool_mutex);
5016
5017 attrs = alloc_workqueue_attrs(GFP_KERNEL);
5018 if (!attrs)
5019 return NULL;
5020
5021 copy_workqueue_attrs(attrs, wq->unbound_attrs);
5022 return attrs;
5023}
5024
5025static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5026 const char *buf, size_t count)
5027{
5028 struct workqueue_struct *wq = dev_to_wq(dev);
5029 struct workqueue_attrs *attrs;
5030 int ret = -ENOMEM;
5031
5032 apply_wqattrs_lock();
5033
5034 attrs = wq_sysfs_prep_attrs(wq);
5035 if (!attrs)
5036 goto out_unlock;
5037
5038 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5039 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5040 ret = apply_workqueue_attrs_locked(wq, attrs);
5041 else
5042 ret = -EINVAL;
5043
5044out_unlock:
5045 apply_wqattrs_unlock();
5046 free_workqueue_attrs(attrs);
5047 return ret ?: count;
5048}
5049
5050static ssize_t wq_cpumask_show(struct device *dev,
5051 struct device_attribute *attr, char *buf)
5052{
5053 struct workqueue_struct *wq = dev_to_wq(dev);
5054 int written;
5055
5056 mutex_lock(&wq->mutex);
5057 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5058 cpumask_pr_args(wq->unbound_attrs->cpumask));
5059 mutex_unlock(&wq->mutex);
5060 return written;
5061}
5062
5063static ssize_t wq_cpumask_store(struct device *dev,
5064 struct device_attribute *attr,
5065 const char *buf, size_t count)
5066{
5067 struct workqueue_struct *wq = dev_to_wq(dev);
5068 struct workqueue_attrs *attrs;
5069 int ret = -ENOMEM;
5070
5071 apply_wqattrs_lock();
5072
5073 attrs = wq_sysfs_prep_attrs(wq);
5074 if (!attrs)
5075 goto out_unlock;
5076
5077 ret = cpumask_parse(buf, attrs->cpumask);
5078 if (!ret)
5079 ret = apply_workqueue_attrs_locked(wq, attrs);
5080
5081out_unlock:
5082 apply_wqattrs_unlock();
5083 free_workqueue_attrs(attrs);
5084 return ret ?: count;
5085}
5086
5087static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5088 char *buf)
5089{
5090 struct workqueue_struct *wq = dev_to_wq(dev);
5091 int written;
5092
5093 mutex_lock(&wq->mutex);
5094 written = scnprintf(buf, PAGE_SIZE, "%d\n",
5095 !wq->unbound_attrs->no_numa);
5096 mutex_unlock(&wq->mutex);
5097
5098 return written;
5099}
5100
5101static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5102 const char *buf, size_t count)
5103{
5104 struct workqueue_struct *wq = dev_to_wq(dev);
5105 struct workqueue_attrs *attrs;
5106 int v, ret = -ENOMEM;
5107
5108 apply_wqattrs_lock();
5109
5110 attrs = wq_sysfs_prep_attrs(wq);
5111 if (!attrs)
5112 goto out_unlock;
5113
5114 ret = -EINVAL;
5115 if (sscanf(buf, "%d", &v) == 1) {
5116 attrs->no_numa = !v;
5117 ret = apply_workqueue_attrs_locked(wq, attrs);
5118 }
5119
5120out_unlock:
5121 apply_wqattrs_unlock();
5122 free_workqueue_attrs(attrs);
5123 return ret ?: count;
5124}
5125
5126static struct device_attribute wq_sysfs_unbound_attrs[] = {
5127 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
5128 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
5129 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
5130 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
5131 __ATTR_NULL,
5132};
5133
5134static struct bus_type wq_subsys = {
5135 .name = "workqueue",
5136 .dev_groups = wq_sysfs_groups,
5137};
5138
5139static ssize_t wq_unbound_cpumask_show(struct device *dev,
5140 struct device_attribute *attr, char *buf)
5141{
5142 int written;
5143
5144 mutex_lock(&wq_pool_mutex);
5145 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5146 cpumask_pr_args(wq_unbound_cpumask));
5147 mutex_unlock(&wq_pool_mutex);
5148
5149 return written;
5150}
5151
5152static ssize_t wq_unbound_cpumask_store(struct device *dev,
5153 struct device_attribute *attr, const char *buf, size_t count)
5154{
5155 cpumask_var_t cpumask;
5156 int ret;
5157
5158 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5159 return -ENOMEM;
5160
5161 ret = cpumask_parse(buf, cpumask);
5162 if (!ret)
5163 ret = workqueue_set_unbound_cpumask(cpumask);
5164
5165 free_cpumask_var(cpumask);
5166 return ret ? ret : count;
5167}
5168
5169static struct device_attribute wq_sysfs_cpumask_attr =
5170 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5171 wq_unbound_cpumask_store);
5172
5173static int __init wq_sysfs_init(void)
5174{
5175 int err;
5176
5177 err = subsys_virtual_register(&wq_subsys, NULL);
5178 if (err)
5179 return err;
5180
5181 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
5182}
5183core_initcall(wq_sysfs_init);
5184
5185static void wq_device_release(struct device *dev)
5186{
5187 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5188
5189 kfree(wq_dev);
5190}
/**
 * workqueue_sysfs_register - make a workqueue visible in sysfs
 * @wq: the workqueue to register
 *
 * Expose @wq in sysfs under /sys/bus/workqueue/devices.
 * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
 * which is the preferred method.
 *
 * Workqueue user should use this function directly iff it wants to apply
 * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
 * apply_workqueue_attrs() may race against userland updating the
 * attributes.
 *
 * Return: 0 on success, -errno on failure.
 */
5207int workqueue_sysfs_register(struct workqueue_struct *wq)
5208{
5209 struct wq_device *wq_dev;
5210 int ret;
5211
5212
5213
5214
5215
5216
5217 if (WARN_ON(wq->flags & __WQ_ORDERED))
5218 return -EINVAL;
5219
5220 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
5221 if (!wq_dev)
5222 return -ENOMEM;
5223
5224 wq_dev->wq = wq;
5225 wq_dev->dev.bus = &wq_subsys;
5226 wq_dev->dev.release = wq_device_release;
5227 dev_set_name(&wq_dev->dev, "%s", wq->name);
5228
5229
5230
5231
5232
5233 dev_set_uevent_suppress(&wq_dev->dev, true);
5234
5235 ret = device_register(&wq_dev->dev);
5236 if (ret) {
5237 kfree(wq_dev);
5238 wq->wq_dev = NULL;
5239 return ret;
5240 }
5241
5242 if (wq->flags & WQ_UNBOUND) {
5243 struct device_attribute *attr;
5244
5245 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
5246 ret = device_create_file(&wq_dev->dev, attr);
5247 if (ret) {
5248 device_unregister(&wq_dev->dev);
5249 wq->wq_dev = NULL;
5250 return ret;
5251 }
5252 }
5253 }
5254
5255 dev_set_uevent_suppress(&wq_dev->dev, false);
5256 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
5257 return 0;
5258}

/**
 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
 * @wq: the workqueue to unregister
 *
 * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
 */
5266static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
5267{
5268 struct wq_device *wq_dev = wq->wq_dev;
5269
5270 if (!wq->wq_dev)
5271 return;
5272
5273 wq->wq_dev = NULL;
5274 device_unregister(&wq_dev->dev);
5275}
5276#else
5277static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
5278#endif
5279
/*
 * Workqueue watchdog.
 *
 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
 * flush dependency, a concurrency managed work item which stays RUNNING
 * indefinitely.  Workqueue stalls can be very difficult to debug as the
 * usual warning mechanisms don't trigger and internal workqueue state is
 * largely opaque.
 *
 * Workqueue watchdog monitors all worker pools periodically and dumps
 * pool state if some pools failed to make forward progress for a while
 * where forward progress is defined as the first item on ->worklist
 * changing.
 *
 * This mechanism is controlled through the kernel parameter
 * "workqueue.watchdog_thresh" which can be updated at runtime through the
 * corresponding sysfs parameter file.
 */

5297#ifdef CONFIG_WQ_WATCHDOG
5298
5299static void wq_watchdog_timer_fn(unsigned long data);
5300
5301static unsigned long wq_watchdog_thresh = 30;
5302static struct timer_list wq_watchdog_timer =
5303 TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
5304
5305static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
5306static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
5307
5308static void wq_watchdog_reset_touched(void)
5309{
5310 int cpu;
5311
5312 wq_watchdog_touched = jiffies;
5313 for_each_possible_cpu(cpu)
5314 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5315}
5316
5317static void wq_watchdog_timer_fn(unsigned long data)
5318{
5319 unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
5320 bool lockup_detected = false;
5321 struct worker_pool *pool;
5322 int pi;
5323
5324 if (!thresh)
5325 return;
5326
5327 rcu_read_lock();
5328
5329 for_each_pool(pool, pi) {
5330 unsigned long pool_ts, touched, ts;
5331
5332 if (list_empty(&pool->worklist))
5333 continue;
5334
5335
5336 pool_ts = READ_ONCE(pool->watchdog_ts);
5337 touched = READ_ONCE(wq_watchdog_touched);
5338
5339 if (time_after(pool_ts, touched))
5340 ts = pool_ts;
5341 else
5342 ts = touched;
5343
5344 if (pool->cpu >= 0) {
5345 unsigned long cpu_touched =
5346 READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
5347 pool->cpu));
5348 if (time_after(cpu_touched, ts))
5349 ts = cpu_touched;
5350 }
5351
5352
5353 if (time_after(jiffies, ts + thresh)) {
5354 lockup_detected = true;
5355 pr_emerg("BUG: workqueue lockup - pool");
5356 pr_cont_pool_info(pool);
5357 pr_cont(" stuck for %us!\n",
5358 jiffies_to_msecs(jiffies - pool_ts) / 1000);
5359 }
5360 }
5361
5362 rcu_read_unlock();
5363
5364 if (lockup_detected)
5365 show_workqueue_state();
5366
5367 wq_watchdog_reset_touched();
5368 mod_timer(&wq_watchdog_timer, jiffies + thresh);
5369}
5370
5371void wq_watchdog_touch(int cpu)
5372{
5373 if (cpu >= 0)
5374 per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
5375 else
5376 wq_watchdog_touched = jiffies;
5377}
5378
5379static void wq_watchdog_set_thresh(unsigned long thresh)
5380{
5381 wq_watchdog_thresh = 0;
5382 del_timer_sync(&wq_watchdog_timer);
5383
5384 if (thresh) {
5385 wq_watchdog_thresh = thresh;
5386 wq_watchdog_reset_touched();
5387 mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
5388 }
5389}
5390
5391static int wq_watchdog_param_set_thresh(const char *val,
5392 const struct kernel_param *kp)
5393{
5394 unsigned long thresh;
5395 int ret;
5396
5397 ret = kstrtoul(val, 0, &thresh);
5398 if (ret)
5399 return ret;
5400
5401 if (system_wq)
5402 wq_watchdog_set_thresh(thresh);
5403 else
5404 wq_watchdog_thresh = thresh;
5405
5406 return 0;
5407}
5408
5409static const struct kernel_param_ops wq_watchdog_thresh_ops = {
5410 .set = wq_watchdog_param_set_thresh,
5411 .get = param_get_ulong,
5412};
5413
5414module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
5415 0644);
5416
5417static void wq_watchdog_init(void)
5418{
5419 wq_watchdog_set_thresh(wq_watchdog_thresh);
5420}
5421
5422#else
5423
5424static inline void wq_watchdog_init(void) { }
5425
5426#endif
5427
5428static void __init wq_numa_init(void)
5429{
5430 cpumask_var_t *tbl;
5431 int node, cpu;
5432
5433 if (num_possible_nodes() <= 1)
5434 return;
5435
5436 if (wq_disable_numa) {
5437 pr_info("workqueue: NUMA affinity support disabled\n");
5438 return;
5439 }
5440
5441 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
5442 BUG_ON(!wq_update_unbound_numa_attrs_buf);
5443
5444
5445
5446
5447
5448
5449 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
5450 BUG_ON(!tbl);
5451
5452 for_each_node(node)
5453 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
5454 node_online(node) ? node : NUMA_NO_NODE));
5455
5456 for_each_possible_cpu(cpu) {
5457 node = cpu_to_node(cpu);
5458 if (WARN_ON(node == NUMA_NO_NODE)) {
5459 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
5460
5461 return;
5462 }
5463 cpumask_set_cpu(cpu, tbl[node]);
5464 }
5465
5466 wq_numa_possible_cpumask = tbl;
5467 wq_numa_enabled = true;
5468}
5469
5470static int __init init_workqueues(void)
5471{
5472 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
5473 int i, cpu;
5474
5475 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5476
5477 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5478 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
5479
5480 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5481
5482 wq_numa_init();
5483
5484
5485 for_each_possible_cpu(cpu) {
5486 struct worker_pool *pool;
5487
5488 i = 0;
5489 for_each_cpu_worker_pool(pool, cpu) {
5490 BUG_ON(init_worker_pool(pool));
5491 pool->cpu = cpu;
5492 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
5493 pool->attrs->nice = std_nice[i++];
5494 pool->node = cpu_to_node(cpu);
5495
5496
5497 mutex_lock(&wq_pool_mutex);
5498 BUG_ON(worker_pool_assign_id(pool));
5499 mutex_unlock(&wq_pool_mutex);
5500 }
5501 }
5502
5503
5504 for_each_online_cpu(cpu) {
5505 struct worker_pool *pool;
5506
5507 for_each_cpu_worker_pool(pool, cpu) {
5508 pool->flags &= ~POOL_DISASSOCIATED;
5509 BUG_ON(!create_worker(pool));
5510 }
5511 }
5512
5513
5514 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
5515 struct workqueue_attrs *attrs;
5516
5517 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5518 attrs->nice = std_nice[i];
5519 unbound_std_wq_attrs[i] = attrs;
5520
5521
5522
5523
5524
5525
5526 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
5527 attrs->nice = std_nice[i];
5528 attrs->no_numa = true;
5529 ordered_wq_attrs[i] = attrs;
5530 }
5531
5532 system_wq = alloc_workqueue("events", 0, 0);
5533 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
5534 system_long_wq = alloc_workqueue("events_long", 0, 0);
5535 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
5536 WQ_UNBOUND_MAX_ACTIVE);
5537 system_freezable_wq = alloc_workqueue("events_freezable",
5538 WQ_FREEZABLE, 0);
5539 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
5540 WQ_POWER_EFFICIENT, 0);
5541 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
5542 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
5543 0);
5544 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
5545 !system_unbound_wq || !system_freezable_wq ||
5546 !system_power_efficient_wq ||
5547 !system_freezable_power_efficient_wq);
5548
5549 wq_watchdog_init();
5550
5551 return 0;
5552}
5553early_initcall(init_workqueues);
5554