/*
 * kernel/workqueue.c - generic asynchronous process context execution
 * mechanism backed by shared worker pools.
 */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>

#include "workqueue_internal.h"

enum {
	POOL_DISASSOCIATED	= 1 << 2,

	WORKER_DIE		= 1 << 1,
	WORKER_IDLE		= 1 << 2,
	WORKER_PREP		= 1 << 3,
	WORKER_CPU_INTENSIVE	= 1 << 6,
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,

	UNBOUND_POOL_HASH_ORDER	= 6,
	BUSY_WORKER_HASH_ORDER	= 6,

	MAX_IDLE_WORKERS_RATIO	= 4,
	IDLE_WORKER_TIMEOUT	= 300 * HZ,

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,

	MAYDAY_INTERVAL		= HZ / 10,
	CREATE_COOLDOWN		= HZ,

	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};
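/*
 * struct worker_pool - the shared pool a set of workers serves.  There are
 * two per-CPU pools for each possible CPU (normal and highpri) plus
 * dynamically created pools serving unbound workqueues with custom
 * attributes.  (Summary comment added for readability; the field users
 * below are the authoritative reference for the locking rules.)
 */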
struct worker_pool {
	spinlock_t		lock;
	int			cpu;
	int			node;
	int			id;
	unsigned int		flags;

	struct list_head	worklist;
	int			nr_workers;

	int			nr_idle;

	struct list_head	idle_list;
	struct timer_list	idle_timer;
	struct timer_list	mayday_timer;

	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);

	struct mutex		manager_arb;
	struct mutex		attach_mutex;
	struct list_head	workers;
	struct completion	*detach_completion;

	struct ida		worker_ida;

	struct workqueue_attrs	*attrs;
	struct hlist_node	hash_node;
	int			refcnt;

	atomic_t		nr_running ____cacheline_aligned_in_smp;

	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;
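/*
 * struct pool_workqueue - the link between a workqueue and a worker_pool.
 * Each workqueue has one pwq per pool it uses; queued work items are
 * accounted here (flush colors, max_active).  The structure is aligned to
 * 1 << WORK_STRUCT_FLAG_BITS so its address can be stored in work->data
 * alongside the flag bits.  (Summary comment added for readability.)
 */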
struct pool_workqueue {
	struct worker_pool	*pool;
	struct workqueue_struct	*wq;
	int			work_color;
	int			flush_color;
	int			refcnt;
	int			nr_in_flight[WORK_NR_COLORS];

	int			nr_active;
	int			max_active;
	struct list_head	delayed_works;
	struct list_head	pwqs_node;
	struct list_head	mayday_node;

	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

struct wq_flusher {
	struct list_head	list;
	int			flush_color;
	struct completion	done;
};

struct wq_device;

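/*
 * The externally visible workqueue.  It relays issued work items to the
 * matching pool_workqueues and handles flushing, draining and max_active
 * adjustments.  (Summary comment added for readability.)
 */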
struct workqueue_struct {
	struct list_head	pwqs;
	struct list_head	list;

	struct mutex		mutex;
	int			work_color;
	int			flush_color;
	atomic_t		nr_pwqs_to_flush;
	struct wq_flusher	*first_flusher;
	struct list_head	flusher_queue;
	struct list_head	flusher_overflow;

	struct list_head	maydays;
	struct worker		*rescuer;

	int			nr_drainers;
	int			saved_max_active;

	struct workqueue_attrs	*unbound_attrs;
	struct pool_workqueue	*dfl_pwq;

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;
#endif
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN];

	unsigned int		flags ____cacheline_aligned;
	struct pool_workqueue __percpu *cpu_pwqs;
	struct pool_workqueue __rcu *numa_pwq_tbl[];
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
static bool wq_power_efficient = true;
#else
static bool wq_power_efficient;
#endif

module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_numa_enabled;

static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);
static DEFINE_SPINLOCK(wq_mayday_lock);

static LIST_HEAD(workqueues);
static bool workqueue_freezing;

static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
				     cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);

static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
				 const struct workqueue_attrs *from);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
			   lockdep_is_held(&wq_pool_mutex),		\
			   "sched RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
			   lockdep_is_held(&wq->mutex),			\
			   "sched RCU or wq->mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
		else

#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

static void *work_debug_hint(void *addr)
{
	return ((struct work_struct *) addr)->func;
}

static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		WARN_ON_ONCE(1);
		return 0;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return 0;
	}
}

static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.debug_hint	= work_debug_hint,
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

void destroy_delayed_work_on_stack(struct delayed_work *work)
{
	destroy_timer_on_stack(&work->timer);
	debug_object_free(&work->work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif

static int worker_pool_assign_id(struct worker_pool *pool)
{
	int ret;

	lockdep_assert_held(&wq_pool_mutex);

	ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
			GFP_KERNEL);
	if (ret >= 0) {
		pool->id = ret;
		return 0;
	}
	return ret;
}

static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
						  int node)
{
	assert_rcu_or_wq_mutex(wq);
	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}

static inline void set_work_data(struct work_struct *work, unsigned long data,
				 unsigned long flags)
{
	WARN_ON_ONCE(!work_pending(work));
	atomic_long_set(&work->data, data | flags | work_static(work));
}

static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
			 unsigned long extra_flags)
{
	set_work_data(work, (unsigned long)pwq,
		      WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
}

static void set_work_pool_and_keep_pending(struct work_struct *work,
					   int pool_id)
{
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
		      WORK_STRUCT_PENDING);
}

static void set_work_pool_and_clear_pending(struct work_struct *work,
					    int pool_id)
{
	smp_wmb();
	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
}

static void clear_work_data(struct work_struct *work)
{
	smp_wmb();
	set_work_data(work, WORK_STRUCT_NO_POOL, 0);
}

static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
	else
		return NULL;
}

static struct worker_pool *get_work_pool(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);
	int pool_id;

	assert_rcu_or_pool_mutex();

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool;

	pool_id = data >> WORK_OFFQ_POOL_SHIFT;
	if (pool_id == WORK_OFFQ_POOL_NONE)
		return NULL;

	return idr_find(&worker_pool_idr, pool_id);
}

static int get_work_pool_id(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	if (data & WORK_STRUCT_PWQ)
		return ((struct pool_workqueue *)
			(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;

	return data >> WORK_OFFQ_POOL_SHIFT;
}

static void mark_work_canceling(struct work_struct *work)
{
	unsigned long pool_id = get_work_pool_id(work);

	pool_id <<= WORK_OFFQ_POOL_SHIFT;
	set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
}

static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}

static bool __need_more_worker(struct worker_pool *pool)
{
	return !atomic_read(&pool->nr_running);
}

static bool need_more_worker(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) && __need_more_worker(pool);
}

static bool may_start_working(struct worker_pool *pool)
{
	return pool->nr_idle;
}

static bool keep_working(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) &&
		atomic_read(&pool->nr_running) <= 1;
}

static bool need_to_create_worker(struct worker_pool *pool)
{
	return need_more_worker(pool) && !may_start_working(pool);
}

static bool too_many_workers(struct worker_pool *pool)
{
	bool managing = mutex_is_locked(&pool->manager_arb);
	int nr_idle = pool->nr_idle + managing;
	int nr_busy = pool->nr_workers - nr_idle;

	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}

static struct worker *first_idle_worker(struct worker_pool *pool)
{
	if (unlikely(list_empty(&pool->idle_list)))
		return NULL;

	return list_first_entry(&pool->idle_list, struct worker, entry);
}

static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *worker = first_idle_worker(pool);

	if (likely(worker))
		wake_up_process(worker->task);
}

void wq_worker_waking_up(struct task_struct *task, int cpu)
{
	struct worker *worker = kthread_data(task);

	if (!(worker->flags & WORKER_NOT_RUNNING)) {
		WARN_ON_ONCE(worker->pool->cpu != cpu);
		atomic_inc(&worker->pool->nr_running);
	}
}

struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
{
	struct worker *worker = kthread_data(task), *to_wakeup = NULL;
	struct worker_pool *pool;

	if (worker->flags & WORKER_NOT_RUNNING)
		return NULL;

	pool = worker->pool;

	if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
		return NULL;

	if (atomic_dec_and_test(&pool->nr_running) &&
	    !list_empty(&pool->worklist))
		to_wakeup = first_idle_worker(pool);
	return to_wakeup ? to_wakeup->task : NULL;
}

static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;

	WARN_ON_ONCE(worker->task != current);

	if ((flags & WORKER_NOT_RUNNING) &&
	    !(worker->flags & WORKER_NOT_RUNNING)) {
		atomic_dec(&pool->nr_running);
	}

	worker->flags |= flags;
}

static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{
	struct worker_pool *pool = worker->pool;
	unsigned int oflags = worker->flags;

	WARN_ON_ONCE(worker->task != current);

	worker->flags &= ~flags;

	if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
		if (!(worker->flags & WORKER_NOT_RUNNING))
			atomic_inc(&pool->nr_running);
}

static struct worker *find_worker_executing_work(struct worker_pool *pool,
						 struct work_struct *work)
{
	struct worker *worker;

	hash_for_each_possible(pool->busy_hash, worker, hentry,
			       (unsigned long)work)
		if (worker->current_work == work &&
		    worker->current_func == work->func)
			return worker;

	return NULL;
}

static void move_linked_works(struct work_struct *work, struct list_head *head,
			      struct work_struct **nextp)
{
	struct work_struct *n;

	list_for_each_entry_safe_from(work, n, NULL, entry) {
		list_move_tail(&work->entry, head);
		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
			break;
	}

	if (nextp)
		*nextp = n;
}

static void get_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	WARN_ON_ONCE(pwq->refcnt <= 0);
	pwq->refcnt++;
}

static void put_pwq(struct pool_workqueue *pwq)
{
	lockdep_assert_held(&pwq->pool->lock);
	if (likely(--pwq->refcnt))
		return;
	if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
		return;

	schedule_work(&pwq->unbound_release_work);
}

static void put_pwq_unlocked(struct pool_workqueue *pwq)
{
	if (pwq) {
		spin_lock_irq(&pwq->pool->lock);
		put_pwq(pwq);
		spin_unlock_irq(&pwq->pool->lock);
	}
}

static void pwq_activate_delayed_work(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);

	trace_workqueue_activate_work(work);
	move_linked_works(work, &pwq->pool->worklist, NULL);
	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
	pwq->nr_active++;
}

static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
{
	struct work_struct *work = list_first_entry(&pwq->delayed_works,
						    struct work_struct, entry);

	pwq_activate_delayed_work(work);
}

static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
	if (color == WORK_NO_COLOR)
		goto out_put;

	pwq->nr_in_flight[color]--;

	pwq->nr_active--;
	if (!list_empty(&pwq->delayed_works)) {
		if (pwq->nr_active < pwq->max_active)
			pwq_activate_first_delayed(pwq);
	}

	if (likely(pwq->flush_color != color))
		goto out_put;

	if (pwq->nr_in_flight[color])
		goto out_put;

	pwq->flush_color = -1;

	if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
		complete(&pwq->wq->first_flusher->done);
out_put:
	put_pwq(pwq);
}

static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
			       unsigned long *flags)
{
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	local_irq_save(*flags);

	if (is_dwork) {
		struct delayed_work *dwork = to_delayed_work(work);

		if (likely(del_timer(&dwork->timer)))
			return 1;
	}

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	pool = get_work_pool(work);
	if (!pool)
		goto fail;

	spin_lock(&pool->lock);

	pwq = get_work_pwq(work);
	if (pwq && pwq->pool == pool) {
		debug_work_deactivate(work);

		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
			pwq_activate_delayed_work(work);

		list_del_init(&work->entry);
		pwq_dec_nr_in_flight(pwq, get_work_color(work));

		set_work_pool_and_keep_pending(work, pool->id);

		spin_unlock(&pool->lock);
		return 1;
	}
	spin_unlock(&pool->lock);
fail:
	local_irq_restore(*flags);
	if (work_is_canceling(work))
		return -ENOENT;
	cpu_relax();
	return -EAGAIN;
}

static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
			struct list_head *head, unsigned int extra_flags)
{
	struct worker_pool *pool = pwq->pool;

	set_work_pwq(work, pwq, extra_flags);
	list_add_tail(&work->entry, head);
	get_pwq(pwq);

	smp_mb();

	if (__need_more_worker(pool))
		wake_up_worker(pool);
}

static bool is_chained_work(struct workqueue_struct *wq)
{
	struct worker *worker;

	worker = current_wq_worker();

	return worker && worker->current_pwq->wq == wq;
}

static void __queue_work(int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct pool_workqueue *pwq;
	struct worker_pool *last_pool;
	struct list_head *worklist;
	unsigned int work_flags;
	unsigned int req_cpu = cpu;

	WARN_ON_ONCE(!irqs_disabled());

	debug_work_activate(work);

	if (unlikely(wq->flags & __WQ_DRAINING) &&
	    WARN_ON_ONCE(!is_chained_work(wq)))
		return;
retry:
	if (req_cpu == WORK_CPU_UNBOUND)
		cpu = raw_smp_processor_id();

	if (!(wq->flags & WQ_UNBOUND))
		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
	else
		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));

	last_pool = get_work_pool(work);
	if (last_pool && last_pool != pwq->pool) {
		struct worker *worker;

		spin_lock(&last_pool->lock);

		worker = find_worker_executing_work(last_pool, work);

		if (worker && worker->current_pwq->wq == wq) {
			pwq = worker->current_pwq;
		} else {
			spin_unlock(&last_pool->lock);
			spin_lock(&pwq->pool->lock);
		}
	} else {
		spin_lock(&pwq->pool->lock);
	}

	if (unlikely(!pwq->refcnt)) {
		if (wq->flags & WQ_UNBOUND) {
			spin_unlock(&pwq->pool->lock);
			cpu_relax();
			goto retry;
		}

		WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
			  wq->name, cpu);
	}

	trace_workqueue_queue_work(req_cpu, pwq, work);

	if (WARN_ON(!list_empty(&work->entry))) {
		spin_unlock(&pwq->pool->lock);
		return;
	}

	pwq->nr_in_flight[pwq->work_color]++;
	work_flags = work_color_to_flags(pwq->work_color);

	if (likely(pwq->nr_active < pwq->max_active)) {
		trace_workqueue_activate_work(work);
		pwq->nr_active++;
		worklist = &pwq->pool->worklist;
	} else {
		work_flags |= WORK_STRUCT_DELAYED;
		worklist = &pwq->delayed_works;
	}

	insert_work(pwq, work, worklist, work_flags);

	spin_unlock(&pwq->pool->lock);
}

bool queue_work_on(int cpu, struct workqueue_struct *wq,
		   struct work_struct *work)
{
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_work_on);
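
/*
 * Usage sketch (illustrative only, not part of this file): a caller defines
 * a work item and queues it on a specific CPU.  The names my_work and
 * my_work_fn below are hypothetical.
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("running in process context on CPU %d\n",
 *			smp_processor_id());
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	queue_work_on(1, system_wq, &my_work);
 *
 * queue_work() is the common wrapper; it passes WORK_CPU_UNBOUND so the
 * local CPU (or the NUMA-local unbound pool) is used.
 */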

void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;

	__queue_work(dwork->cpu, dwork->wq, &dwork->work);
}
EXPORT_SYMBOL(delayed_work_timer_fn);

static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
				struct delayed_work *dwork, unsigned long delay)
{
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
		     timer->data != (unsigned long)dwork);
	WARN_ON_ONCE(timer_pending(timer));
	WARN_ON_ONCE(!list_empty(&work->entry));

	if (!delay) {
		__queue_work(cpu, wq, &dwork->work);
		return;
	}

	timer_stats_timer_set_start_info(&dwork->timer);

	dwork->wq = wq;
	dwork->cpu = cpu;
	timer->expires = jiffies + delay;

	if (unlikely(cpu != WORK_CPU_UNBOUND))
		add_timer_on(timer, cpu);
	else
		add_timer(timer);
}

bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			   struct delayed_work *dwork, unsigned long delay)
{
	struct work_struct *work = &dwork->work;
	bool ret = false;
	unsigned long flags;

	local_irq_save(flags);

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		ret = true;
	}

	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(queue_delayed_work_on);
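
/*
 * Usage sketch (illustrative only): queueing work to run after a delay.
 * my_dwork and my_dwork_fn are hypothetical names.
 *
 *	static void my_dwork_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		...
 *	}
 *	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);
 *
 *	queue_delayed_work(system_wq, &my_dwork, msecs_to_jiffies(100));
 */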

bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
			 struct delayed_work *dwork, unsigned long delay)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(&dwork->work, true, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (likely(ret >= 0)) {
		__queue_delayed_work(cpu, wq, dwork, delay);
		local_irq_restore(flags);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
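
/*
 * Note added for readability: mod_delayed_work_on() either queues the work
 * fresh or re-arms an already pending timer, which makes it a natural
 * building block for debouncing.  A hypothetical sketch:
 *
 *	// restart a 200ms quiet period every time an event arrives
 *	mod_delayed_work(system_wq, &my_dwork, msecs_to_jiffies(200));
 */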

static void worker_enter_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
	    WARN_ON_ONCE(!list_empty(&worker->entry) &&
			 (worker->hentry.next || worker->hentry.pprev)))
		return;

	worker->flags |= WORKER_IDLE;
	pool->nr_idle++;
	worker->last_active = jiffies;

	list_add(&worker->entry, &pool->idle_list);

	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);

	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     pool->nr_workers == pool->nr_idle &&
		     atomic_read(&pool->nr_running));
}

static void worker_leave_idle(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
		return;
	worker_clr_flags(worker, WORKER_IDLE);
	pool->nr_idle--;
	list_del_init(&worker->entry);
}

static struct worker *alloc_worker(int node)
{
	struct worker *worker;

	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
	if (worker) {
		INIT_LIST_HEAD(&worker->entry);
		INIT_LIST_HEAD(&worker->scheduled);
		INIT_LIST_HEAD(&worker->node);

		worker->flags = WORKER_PREP;
	}
	return worker;
}

static void worker_attach_to_pool(struct worker *worker,
				  struct worker_pool *pool)
{
	mutex_lock(&pool->attach_mutex);

	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);

	if (pool->flags & POOL_DISASSOCIATED)
		worker->flags |= WORKER_UNBOUND;

	list_add_tail(&worker->node, &pool->workers);

	mutex_unlock(&pool->attach_mutex);
}

static void worker_detach_from_pool(struct worker *worker,
				    struct worker_pool *pool)
{
	struct completion *detach_completion = NULL;

	mutex_lock(&pool->attach_mutex);
	list_del(&worker->node);
	if (list_empty(&pool->workers))
		detach_completion = pool->detach_completion;
	mutex_unlock(&pool->attach_mutex);

	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);

	if (detach_completion)
		complete(detach_completion);
}

static struct worker *create_worker(struct worker_pool *pool)
{
	struct worker *worker = NULL;
	int id = -1;
	char id_buf[16];

	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
	if (id < 0)
		goto fail;

	worker = alloc_worker(pool->node);
	if (!worker)
		goto fail;

	worker->pool = pool;
	worker->id = id;

	if (pool->cpu >= 0)
		snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
			 pool->attrs->nice < 0 ? "H" : "");
	else
		snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);

	worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
					      "kworker/%s", id_buf);
	if (IS_ERR(worker->task))
		goto fail;

	set_user_nice(worker->task, pool->attrs->nice);

	worker->task->flags |= PF_NO_SETAFFINITY;

	worker_attach_to_pool(worker, pool);

	spin_lock_irq(&pool->lock);
	worker->pool->nr_workers++;
	worker_enter_idle(worker);
	wake_up_process(worker->task);
	spin_unlock_irq(&pool->lock);

	return worker;

fail:
	if (id >= 0)
		ida_simple_remove(&pool->worker_ida, id);
	kfree(worker);
	return NULL;
}

static void destroy_worker(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	lockdep_assert_held(&pool->lock);

	if (WARN_ON(worker->current_work) ||
	    WARN_ON(!list_empty(&worker->scheduled)) ||
	    WARN_ON(!(worker->flags & WORKER_IDLE)))
		return;

	pool->nr_workers--;
	pool->nr_idle--;

	list_del_init(&worker->entry);
	worker->flags |= WORKER_DIE;
	wake_up_process(worker->task);
}

static void idle_worker_timeout(unsigned long __pool)
{
	struct worker_pool *pool = (void *)__pool;

	spin_lock_irq(&pool->lock);

	while (too_many_workers(pool)) {
		struct worker *worker;
		unsigned long expires;

		worker = list_entry(pool->idle_list.prev, struct worker, entry);
		expires = worker->last_active + IDLE_WORKER_TIMEOUT;

		if (time_before(jiffies, expires)) {
			mod_timer(&pool->idle_timer, expires);
			break;
		}

		destroy_worker(worker);
	}

	spin_unlock_irq(&pool->lock);
}

static void send_mayday(struct work_struct *work)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct workqueue_struct *wq = pwq->wq;

	lockdep_assert_held(&wq_mayday_lock);

	if (!wq->rescuer)
		return;

	if (list_empty(&pwq->mayday_node)) {
		get_pwq(pwq);
		list_add_tail(&pwq->mayday_node, &wq->maydays);
		wake_up_process(wq->rescuer->task);
	}
}

static void pool_mayday_timeout(unsigned long __pool)
{
	struct worker_pool *pool = (void *)__pool;
	struct work_struct *work;

	spin_lock_irq(&pool->lock);
	spin_lock(&wq_mayday_lock);

	if (need_to_create_worker(pool)) {
		list_for_each_entry(work, &pool->worklist, entry)
			send_mayday(work);
	}

	spin_unlock(&wq_mayday_lock);
	spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}

static void maybe_create_worker(struct worker_pool *pool)
__releases(&pool->lock)
__acquires(&pool->lock)
{
restart:
	spin_unlock_irq(&pool->lock);

	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);

	while (true) {
		if (create_worker(pool) || !need_to_create_worker(pool))
			break;

		schedule_timeout_interruptible(CREATE_COOLDOWN);

		if (!need_to_create_worker(pool))
			break;
	}

	del_timer_sync(&pool->mayday_timer);
	spin_lock_irq(&pool->lock);

	if (need_to_create_worker(pool))
		goto restart;
}

static bool manage_workers(struct worker *worker)
{
	struct worker_pool *pool = worker->pool;

	if (!mutex_trylock(&pool->manager_arb))
		return false;

	maybe_create_worker(pool);

	mutex_unlock(&pool->manager_arb);
	return true;
}

static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&pool->lock)
__acquires(&pool->lock)
{
	struct pool_workqueue *pwq = get_work_pwq(work);
	struct worker_pool *pool = worker->pool;
	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
	int work_color;
	struct worker *collision;
#ifdef CONFIG_LOCKDEP
	struct lockdep_map lockdep_map;

	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif

	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
		     raw_smp_processor_id() != pool->cpu);

	collision = find_worker_executing_work(pool, work);
	if (unlikely(collision)) {
		move_linked_works(work, &collision->scheduled, NULL);
		return;
	}

	debug_work_deactivate(work);
	hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
	worker->current_work = work;
	worker->current_func = work->func;
	worker->current_pwq = pwq;
	work_color = get_work_color(work);

	list_del_init(&work->entry);

	if (unlikely(cpu_intensive))
		worker_set_flags(worker, WORKER_CPU_INTENSIVE);

	if (need_more_worker(pool))
		wake_up_worker(pool);

	set_work_pool_and_clear_pending(work, pool->id);

	spin_unlock_irq(&pool->lock);

	lock_map_acquire_read(&pwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	trace_workqueue_execute_start(work);
	worker->current_func(work);

	trace_workqueue_execute_end(work);
	lock_map_release(&lockdep_map);
	lock_map_release(&pwq->wq->lockdep_map);

	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
		       "     last function: %pf\n",
		       current->comm, preempt_count(), task_pid_nr(current),
		       worker->current_func);
		debug_show_held_locks(current);
		dump_stack();
	}

	cond_resched_rcu_qs();

	spin_lock_irq(&pool->lock);

	if (unlikely(cpu_intensive))
		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);

	hash_del(&worker->hentry);
	worker->current_work = NULL;
	worker->current_func = NULL;
	worker->current_pwq = NULL;
	worker->desc_valid = false;
	pwq_dec_nr_in_flight(pwq, work_color);
}

static void process_scheduled_works(struct worker *worker)
{
	while (!list_empty(&worker->scheduled)) {
		struct work_struct *work = list_first_entry(&worker->scheduled,
						struct work_struct, entry);
		process_one_work(worker, work);
	}
}

static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct worker_pool *pool = worker->pool;

	worker->task->flags |= PF_WQ_WORKER;
woke_up:
	spin_lock_irq(&pool->lock);

	if (unlikely(worker->flags & WORKER_DIE)) {
		spin_unlock_irq(&pool->lock);
		WARN_ON_ONCE(!list_empty(&worker->entry));
		worker->task->flags &= ~PF_WQ_WORKER;

		set_task_comm(worker->task, "kworker/dying");
		ida_simple_remove(&pool->worker_ida, worker->id);
		worker_detach_from_pool(worker, pool);
		kfree(worker);
		return 0;
	}

	worker_leave_idle(worker);
recheck:
	if (!need_more_worker(pool))
		goto sleep;

	if (unlikely(!may_start_working(pool)) && manage_workers(worker))
		goto recheck;

	WARN_ON_ONCE(!list_empty(&worker->scheduled));

	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);

	do {
		struct work_struct *work =
			list_first_entry(&pool->worklist,
					 struct work_struct, entry);

		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
			process_one_work(worker, work);
			if (unlikely(!list_empty(&worker->scheduled)))
				process_scheduled_works(worker);
		} else {
			move_linked_works(work, &worker->scheduled, NULL);
			process_scheduled_works(worker);
		}
	} while (keep_working(pool));

	worker_set_flags(worker, WORKER_PREP);
sleep:
	worker_enter_idle(worker);
	__set_current_state(TASK_INTERRUPTIBLE);
	spin_unlock_irq(&pool->lock);
	schedule();
	goto woke_up;
}

static int rescuer_thread(void *__rescuer)
{
	struct worker *rescuer = __rescuer;
	struct workqueue_struct *wq = rescuer->rescue_wq;
	struct list_head *scheduled = &rescuer->scheduled;
	bool should_stop;

	set_user_nice(current, RESCUER_NICE_LEVEL);

	rescuer->task->flags |= PF_WQ_WORKER;
repeat:
	set_current_state(TASK_INTERRUPTIBLE);

	should_stop = kthread_should_stop();

	spin_lock_irq(&wq_mayday_lock);

	while (!list_empty(&wq->maydays)) {
		struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
					struct pool_workqueue, mayday_node);
		struct worker_pool *pool = pwq->pool;
		struct work_struct *work, *n;

		__set_current_state(TASK_RUNNING);
		list_del_init(&pwq->mayday_node);

		spin_unlock_irq(&wq_mayday_lock);

		worker_attach_to_pool(rescuer, pool);

		spin_lock_irq(&pool->lock);
		rescuer->pool = pool;

		WARN_ON_ONCE(!list_empty(scheduled));
		list_for_each_entry_safe(work, n, &pool->worklist, entry)
			if (get_work_pwq(work) == pwq)
				move_linked_works(work, scheduled, &n);

		if (!list_empty(scheduled)) {
			process_scheduled_works(rescuer);

			if (need_to_create_worker(pool)) {
				spin_lock(&wq_mayday_lock);
				get_pwq(pwq);
				list_move_tail(&pwq->mayday_node, &wq->maydays);
				spin_unlock(&wq_mayday_lock);
			}
		}

		put_pwq(pwq);

		if (need_more_worker(pool))
			wake_up_worker(pool);

		rescuer->pool = NULL;
		spin_unlock_irq(&pool->lock);

		worker_detach_from_pool(rescuer, pool);

		spin_lock_irq(&wq_mayday_lock);
	}

	spin_unlock_irq(&wq_mayday_lock);

	if (should_stop) {
		__set_current_state(TASK_RUNNING);
		rescuer->task->flags &= ~PF_WQ_WORKER;
		return 0;
	}

	WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
	schedule();
	goto repeat;
}

struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}

static void insert_wq_barrier(struct pool_workqueue *pwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	struct list_head *head;
	unsigned int linked = 0;

	INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
	init_completion(&barr->done);

	if (worker)
		head = worker->scheduled.next;
	else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;

		linked = *bits & WORK_STRUCT_LINKED;
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	debug_work_activate(&barr->work);
	insert_work(pwq, &barr->work, head,
		    work_color_to_flags(WORK_NO_COLOR) | linked);
}

static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
				      int flush_color, int work_color)
{
	bool wait = false;
	struct pool_workqueue *pwq;

	if (flush_color >= 0) {
		WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
		atomic_set(&wq->nr_pwqs_to_flush, 1);
	}

	for_each_pwq(pwq, wq) {
		struct worker_pool *pool = pwq->pool;

		spin_lock_irq(&pool->lock);

		if (flush_color >= 0) {
			WARN_ON_ONCE(pwq->flush_color != -1);

			if (pwq->nr_in_flight[flush_color]) {
				pwq->flush_color = flush_color;
				atomic_inc(&wq->nr_pwqs_to_flush);
				wait = true;
			}
		}

		if (work_color >= 0) {
			WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
			pwq->work_color = work_color;
		}

		spin_unlock_irq(&pool->lock);
	}

	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
		complete(&wq->first_flusher->done);

	return wait;
}

void flush_workqueue(struct workqueue_struct *wq)
{
	struct wq_flusher this_flusher = {
		.list = LIST_HEAD_INIT(this_flusher.list),
		.flush_color = -1,
		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
	};
	int next_color;

	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);

	mutex_lock(&wq->mutex);

	next_color = work_next_color(wq->work_color);

	if (next_color != wq->flush_color) {
		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
		this_flusher.flush_color = wq->work_color;
		wq->work_color = next_color;

		if (!wq->first_flusher) {
			WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

			wq->first_flusher = &this_flusher;

			if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
						       wq->work_color)) {
				wq->flush_color = next_color;
				wq->first_flusher = NULL;
				goto out_unlock;
			}
		} else {
			WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
			list_add_tail(&this_flusher.list, &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}
	} else {
		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
	}

	mutex_unlock(&wq->mutex);

	wait_for_completion(&this_flusher.done);

	if (wq->first_flusher != &this_flusher)
		return;

	mutex_lock(&wq->mutex);

	if (wq->first_flusher != &this_flusher)
		goto out_unlock;

	wq->first_flusher = NULL;

	WARN_ON_ONCE(!list_empty(&this_flusher.list));
	WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);

	while (true) {
		struct wq_flusher *next, *tmp;

		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
			if (next->flush_color != wq->flush_color)
				break;
			list_del_init(&next->list);
			complete(&next->done);
		}

		WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
			     wq->flush_color != work_next_color(wq->work_color));

		wq->flush_color = work_next_color(wq->flush_color);

		if (!list_empty(&wq->flusher_overflow)) {
			list_for_each_entry(tmp, &wq->flusher_overflow, list)
				tmp->flush_color = wq->work_color;

			wq->work_color = work_next_color(wq->work_color);

			list_splice_tail_init(&wq->flusher_overflow,
					      &wq->flusher_queue);
			flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
		}

		if (list_empty(&wq->flusher_queue)) {
			WARN_ON_ONCE(wq->flush_color != wq->work_color);
			break;
		}

		WARN_ON_ONCE(wq->flush_color == wq->work_color);
		WARN_ON_ONCE(wq->flush_color != next->flush_color);

		list_del_init(&next->list);
		wq->first_flusher = next;

		if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
			break;

		wq->first_flusher = NULL;
	}

out_unlock:
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(flush_workqueue);

void drain_workqueue(struct workqueue_struct *wq)
{
	unsigned int flush_cnt = 0;
	struct pool_workqueue *pwq;

	mutex_lock(&wq->mutex);
	if (!wq->nr_drainers++)
		wq->flags |= __WQ_DRAINING;
	mutex_unlock(&wq->mutex);
reflush:
	flush_workqueue(wq);

	mutex_lock(&wq->mutex);

	for_each_pwq(pwq, wq) {
		bool drained;

		spin_lock_irq(&pwq->pool->lock);
		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
		spin_unlock_irq(&pwq->pool->lock);

		if (drained)
			continue;

		if (++flush_cnt == 10 ||
		    (flush_cnt % 100 == 0 && flush_cnt <= 1000))
			pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
				wq->name, flush_cnt);

		mutex_unlock(&wq->mutex);
		goto reflush;
	}

	if (!--wq->nr_drainers)
		wq->flags &= ~__WQ_DRAINING;
	mutex_unlock(&wq->mutex);
}
EXPORT_SYMBOL_GPL(drain_workqueue);

static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
{
	struct worker *worker = NULL;
	struct worker_pool *pool;
	struct pool_workqueue *pwq;

	might_sleep();

	local_irq_disable();
	pool = get_work_pool(work);
	if (!pool) {
		local_irq_enable();
		return false;
	}

	spin_lock(&pool->lock);

	pwq = get_work_pwq(work);
	if (pwq) {
		if (unlikely(pwq->pool != pool))
			goto already_gone;
	} else {
		worker = find_worker_executing_work(pool, work);
		if (!worker)
			goto already_gone;
		pwq = worker->current_pwq;
	}

	insert_wq_barrier(pwq, barr, work, worker);
	spin_unlock_irq(&pool->lock);

	if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
		lock_map_acquire(&pwq->wq->lockdep_map);
	else
		lock_map_acquire_read(&pwq->wq->lockdep_map);
	lock_map_release(&pwq->wq->lockdep_map);

	return true;
already_gone:
	spin_unlock_irq(&pool->lock);
	return false;
}

bool flush_work(struct work_struct *work)
{
	struct wq_barrier barr;

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	if (start_flush_work(work, &barr)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
		return true;
	} else {
		return false;
	}
}
EXPORT_SYMBOL_GPL(flush_work);
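
/*
 * Usage sketch (illustrative only): waiting for a previously queued item
 * before tearing down the object it operates on.  my_dev is hypothetical.
 *
 *	queue_work(system_wq, &my_dev->refresh_work);
 *	...
 *	flush_work(&my_dev->refresh_work);	// returns once the handler is done
 *	kfree(my_dev);
 */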

static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(work, is_dwork, &flags);

		if (unlikely(ret == -ENOENT))
			flush_work(work);
	} while (unlikely(ret < 0));

	mark_work_canceling(work);
	local_irq_restore(flags);

	flush_work(work);
	clear_work_data(work);
	return ret;
}

bool cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, false);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);

bool flush_delayed_work(struct delayed_work *dwork)
{
	local_irq_disable();
	if (del_timer_sync(&dwork->timer))
		__queue_work(dwork->cpu, dwork->wq, &dwork->work);
	local_irq_enable();
	return flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);

bool cancel_delayed_work(struct delayed_work *dwork)
{
	unsigned long flags;
	int ret;

	do {
		ret = try_to_grab_pending(&dwork->work, true, &flags);
	} while (unlikely(ret == -EAGAIN));

	if (unlikely(ret < 0))
		return false;

	set_work_pool_and_clear_pending(&dwork->work,
					get_work_pool_id(&dwork->work));
	local_irq_restore(flags);
	return ret;
}
EXPORT_SYMBOL(cancel_delayed_work);

bool cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, true);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
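
/*
 * Usage sketch (illustrative only): the common teardown pattern.  Using the
 * _sync variant guarantees the handler is neither pending nor running when
 * the call returns, so the object it touches may be freed afterwards.
 *
 *	cancel_delayed_work_sync(&my_dev->poll_dwork);
 *	kfree(my_dev);
 */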

int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	struct work_struct __percpu *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}

void flush_scheduled_work(void)
{
	flush_workqueue(system_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);

int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
2943
2944#ifdef CONFIG_SYSFS
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959struct wq_device {
2960 struct workqueue_struct *wq;
2961 struct device dev;
2962};
2963
2964static struct workqueue_struct *dev_to_wq(struct device *dev)
2965{
2966 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
2967
2968 return wq_dev->wq;
2969}
2970
2971static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
2972 char *buf)
2973{
2974 struct workqueue_struct *wq = dev_to_wq(dev);
2975
2976 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
2977}
2978static DEVICE_ATTR_RO(per_cpu);
2979
2980static ssize_t max_active_show(struct device *dev,
2981 struct device_attribute *attr, char *buf)
2982{
2983 struct workqueue_struct *wq = dev_to_wq(dev);
2984
2985 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
2986}
2987
2988static ssize_t max_active_store(struct device *dev,
2989 struct device_attribute *attr, const char *buf,
2990 size_t count)
2991{
2992 struct workqueue_struct *wq = dev_to_wq(dev);
2993 int val;
2994
2995 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
2996 return -EINVAL;
2997
2998 workqueue_set_max_active(wq, val);
2999 return count;
3000}
3001static DEVICE_ATTR_RW(max_active);
3002
3003static struct attribute *wq_sysfs_attrs[] = {
3004 &dev_attr_per_cpu.attr,
3005 &dev_attr_max_active.attr,
3006 NULL,
3007};
3008ATTRIBUTE_GROUPS(wq_sysfs);
3009
3010static ssize_t wq_pool_ids_show(struct device *dev,
3011 struct device_attribute *attr, char *buf)
3012{
3013 struct workqueue_struct *wq = dev_to_wq(dev);
3014 const char *delim = "";
3015 int node, written = 0;
3016
3017 rcu_read_lock_sched();
3018 for_each_node(node) {
3019 written += scnprintf(buf + written, PAGE_SIZE - written,
3020 "%s%d:%d", delim, node,
3021 unbound_pwq_by_node(wq, node)->pool->id);
3022 delim = " ";
3023 }
3024 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3025 rcu_read_unlock_sched();
3026
3027 return written;
3028}
3029
3030static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
3031 char *buf)
3032{
3033 struct workqueue_struct *wq = dev_to_wq(dev);
3034 int written;
3035
3036 mutex_lock(&wq->mutex);
3037 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
3038 mutex_unlock(&wq->mutex);
3039
3040 return written;
3041}
3042
3043
3044static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
3045{
3046 struct workqueue_attrs *attrs;
3047
3048 attrs = alloc_workqueue_attrs(GFP_KERNEL);
3049 if (!attrs)
3050 return NULL;
3051
3052 mutex_lock(&wq->mutex);
3053 copy_workqueue_attrs(attrs, wq->unbound_attrs);
3054 mutex_unlock(&wq->mutex);
3055 return attrs;
3056}
3057
3058static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
3059 const char *buf, size_t count)
3060{
3061 struct workqueue_struct *wq = dev_to_wq(dev);
3062 struct workqueue_attrs *attrs;
3063 int ret;
3064
3065 attrs = wq_sysfs_prep_attrs(wq);
3066 if (!attrs)
3067 return -ENOMEM;
3068
3069 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
3070 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
3071 ret = apply_workqueue_attrs(wq, attrs);
3072 else
3073 ret = -EINVAL;
3074
3075 free_workqueue_attrs(attrs);
3076 return ret ?: count;
3077}
3078
3079static ssize_t wq_cpumask_show(struct device *dev,
3080 struct device_attribute *attr, char *buf)
3081{
3082 struct workqueue_struct *wq = dev_to_wq(dev);
3083 int written;
3084
3085 mutex_lock(&wq->mutex);
3086 written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask);
3087 mutex_unlock(&wq->mutex);
3088
3089 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3090 return written;
3091}
3092
3093static ssize_t wq_cpumask_store(struct device *dev,
3094 struct device_attribute *attr,
3095 const char *buf, size_t count)
3096{
3097 struct workqueue_struct *wq = dev_to_wq(dev);
3098 struct workqueue_attrs *attrs;
3099 int ret;
3100
3101 attrs = wq_sysfs_prep_attrs(wq);
3102 if (!attrs)
3103 return -ENOMEM;
3104
3105 ret = cpumask_parse(buf, attrs->cpumask);
3106 if (!ret)
3107 ret = apply_workqueue_attrs(wq, attrs);
3108
3109 free_workqueue_attrs(attrs);
3110 return ret ?: count;
3111}
3112
3113static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
3114 char *buf)
3115{
3116 struct workqueue_struct *wq = dev_to_wq(dev);
3117 int written;
3118
3119 mutex_lock(&wq->mutex);
3120 written = scnprintf(buf, PAGE_SIZE, "%d\n",
3121 !wq->unbound_attrs->no_numa);
3122 mutex_unlock(&wq->mutex);
3123
3124 return written;
3125}
3126
3127static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
3128 const char *buf, size_t count)
3129{
3130 struct workqueue_struct *wq = dev_to_wq(dev);
3131 struct workqueue_attrs *attrs;
3132 int v, ret;
3133
3134 attrs = wq_sysfs_prep_attrs(wq);
3135 if (!attrs)
3136 return -ENOMEM;
3137
3138 ret = -EINVAL;
3139 if (sscanf(buf, "%d", &v) == 1) {
3140 attrs->no_numa = !v;
3141 ret = apply_workqueue_attrs(wq, attrs);
3142 }
3143
3144 free_workqueue_attrs(attrs);
3145 return ret ?: count;
3146}
3147
3148static struct device_attribute wq_sysfs_unbound_attrs[] = {
3149 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
3150 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
3151 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
3152 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
3153 __ATTR_NULL,
3154};
3155
3156static struct bus_type wq_subsys = {
3157 .name = "workqueue",
3158 .dev_groups = wq_sysfs_groups,
3159};
3160
3161static int __init wq_sysfs_init(void)
3162{
3163 return subsys_virtual_register(&wq_subsys, NULL);
3164}
3165core_initcall(wq_sysfs_init);
3166
3167static void wq_device_release(struct device *dev)
3168{
3169 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
3170
3171 kfree(wq_dev);
3172}
3173
/**
 * workqueue_sysfs_register - make a workqueue visible in sysfs
 * @wq: the workqueue to register
 *
 * Expose @wq in sysfs under /sys/bus/workqueue/devices.
 * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
 * which is the preferred method.
 *
 * Workqueue user should use this function directly iff it wants to apply
 * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
 * apply_workqueue_attrs() may race against userland updating the
 * attributes.
 *
 * Return: 0 on success, -errno on failure.
 */
3189int workqueue_sysfs_register(struct workqueue_struct *wq)
3190{
3191 struct wq_device *wq_dev;
3192 int ret;
3193
3194
3195
3196
3197
3198
3199 if (WARN_ON(wq->flags & __WQ_ORDERED))
3200 return -EINVAL;
3201
3202 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
3203 if (!wq_dev)
3204 return -ENOMEM;
3205
3206 wq_dev->wq = wq;
3207 wq_dev->dev.bus = &wq_subsys;
3208 wq_dev->dev.init_name = wq->name;
3209 wq_dev->dev.release = wq_device_release;
3210
3211
3212
3213
3214
3215 dev_set_uevent_suppress(&wq_dev->dev, true);
3216
3217 ret = device_register(&wq_dev->dev);
3218 if (ret) {
3219 kfree(wq_dev);
3220 wq->wq_dev = NULL;
3221 return ret;
3222 }
3223
3224 if (wq->flags & WQ_UNBOUND) {
3225 struct device_attribute *attr;
3226
3227 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
3228 ret = device_create_file(&wq_dev->dev, attr);
3229 if (ret) {
3230 device_unregister(&wq_dev->dev);
3231 wq->wq_dev = NULL;
3232 return ret;
3233 }
3234 }
3235 }
3236
3237 dev_set_uevent_suppress(&wq_dev->dev, false);
3238 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
3239 return 0;
3240}
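
/*
 * Illustrative sketch, not part of the original file: allocating a
 * workqueue with WQ_SYSFS makes alloc_workqueue() call
 * workqueue_sysfs_register() above, so it shows up under
 * /sys/bus/workqueue/devices/<name>.  The name "example_sysfs_wq" is
 * hypothetical.
 */
static __maybe_unused struct workqueue_struct *example_sysfs_wq(void)
{
	/* unbound so the nice/cpumask/numa attributes above are writable */
	return alloc_workqueue("example_sysfs_wq", WQ_UNBOUND | WQ_SYSFS, 0);
}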
3241
/**
 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
 * @wq: the workqueue to unregister
 *
 * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
 */
3248static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
3249{
3250 struct wq_device *wq_dev = wq->wq_dev;
3251
3252 if (!wq->wq_dev)
3253 return;
3254
3255 wq->wq_dev = NULL;
3256 device_unregister(&wq_dev->dev);
3257}
3258#else
3259static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
3260#endif
3261
/**
 * free_workqueue_attrs - free a workqueue_attrs
 * @attrs: workqueue_attrs to free
 *
 * Undo alloc_workqueue_attrs().
 */
3268void free_workqueue_attrs(struct workqueue_attrs *attrs)
3269{
3270 if (attrs) {
3271 free_cpumask_var(attrs->cpumask);
3272 kfree(attrs);
3273 }
3274}
3275
/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new workqueue_attrs, initialize with default settings and
 * return it.
 *
 * Return: The allocated workqueue_attrs on success, %NULL on failure.
 */
3285struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3286{
3287 struct workqueue_attrs *attrs;
3288
3289 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3290 if (!attrs)
3291 goto fail;
3292 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3293 goto fail;
3294
3295 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3296 return attrs;
3297fail:
3298 free_workqueue_attrs(attrs);
3299 return NULL;
3300}
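
/*
 * Illustrative sketch, not part of the original file: allocate attrs and
 * restrict the cpumask to one NUMA node before handing them to
 * apply_workqueue_attrs().  The function name and the @node argument are
 * hypothetical.
 */
static __maybe_unused struct workqueue_attrs *example_node_attrs(int node)
{
	struct workqueue_attrs *attrs;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);	/* cpumask starts as cpu_possible_mask */
	if (!attrs)
		return NULL;

	attrs->nice = 0;
	cpumask_and(attrs->cpumask, attrs->cpumask, cpumask_of_node(node));
	return attrs;	/* caller releases with free_workqueue_attrs() */
}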
3301
3302static void copy_workqueue_attrs(struct workqueue_attrs *to,
3303 const struct workqueue_attrs *from)
3304{
3305 to->nice = from->nice;
3306 cpumask_copy(to->cpumask, from->cpumask);
	/*
	 * Unlike the hash and equality tests, copying shouldn't ignore
	 * ->no_numa as it is used for both pool and wq attrs.  Instead,
	 * get_unbound_pool() explicitly clears ->no_numa after copying.
	 */
3312 to->no_numa = from->no_numa;
3313}
3314
3315
3316static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3317{
3318 u32 hash = 0;
3319
3320 hash = jhash_1word(attrs->nice, hash);
3321 hash = jhash(cpumask_bits(attrs->cpumask),
3322 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3323 return hash;
3324}
3325
3326
3327static bool wqattrs_equal(const struct workqueue_attrs *a,
3328 const struct workqueue_attrs *b)
3329{
3330 if (a->nice != b->nice)
3331 return false;
3332 if (!cpumask_equal(a->cpumask, b->cpumask))
3333 return false;
3334 return true;
3335}
3336
/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  It also allocates @pool->attrs.
 *
 * Return: 0 on success, -errno on failure.  Even on failure, all fields
 * inside @pool proper are initialized and put_unbound_pool() can be called
 * on @pool safely to release it.
 */
3347static int init_worker_pool(struct worker_pool *pool)
3348{
3349 spin_lock_init(&pool->lock);
3350 pool->id = -1;
3351 pool->cpu = -1;
3352 pool->node = NUMA_NO_NODE;
3353 pool->flags |= POOL_DISASSOCIATED;
3354 INIT_LIST_HEAD(&pool->worklist);
3355 INIT_LIST_HEAD(&pool->idle_list);
3356 hash_init(pool->busy_hash);
3357
3358 init_timer_deferrable(&pool->idle_timer);
3359 pool->idle_timer.function = idle_worker_timeout;
3360 pool->idle_timer.data = (unsigned long)pool;
3361
3362 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3363 (unsigned long)pool);
3364
3365 mutex_init(&pool->manager_arb);
3366 mutex_init(&pool->attach_mutex);
3367 INIT_LIST_HEAD(&pool->workers);
3368
3369 ida_init(&pool->worker_ida);
3370 INIT_HLIST_NODE(&pool->hash_node);
3371 pool->refcnt = 1;
3372
3373
3374 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3375 if (!pool->attrs)
3376 return -ENOMEM;
3377 return 0;
3378}
3379
3380static void rcu_free_pool(struct rcu_head *rcu)
3381{
3382 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3383
3384 ida_destroy(&pool->worker_ida);
3385 free_workqueue_attrs(pool->attrs);
3386 kfree(pool);
3387}
3388
/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in sched-RCU
 * safe manner.  get_unbound_pool() calls this function on its failure path
 * and this function should be able to release pools which went through,
 * partially, any of get_unbound_pool()'s steps.
 *
 * Should be called with wq_pool_mutex held.
 */
3400static void put_unbound_pool(struct worker_pool *pool)
3401{
3402 DECLARE_COMPLETION_ONSTACK(detach_completion);
3403 struct worker *worker;
3404
3405 lockdep_assert_held(&wq_pool_mutex);
3406
3407 if (--pool->refcnt)
3408 return;
3409
3410
3411 if (WARN_ON(!(pool->cpu < 0)) ||
3412 WARN_ON(!list_empty(&pool->worklist)))
3413 return;
3414
3415
3416 if (pool->id >= 0)
3417 idr_remove(&worker_pool_idr, pool->id);
3418 hash_del(&pool->hash_node);
3419
3420
3421
3422
3423
3424
3425 mutex_lock(&pool->manager_arb);
3426
3427 spin_lock_irq(&pool->lock);
3428 while ((worker = first_idle_worker(pool)))
3429 destroy_worker(worker);
3430 WARN_ON(pool->nr_workers || pool->nr_idle);
3431 spin_unlock_irq(&pool->lock);
3432
3433 mutex_lock(&pool->attach_mutex);
3434 if (!list_empty(&pool->workers))
3435 pool->detach_completion = &detach_completion;
3436 mutex_unlock(&pool->attach_mutex);
3437
3438 if (pool->detach_completion)
3439 wait_for_completion(pool->detach_completion);
3440
3441 mutex_unlock(&pool->manager_arb);
3442
3443
3444 del_timer_sync(&pool->idle_timer);
3445 del_timer_sync(&pool->mayday_timer);
3446
3447
3448 call_rcu_sched(&pool->rcu, rcu_free_pool);
3449}
3450
/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it will be used; otherwise, this function attempts to
 * create a new one.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: On success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */
3465static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3466{
3467 u32 hash = wqattrs_hash(attrs);
3468 struct worker_pool *pool;
3469 int node;
3470
3471 lockdep_assert_held(&wq_pool_mutex);
3472
3473
3474 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3475 if (wqattrs_equal(pool->attrs, attrs)) {
3476 pool->refcnt++;
3477 return pool;
3478 }
3479 }
3480
3481
3482 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
3483 if (!pool || init_worker_pool(pool) < 0)
3484 goto fail;
3485
3486 lockdep_set_subclass(&pool->lock, 1);
3487 copy_workqueue_attrs(pool->attrs, attrs);
3488
3489
3490
3491
3492
3493 pool->attrs->no_numa = false;
3494
3495
3496 if (wq_numa_enabled) {
3497 for_each_node(node) {
3498 if (cpumask_subset(pool->attrs->cpumask,
3499 wq_numa_possible_cpumask[node])) {
3500 pool->node = node;
3501 break;
3502 }
3503 }
3504 }
3505
3506 if (worker_pool_assign_id(pool) < 0)
3507 goto fail;
3508
3509
3510 if (!create_worker(pool))
3511 goto fail;
3512
3513
3514 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3515
3516 return pool;
3517fail:
3518 if (pool)
3519 put_unbound_pool(pool);
3520 return NULL;
3521}
3522
3523static void rcu_free_pwq(struct rcu_head *rcu)
3524{
3525 kmem_cache_free(pwq_cache,
3526 container_of(rcu, struct pool_workqueue, rcu));
3527}
3528
/*
 * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
 * and needs to be destroyed.
 */
3533static void pwq_unbound_release_workfn(struct work_struct *work)
3534{
3535 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3536 unbound_release_work);
3537 struct workqueue_struct *wq = pwq->wq;
3538 struct worker_pool *pool = pwq->pool;
3539 bool is_last;
3540
3541 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3542 return;
3543
3544 mutex_lock(&wq->mutex);
3545 list_del_rcu(&pwq->pwqs_node);
3546 is_last = list_empty(&wq->pwqs);
3547 mutex_unlock(&wq->mutex);
3548
3549 mutex_lock(&wq_pool_mutex);
3550 put_unbound_pool(pool);
3551 mutex_unlock(&wq_pool_mutex);
3552
3553 call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3554
3555
3556
3557
3558
3559 if (is_last) {
3560 free_workqueue_attrs(wq->unbound_attrs);
3561 kfree(wq);
3562 }
3563}
3564
/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */
3573static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3574{
3575 struct workqueue_struct *wq = pwq->wq;
3576 bool freezable = wq->flags & WQ_FREEZABLE;
3577
3578
3579 lockdep_assert_held(&wq->mutex);
3580
3581
3582 if (!freezable && pwq->max_active == wq->saved_max_active)
3583 return;
3584
3585 spin_lock_irq(&pwq->pool->lock);
3586
3587
3588
3589
3590
3591
3592 if (!freezable || !workqueue_freezing) {
3593 pwq->max_active = wq->saved_max_active;
3594
3595 while (!list_empty(&pwq->delayed_works) &&
3596 pwq->nr_active < pwq->max_active)
3597 pwq_activate_first_delayed(pwq);
3598
3599
3600
3601
3602
3603 wake_up_worker(pwq->pool);
3604 } else {
3605 pwq->max_active = 0;
3606 }
3607
3608 spin_unlock_irq(&pwq->pool->lock);
3609}
3610
3611
3612static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3613 struct worker_pool *pool)
3614{
3615 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3616
3617 memset(pwq, 0, sizeof(*pwq));
3618
3619 pwq->pool = pool;
3620 pwq->wq = wq;
3621 pwq->flush_color = -1;
3622 pwq->refcnt = 1;
3623 INIT_LIST_HEAD(&pwq->delayed_works);
3624 INIT_LIST_HEAD(&pwq->pwqs_node);
3625 INIT_LIST_HEAD(&pwq->mayday_node);
3626 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3627}
3628
3629
3630static void link_pwq(struct pool_workqueue *pwq)
3631{
3632 struct workqueue_struct *wq = pwq->wq;
3633
3634 lockdep_assert_held(&wq->mutex);
3635
3636
3637 if (!list_empty(&pwq->pwqs_node))
3638 return;
3639
3640
3641 pwq->work_color = wq->work_color;
3642
3643
3644 pwq_adjust_max_active(pwq);
3645
3646
3647 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3648}
3649
3650
3651static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3652 const struct workqueue_attrs *attrs)
3653{
3654 struct worker_pool *pool;
3655 struct pool_workqueue *pwq;
3656
3657 lockdep_assert_held(&wq_pool_mutex);
3658
3659 pool = get_unbound_pool(attrs);
3660 if (!pool)
3661 return NULL;
3662
3663 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3664 if (!pwq) {
3665 put_unbound_pool(pool);
3666 return NULL;
3667 }
3668
3669 init_pwq(pwq, wq, pool);
3670 return pwq;
3671}
3672
3673
3674static void free_unbound_pwq(struct pool_workqueue *pwq)
3675{
3676 lockdep_assert_held(&wq_pool_mutex);
3677
3678 if (pwq) {
3679 put_unbound_pool(pwq->pool);
3680 kmem_cache_free(pwq_cache, pwq);
3681 }
3682}
3683
/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of interest
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during
 * calculation.  The result is stored in @cpumask.
 *
 * If NUMA affinity is not enabled, @attrs->cpumask is always used.  If
 * enabled and @node has online CPUs requested by @attrs, the returned
 * cpumask is the intersection of the possible CPUs of @node and
 * @attrs->cpumask.
 *
 * The caller is responsible for ensuring that the cpumask of @node stays
 * stable.
 *
 * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
 * %false if equal.
 */
3706static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3707 int cpu_going_down, cpumask_t *cpumask)
3708{
3709 if (!wq_numa_enabled || attrs->no_numa)
3710 goto use_dfl;
3711
3712
3713 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3714 if (cpu_going_down >= 0)
3715 cpumask_clear_cpu(cpu_going_down, cpumask);
3716
3717 if (cpumask_empty(cpumask))
3718 goto use_dfl;
3719
3720
3721 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3722 return !cpumask_equal(cpumask, attrs->cpumask);
3723
3724use_dfl:
3725 cpumask_copy(cpumask, attrs->cpumask);
3726 return false;
3727}
3728
3729
3730static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3731 int node,
3732 struct pool_workqueue *pwq)
3733{
3734 struct pool_workqueue *old_pwq;
3735
3736 lockdep_assert_held(&wq->mutex);
3737
3738
3739 link_pwq(pwq);
3740
3741 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3742 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3743 return old_pwq;
3744}
3745
/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines, this function maps a separate pwq to each NUMA node with
 * possible CPUs in @attrs->cpumask so that work items are affine to the
 * NUMA node they were issued on.  Older pwqs are released as in-flight
 * work items finish.  Note that a work item which repeatedly requeues
 * itself back-to-back will stay on its current pwq.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success and -errno on failure.
 */
3762int apply_workqueue_attrs(struct workqueue_struct *wq,
3763 const struct workqueue_attrs *attrs)
3764{
3765 struct workqueue_attrs *new_attrs, *tmp_attrs;
3766 struct pool_workqueue **pwq_tbl, *dfl_pwq;
3767 int node, ret;
3768
3769
3770 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3771 return -EINVAL;
3772
3773
3774 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3775 return -EINVAL;
3776
3777 pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
3778 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3779 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3780 if (!pwq_tbl || !new_attrs || !tmp_attrs)
3781 goto enomem;
3782
3783
3784 copy_workqueue_attrs(new_attrs, attrs);
3785 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3786
3787
3788
3789
3790
3791
3792 copy_workqueue_attrs(tmp_attrs, new_attrs);
3793
3794
3795
3796
3797
3798
3799 get_online_cpus();
3800
3801 mutex_lock(&wq_pool_mutex);
3802
3803
3804
3805
3806
3807
3808 dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3809 if (!dfl_pwq)
3810 goto enomem_pwq;
3811
3812 for_each_node(node) {
3813 if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
3814 pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3815 if (!pwq_tbl[node])
3816 goto enomem_pwq;
3817 } else {
3818 dfl_pwq->refcnt++;
3819 pwq_tbl[node] = dfl_pwq;
3820 }
3821 }
3822
3823 mutex_unlock(&wq_pool_mutex);
3824
3825
3826 mutex_lock(&wq->mutex);
3827
3828 copy_workqueue_attrs(wq->unbound_attrs, new_attrs);
3829
3830
3831 for_each_node(node)
3832 pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]);
3833
3834
3835 link_pwq(dfl_pwq);
3836 swap(wq->dfl_pwq, dfl_pwq);
3837
3838 mutex_unlock(&wq->mutex);
3839
3840
3841 for_each_node(node)
3842 put_pwq_unlocked(pwq_tbl[node]);
3843 put_pwq_unlocked(dfl_pwq);
3844
3845 put_online_cpus();
3846 ret = 0;
3847
3848out_free:
3849 free_workqueue_attrs(tmp_attrs);
3850 free_workqueue_attrs(new_attrs);
3851 kfree(pwq_tbl);
3852 return ret;
3853
3854enomem_pwq:
3855 free_unbound_pwq(dfl_pwq);
3856 for_each_node(node)
3857 if (pwq_tbl && pwq_tbl[node] != dfl_pwq)
3858 free_unbound_pwq(pwq_tbl[node]);
3859 mutex_unlock(&wq_pool_mutex);
3860 put_online_cpus();
3861enomem:
3862 ret = -ENOMEM;
3863 goto out_free;
3864}
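
/*
 * Illustrative sketch, not part of the original file: change the nice level
 * of an existing WQ_UNBOUND workqueue.  @wq and the value -10 are
 * hypothetical; the attrs are copied by apply_workqueue_attrs(), so they
 * can be freed right away.
 */
static __maybe_unused int example_renice_unbound_wq(struct workqueue_struct *wq)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	attrs->nice = -10;			/* backing pool workers run at nice -10 */
	ret = apply_workqueue_attrs(wq, attrs);	/* may sleep, does GFP_KERNEL allocations */
	free_workqueue_attrs(attrs);
	return ret;
}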
3865
/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged, update NUMA affinity of
 * @wq accordingly.
 *
 * If NUMA affinity can't be adjusted due to memory allocation failure, it
 * falls back to @wq->dfl_pwq which may not be optimal but is always
 * correct.
 *
 * Note that when the last allowed CPU of a NUMA node goes offline for a
 * workqueue with a cpumask spanning multiple nodes, the workers which were
 * already executing the work items for the workqueue will lose their CPU
 * affinity and may execute on any CPU.  This is similar to how per-cpu
 * workqueues behave on CPU_DOWN.  If a workqueue user wants strict
 * affinity, it's the user's responsibility to flush the work item from
 * CPU_DOWN_PREPARE.
 */
3888static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3889 bool online)
3890{
3891 int node = cpu_to_node(cpu);
3892 int cpu_off = online ? -1 : cpu;
3893 struct pool_workqueue *old_pwq = NULL, *pwq;
3894 struct workqueue_attrs *target_attrs;
3895 cpumask_t *cpumask;
3896
3897 lockdep_assert_held(&wq_pool_mutex);
3898
3899 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
3900 return;
3901
3902
3903
3904
3905
3906
3907 target_attrs = wq_update_unbound_numa_attrs_buf;
3908 cpumask = target_attrs->cpumask;
3909
3910 mutex_lock(&wq->mutex);
3911 if (wq->unbound_attrs->no_numa)
3912 goto out_unlock;
3913
3914 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3915 pwq = unbound_pwq_by_node(wq, node);
3916
3917
3918
3919
3920
3921
3922
3923 if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
3924 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3925 goto out_unlock;
3926 } else {
3927 goto use_dfl_pwq;
3928 }
3929
3930 mutex_unlock(&wq->mutex);
3931
3932
3933 pwq = alloc_unbound_pwq(wq, target_attrs);
3934 if (!pwq) {
3935 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3936 wq->name);
3937 mutex_lock(&wq->mutex);
3938 goto use_dfl_pwq;
3939 }
3940
3941
3942
3943
3944
3945
3946
3947 mutex_lock(&wq->mutex);
3948 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3949 goto out_unlock;
3950
3951use_dfl_pwq:
3952 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3953 get_pwq(wq->dfl_pwq);
3954 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3955 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3956out_unlock:
3957 mutex_unlock(&wq->mutex);
3958 put_pwq_unlocked(old_pwq);
3959}
3960
3961static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3962{
3963 bool highpri = wq->flags & WQ_HIGHPRI;
3964 int cpu, ret;
3965
3966 if (!(wq->flags & WQ_UNBOUND)) {
3967 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3968 if (!wq->cpu_pwqs)
3969 return -ENOMEM;
3970
3971 for_each_possible_cpu(cpu) {
3972 struct pool_workqueue *pwq =
3973 per_cpu_ptr(wq->cpu_pwqs, cpu);
3974 struct worker_pool *cpu_pools =
3975 per_cpu(cpu_worker_pools, cpu);
3976
3977 init_pwq(pwq, wq, &cpu_pools[highpri]);
3978
3979 mutex_lock(&wq->mutex);
3980 link_pwq(pwq);
3981 mutex_unlock(&wq->mutex);
3982 }
3983 return 0;
3984 } else if (wq->flags & __WQ_ORDERED) {
3985 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
3986
3987 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3988 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3989 "ordering guarantee broken for workqueue %s\n", wq->name);
3990 return ret;
3991 } else {
3992 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3993 }
3994}
3995
3996static int wq_clamp_max_active(int max_active, unsigned int flags,
3997 const char *name)
3998{
3999 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4000
4001 if (max_active < 1 || max_active > lim)
4002 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4003 max_active, name, 1, lim);
4004
4005 return clamp_val(max_active, 1, lim);
4006}
4007
4008struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
4009 unsigned int flags,
4010 int max_active,
4011 struct lock_class_key *key,
4012 const char *lock_name, ...)
4013{
4014 size_t tbl_size = 0;
4015 va_list args;
4016 struct workqueue_struct *wq;
4017 struct pool_workqueue *pwq;
4018
4019
4020 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4021 flags |= WQ_UNBOUND;
4022
4023
4024 if (flags & WQ_UNBOUND)
4025 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4026
4027 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4028 if (!wq)
4029 return NULL;
4030
4031 if (flags & WQ_UNBOUND) {
4032 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4033 if (!wq->unbound_attrs)
4034 goto err_free_wq;
4035 }
4036
4037 va_start(args, lock_name);
4038 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4039 va_end(args);
4040
4041 max_active = max_active ?: WQ_DFL_ACTIVE;
4042 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4043
4044
4045 wq->flags = flags;
4046 wq->saved_max_active = max_active;
4047 mutex_init(&wq->mutex);
4048 atomic_set(&wq->nr_pwqs_to_flush, 0);
4049 INIT_LIST_HEAD(&wq->pwqs);
4050 INIT_LIST_HEAD(&wq->flusher_queue);
4051 INIT_LIST_HEAD(&wq->flusher_overflow);
4052 INIT_LIST_HEAD(&wq->maydays);
4053
4054 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4055 INIT_LIST_HEAD(&wq->list);
4056
4057 if (alloc_and_link_pwqs(wq) < 0)
4058 goto err_free_wq;
4059
4060
4061
4062
4063
4064 if (flags & WQ_MEM_RECLAIM) {
4065 struct worker *rescuer;
4066
4067 rescuer = alloc_worker(NUMA_NO_NODE);
4068 if (!rescuer)
4069 goto err_destroy;
4070
4071 rescuer->rescue_wq = wq;
4072 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
4073 wq->name);
4074 if (IS_ERR(rescuer->task)) {
4075 kfree(rescuer);
4076 goto err_destroy;
4077 }
4078
4079 wq->rescuer = rescuer;
4080 rescuer->task->flags |= PF_NO_SETAFFINITY;
4081 wake_up_process(rescuer->task);
4082 }
4083
4084 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4085 goto err_destroy;
4086
4087
4088
4089
4090
4091
4092 mutex_lock(&wq_pool_mutex);
4093
4094 mutex_lock(&wq->mutex);
4095 for_each_pwq(pwq, wq)
4096 pwq_adjust_max_active(pwq);
4097 mutex_unlock(&wq->mutex);
4098
4099 list_add(&wq->list, &workqueues);
4100
4101 mutex_unlock(&wq_pool_mutex);
4102
4103 return wq;
4104
4105err_free_wq:
4106 free_workqueue_attrs(wq->unbound_attrs);
4107 kfree(wq);
4108 return NULL;
4109err_destroy:
4110 destroy_workqueue(wq);
4111 return NULL;
4112}
4113EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
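
/*
 * Illustrative sketch, not part of the original file: a typical driver-style
 * allocation through the alloc_workqueue() wrapper, which ends up in
 * __alloc_workqueue_key() above.  WQ_MEM_RECLAIM attaches a rescuer thread
 * so the workqueue can make forward progress under memory pressure;
 * max_active of 1 limits it to one in-flight work item per pwq.  The name
 * "example_reclaim_wq" is made up.
 */
static __maybe_unused struct workqueue_struct *example_alloc_wq(void)
{
	return alloc_workqueue("example_reclaim_wq",
			       WQ_MEM_RECLAIM | WQ_FREEZABLE, 1);
}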
4114
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
4121void destroy_workqueue(struct workqueue_struct *wq)
4122{
4123 struct pool_workqueue *pwq;
4124 int node;
4125
4126
4127 drain_workqueue(wq);
4128
4129
4130 mutex_lock(&wq->mutex);
4131 for_each_pwq(pwq, wq) {
4132 int i;
4133
4134 for (i = 0; i < WORK_NR_COLORS; i++) {
4135 if (WARN_ON(pwq->nr_in_flight[i])) {
4136 mutex_unlock(&wq->mutex);
4137 return;
4138 }
4139 }
4140
4141 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4142 WARN_ON(pwq->nr_active) ||
4143 WARN_ON(!list_empty(&pwq->delayed_works))) {
4144 mutex_unlock(&wq->mutex);
4145 return;
4146 }
4147 }
4148 mutex_unlock(&wq->mutex);
4149
4150
4151
4152
4153
4154 mutex_lock(&wq_pool_mutex);
4155 list_del_init(&wq->list);
4156 mutex_unlock(&wq_pool_mutex);
4157
4158 workqueue_sysfs_unregister(wq);
4159
4160 if (wq->rescuer) {
4161 kthread_stop(wq->rescuer->task);
4162 kfree(wq->rescuer);
4163 wq->rescuer = NULL;
4164 }
4165
4166 if (!(wq->flags & WQ_UNBOUND)) {
4167
4168
4169
4170
4171 free_percpu(wq->cpu_pwqs);
4172 kfree(wq);
4173 } else {
4174
4175
4176
4177
4178
4179 for_each_node(node) {
4180 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4181 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4182 put_pwq_unlocked(pwq);
4183 }
4184
4185
4186
4187
4188
4189 pwq = wq->dfl_pwq;
4190 wq->dfl_pwq = NULL;
4191 put_pwq_unlocked(pwq);
4192 }
4193}
4194EXPORT_SYMBOL_GPL(destroy_workqueue);
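
/*
 * Illustrative sketch, not part of the original file: typical teardown
 * order.  Self-requeueing work must be stopped first; destroy_workqueue()
 * then drains whatever is still queued before freeing.  @wq and @work are
 * hypothetical.
 */
static __maybe_unused void example_teardown(struct workqueue_struct *wq,
					    struct work_struct *work)
{
	cancel_work_sync(work);		/* stop the item from requeueing itself */
	destroy_workqueue(wq);		/* drains remaining work, then releases @wq */
}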
4195
/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value.
 *
 * Set max_active of @wq to @max_active.
 *
 * CONTEXT:
 * Don't call from IRQ context.
 */
4206void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4207{
4208 struct pool_workqueue *pwq;
4209
4210
4211 if (WARN_ON(wq->flags & __WQ_ORDERED))
4212 return;
4213
4214 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4215
4216 mutex_lock(&wq->mutex);
4217
4218 wq->saved_max_active = max_active;
4219
4220 for_each_pwq(pwq, wq)
4221 pwq_adjust_max_active(pwq);
4222
4223 mutex_unlock(&wq->mutex);
4224}
4225EXPORT_SYMBOL_GPL(workqueue_set_max_active);
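
/*
 * Illustrative sketch, not part of the original file: raise the concurrency
 * limit at runtime.  The value is clamped by wq_clamp_max_active() and
 * ordered workqueues are rejected above.  The limit of 16 is arbitrary.
 */
static __maybe_unused void example_bump_max_active(struct workqueue_struct *wq)
{
	workqueue_set_max_active(wq, 16);	/* allow up to 16 in-flight items per pwq */
}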
4226
/**
 * current_is_workqueue_rescuer - is %current a workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
 * work functions to determine whether they're being run off the rescuer
 * task.
 *
 * Return: %true if %current is a workqueue rescuer, %false otherwise.
 */
4235bool current_is_workqueue_rescuer(void)
4236{
4237 struct worker *worker = current_wq_worker();
4238
4239 return worker && worker->rescue_wq;
4240}
4241
/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's cpu workqueue for @cpu is congested.  There is no
 * synchronization around this function and the test result is unreliable
 * and only useful as advisory hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 * Note that both per-cpu and unbound workqueues may be associated with
 * multiple pool_workqueues which have separate congested states.  A
 * workqueue being congested on one CPU doesn't mean the workqueue is also
 * congested on other CPUs / NUMA nodes.
 *
 * Return: %true if congested, %false otherwise.
 */
4260bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4261{
4262 struct pool_workqueue *pwq;
4263 bool ret;
4264
4265 rcu_read_lock_sched();
4266
4267 if (cpu == WORK_CPU_UNBOUND)
4268 cpu = smp_processor_id();
4269
4270 if (!(wq->flags & WQ_UNBOUND))
4271 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4272 else
4273 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4274
4275 ret = !list_empty(&pwq->delayed_works);
4276 rcu_read_unlock_sched();
4277
4278 return ret;
4279}
4280EXPORT_SYMBOL_GPL(workqueue_congested);
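
/*
 * Illustrative sketch, not part of the original file: a producer using the
 * congestion state as an advisory hint to back off instead of stacking up
 * delayed work.  @wq and @work are hypothetical.
 */
static __maybe_unused bool example_queue_if_not_congested(struct workqueue_struct *wq,
							  struct work_struct *work)
{
	if (workqueue_congested(WORK_CPU_UNBOUND, wq))
		return false;			/* caller retries later */
	return queue_work(wq, work);		/* %false if @work was already pending */
}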
4281
/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the test result is unreliable
 * and only useful as advisory hints or for debugging.
 *
 * Return: OR'd bitmask of WORK_BUSY_* bits.
 */
4293unsigned int work_busy(struct work_struct *work)
4294{
4295 struct worker_pool *pool;
4296 unsigned long flags;
4297 unsigned int ret = 0;
4298
4299 if (work_pending(work))
4300 ret |= WORK_BUSY_PENDING;
4301
4302 local_irq_save(flags);
4303 pool = get_work_pool(work);
4304 if (pool) {
4305 spin_lock(&pool->lock);
4306 if (find_worker_executing_work(pool, work))
4307 ret |= WORK_BUSY_RUNNING;
4308 spin_unlock(&pool->lock);
4309 }
4310 local_irq_restore(flags);
4311
4312 return ret;
4313}
4314EXPORT_SYMBOL_GPL(work_busy);
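
/*
 * Illustrative sketch, not part of the original file: dump the advisory
 * state of a work item for debugging.  The result may already be stale by
 * the time it is printed.
 */
static __maybe_unused void example_report_busy(struct work_struct *work)
{
	unsigned int state = work_busy(work);

	pr_info("work %p:%s%s\n", work,
		(state & WORK_BUSY_PENDING) ? " pending" : "",
		(state & WORK_BUSY_RUNNING) ? " running" : "");
}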
4315
/**
 * set_worker_desc - set description for the current work item
 * @fmt: printf-style format string
 * @...: arguments for the format string
 *
 * This function can be called by a running work function to describe what
 * the work item is about.  If the worker task gets dumped, this
 * information will be printed out together to help debugging.  The
 * description can be at most WORKER_DESC_LEN including the trailing '\0'.
 */
4326void set_worker_desc(const char *fmt, ...)
4327{
4328 struct worker *worker = current_wq_worker();
4329 va_list args;
4330
4331 if (worker) {
4332 va_start(args, fmt);
4333 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4334 va_end(args);
4335 worker->desc_valid = true;
4336 }
4337}
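
/*
 * Illustrative sketch, not part of the original file: a work function
 * tagging its worker so that print_worker_info() below can show which
 * object it was processing.  example_work_fn and the id 42 are made up.
 */
static __maybe_unused void example_work_fn(struct work_struct *work)
{
	set_worker_desc("example object %d", 42);	/* shown in task dumps */
	/* ... actual work would go here ... */
}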
4338
/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * name of the workqueue being serviced and worker description set with
 * set_worker_desc() by the currently executing work item.
 *
 * This function can be safely called on any task as long as the
 * task_struct itself is accessible.  While safe, this function isn't
 * synchronized and may print out mixups or garbage of limited length.
 */
4352void print_worker_info(const char *log_lvl, struct task_struct *task)
4353{
4354 work_func_t *fn = NULL;
4355 char name[WQ_NAME_LEN] = { };
4356 char desc[WORKER_DESC_LEN] = { };
4357 struct pool_workqueue *pwq = NULL;
4358 struct workqueue_struct *wq = NULL;
4359 bool desc_valid = false;
4360 struct worker *worker;
4361
4362 if (!(task->flags & PF_WQ_WORKER))
4363 return;
4364
4365
4366
4367
4368
4369 worker = probe_kthread_data(task);
4370
4371
4372
4373
4374
4375 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4376 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4377 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4378 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4379
4380
4381 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4382 if (desc_valid)
4383 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4384
4385 if (fn || name[0] || desc[0]) {
4386 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4387 if (desc[0])
4388 pr_cont(" (%s)", desc);
4389 pr_cont("\n");
4390 }
4391}
4392
/*
 * CPU hotplug.
 *
 * wq_unbind_fn() runs on the CPU which is going down.  It marks every
 * worker of the CPU's pools WORKER_UNBOUND and flags the pools
 * POOL_DISASSOCIATED so that concurrency management is bypassed and the
 * workers may run anywhere.  The schedule() call below makes sure the
 * scheduler callbacks have seen WORKER_UNBOUND before nr_running is zapped
 * to zero, after which the pools behave as unbound ones until the CPU
 * comes back and rebind_workers() undoes the above.
 */
4408static void wq_unbind_fn(struct work_struct *work)
4409{
4410 int cpu = smp_processor_id();
4411 struct worker_pool *pool;
4412 struct worker *worker;
4413
4414 for_each_cpu_worker_pool(pool, cpu) {
4415 mutex_lock(&pool->attach_mutex);
4416 spin_lock_irq(&pool->lock);
4417
4418
4419
4420
4421
4422
4423
4424
4425 for_each_pool_worker(worker, pool)
4426 worker->flags |= WORKER_UNBOUND;
4427
4428 pool->flags |= POOL_DISASSOCIATED;
4429
4430 spin_unlock_irq(&pool->lock);
4431 mutex_unlock(&pool->attach_mutex);
4432
4433
4434
4435
4436
4437
4438
4439 schedule();
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449 atomic_set(&pool->nr_running, 0);
4450
4451
4452
4453
4454
4455
4456 spin_lock_irq(&pool->lock);
4457 wake_up_worker(pool);
4458 spin_unlock_irq(&pool->lock);
4459 }
4460}
4461
/**
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Rebind all workers to the CPU.
 */
4468static void rebind_workers(struct worker_pool *pool)
4469{
4470 struct worker *worker;
4471
4472 lockdep_assert_held(&pool->attach_mutex);
4473
4474
4475
4476
4477
4478
4479
4480
4481 for_each_pool_worker(worker, pool)
4482 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4483 pool->attrs->cpumask) < 0);
4484
4485 spin_lock_irq(&pool->lock);
4486 pool->flags &= ~POOL_DISASSOCIATED;
4487
4488 for_each_pool_worker(worker, pool) {
4489 unsigned int worker_flags = worker->flags;
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499 if (worker_flags & WORKER_IDLE)
4500 wake_up_process(worker->task);
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4518 worker_flags |= WORKER_REBOUND;
4519 worker_flags &= ~WORKER_UNBOUND;
4520 ACCESS_ONCE(worker->flags) = worker_flags;
4521 }
4522
4523 spin_unlock_irq(&pool->lock);
4524}
4525
/**
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * An unbound pool may end up with a cpumask which doesn't have any online
 * CPUs.  When a worker of such a pool gets scheduled, the scheduler resets
 * its cpus_allowed.  If @cpu is in @pool's cpumask, which didn't have any
 * online CPU before, cpus_allowed of all its workers should be restored.
 */
4536static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4537{
4538 static cpumask_t cpumask;
4539 struct worker *worker;
4540
4541 lockdep_assert_held(&pool->attach_mutex);
4542
4543
4544 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4545 return;
4546
4547
4548 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4549 if (cpumask_weight(&cpumask) != 1)
4550 return;
4551
4552
4553 for_each_pool_worker(worker, pool)
4554 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4555 pool->attrs->cpumask) < 0);
4556}
4557
/*
 * Workqueues should be brought up before normal priority CPU notifiers.
 * This will be registered as a high priority CPU notifier.
 */
4562static int workqueue_cpu_up_callback(struct notifier_block *nfb,
4563 unsigned long action,
4564 void *hcpu)
4565{
4566 int cpu = (unsigned long)hcpu;
4567 struct worker_pool *pool;
4568 struct workqueue_struct *wq;
4569 int pi;
4570
4571 switch (action & ~CPU_TASKS_FROZEN) {
4572 case CPU_UP_PREPARE:
4573 for_each_cpu_worker_pool(pool, cpu) {
4574 if (pool->nr_workers)
4575 continue;
4576 if (!create_worker(pool))
4577 return NOTIFY_BAD;
4578 }
4579 break;
4580
4581 case CPU_DOWN_FAILED:
4582 case CPU_ONLINE:
4583 mutex_lock(&wq_pool_mutex);
4584
4585 for_each_pool(pool, pi) {
4586 mutex_lock(&pool->attach_mutex);
4587
4588 if (pool->cpu == cpu)
4589 rebind_workers(pool);
4590 else if (pool->cpu < 0)
4591 restore_unbound_workers_cpumask(pool, cpu);
4592
4593 mutex_unlock(&pool->attach_mutex);
4594 }
4595
4596
4597 list_for_each_entry(wq, &workqueues, list)
4598 wq_update_unbound_numa(wq, cpu, true);
4599
4600 mutex_unlock(&wq_pool_mutex);
4601 break;
4602 }
4603 return NOTIFY_OK;
4604}
4605
/*
 * Workqueues should be brought down after normal priority CPU notifiers.
 * This will be registered as a low priority CPU notifier.
 */
4610static int workqueue_cpu_down_callback(struct notifier_block *nfb,
4611 unsigned long action,
4612 void *hcpu)
4613{
4614 int cpu = (unsigned long)hcpu;
4615 struct work_struct unbind_work;
4616 struct workqueue_struct *wq;
4617
4618 switch (action & ~CPU_TASKS_FROZEN) {
4619 case CPU_DOWN_PREPARE:
4620
4621 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
4622 queue_work_on(cpu, system_highpri_wq, &unbind_work);
4623
4624
4625 mutex_lock(&wq_pool_mutex);
4626 list_for_each_entry(wq, &workqueues, list)
4627 wq_update_unbound_numa(wq, cpu, false);
4628 mutex_unlock(&wq_pool_mutex);
4629
4630
4631 flush_work(&unbind_work);
4632 destroy_work_on_stack(&unbind_work);
4633 break;
4634 }
4635 return NOTIFY_OK;
4636}
4637
4638#ifdef CONFIG_SMP
4639
4640struct work_for_cpu {
4641 struct work_struct work;
4642 long (*fn)(void *);
4643 void *arg;
4644 long ret;
4645};
4646
4647static void work_for_cpu_fn(struct work_struct *work)
4648{
4649 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4650
4651 wfc->ret = wfc->fn(wfc->arg);
4652}
4653
/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from
 * completing.
 *
 * Return: The value @fn returns.
 */
4665long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4666{
4667 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4668
4669 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4670 schedule_work_on(cpu, &wfc.work);
4671 flush_work(&wfc.work);
4672 destroy_work_on_stack(&wfc.work);
4673 return wfc.ret;
4674}
4675EXPORT_SYMBOL_GPL(work_on_cpu);
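
/*
 * Illustrative sketch, not part of the original file: run a callback on a
 * specific CPU and collect its return value.  example_cpu_fn() is a
 * hypothetical stand-in for per-CPU work such as reading a CPU register;
 * the caller must keep the target CPU online.
 */
static __maybe_unused long example_cpu_fn(void *arg)
{
	return raw_smp_processor_id();	/* runs on the requested CPU */
}

static __maybe_unused long example_run_on_cpu0(void)
{
	return work_on_cpu(0, example_cpu_fn, NULL);	/* sleeps until @fn finishes */
}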
4676#endif
4677
4678#ifdef CONFIG_FREEZER
4679
/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
 * workqueues will queue new work items to their delayed_works list instead
 * of pool->worklist.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4690void freeze_workqueues_begin(void)
4691{
4692 struct workqueue_struct *wq;
4693 struct pool_workqueue *pwq;
4694
4695 mutex_lock(&wq_pool_mutex);
4696
4697 WARN_ON_ONCE(workqueue_freezing);
4698 workqueue_freezing = true;
4699
4700 list_for_each_entry(wq, &workqueues, list) {
4701 mutex_lock(&wq->mutex);
4702 for_each_pwq(pwq, wq)
4703 pwq_adjust_max_active(pwq);
4704 mutex_unlock(&wq->mutex);
4705 }
4706
4707 mutex_unlock(&wq_pool_mutex);
4708}
4709
/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex.
 *
 * Return: %true if some freezable workqueues are still busy, %false if
 * freezing is complete.
 */
4723bool freeze_workqueues_busy(void)
4724{
4725 bool busy = false;
4726 struct workqueue_struct *wq;
4727 struct pool_workqueue *pwq;
4728
4729 mutex_lock(&wq_pool_mutex);
4730
4731 WARN_ON_ONCE(!workqueue_freezing);
4732
4733 list_for_each_entry(wq, &workqueues, list) {
4734 if (!(wq->flags & WQ_FREEZABLE))
4735 continue;
4736
4737
4738
4739
4740 rcu_read_lock_sched();
4741 for_each_pwq(pwq, wq) {
4742 WARN_ON_ONCE(pwq->nr_active < 0);
4743 if (pwq->nr_active) {
4744 busy = true;
4745 rcu_read_unlock_sched();
4746 goto out_unlock;
4747 }
4748 }
4749 rcu_read_unlock_sched();
4750 }
4751out_unlock:
4752 mutex_unlock(&wq_pool_mutex);
4753 return busy;
4754}
4755
/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected
 * delayed works will be transferred to their respective pool worklists.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */
4765void thaw_workqueues(void)
4766{
4767 struct workqueue_struct *wq;
4768 struct pool_workqueue *pwq;
4769
4770 mutex_lock(&wq_pool_mutex);
4771
4772 if (!workqueue_freezing)
4773 goto out_unlock;
4774
4775 workqueue_freezing = false;
4776
4777
4778 list_for_each_entry(wq, &workqueues, list) {
4779 mutex_lock(&wq->mutex);
4780 for_each_pwq(pwq, wq)
4781 pwq_adjust_max_active(pwq);
4782 mutex_unlock(&wq->mutex);
4783 }
4784
4785out_unlock:
4786 mutex_unlock(&wq_pool_mutex);
4787}
4788#endif
4789
4790static void __init wq_numa_init(void)
4791{
4792 cpumask_var_t *tbl;
4793 int node, cpu;
4794
4795 if (num_possible_nodes() <= 1)
4796 return;
4797
4798 if (wq_disable_numa) {
4799 pr_info("workqueue: NUMA affinity support disabled\n");
4800 return;
4801 }
4802
4803 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
4804 BUG_ON(!wq_update_unbound_numa_attrs_buf);
4805
4806
4807
4808
4809
4810
4811 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
4812 BUG_ON(!tbl);
4813
4814 for_each_node(node)
4815 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
4816 node_online(node) ? node : NUMA_NO_NODE));
4817
4818 for_each_possible_cpu(cpu) {
4819 node = cpu_to_node(cpu);
4820 if (WARN_ON(node == NUMA_NO_NODE)) {
4821 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
4822
4823 return;
4824 }
4825 cpumask_set_cpu(cpu, tbl[node]);
4826 }
4827
4828 wq_numa_possible_cpumask = tbl;
4829 wq_numa_enabled = true;
4830}
4831
4832static int __init init_workqueues(void)
4833{
4834 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
4835 int i, cpu;
4836
4837 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
4838
4839 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
4840
4841 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
4842 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
4843
4844 wq_numa_init();
4845
4846
4847 for_each_possible_cpu(cpu) {
4848 struct worker_pool *pool;
4849
4850 i = 0;
4851 for_each_cpu_worker_pool(pool, cpu) {
4852 BUG_ON(init_worker_pool(pool));
4853 pool->cpu = cpu;
4854 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
4855 pool->attrs->nice = std_nice[i++];
4856 pool->node = cpu_to_node(cpu);
4857
4858
4859 mutex_lock(&wq_pool_mutex);
4860 BUG_ON(worker_pool_assign_id(pool));
4861 mutex_unlock(&wq_pool_mutex);
4862 }
4863 }
4864
4865
4866 for_each_online_cpu(cpu) {
4867 struct worker_pool *pool;
4868
4869 for_each_cpu_worker_pool(pool, cpu) {
4870 pool->flags &= ~POOL_DISASSOCIATED;
4871 BUG_ON(!create_worker(pool));
4872 }
4873 }
4874
4875
4876 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
4877 struct workqueue_attrs *attrs;
4878
4879 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
4880 attrs->nice = std_nice[i];
4881 unbound_std_wq_attrs[i] = attrs;
4882
4883
4884
4885
4886
4887
4888 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
4889 attrs->nice = std_nice[i];
4890 attrs->no_numa = true;
4891 ordered_wq_attrs[i] = attrs;
4892 }
4893
4894 system_wq = alloc_workqueue("events", 0, 0);
4895 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
4896 system_long_wq = alloc_workqueue("events_long", 0, 0);
4897 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
4898 WQ_UNBOUND_MAX_ACTIVE);
4899 system_freezable_wq = alloc_workqueue("events_freezable",
4900 WQ_FREEZABLE, 0);
4901 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
4902 WQ_POWER_EFFICIENT, 0);
4903 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
4904 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
4905 0);
4906 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
4907 !system_unbound_wq || !system_freezable_wq ||
4908 !system_power_efficient_wq ||
4909 !system_freezable_power_efficient_wq);
4910 return 0;
4911}
4912early_initcall(init_workqueues);
4913