/*
 * kernel/workqueue.c - generic asynchronous process context execution
 * backed by shared worker pools.
 */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/hashtable.h>
#include <linux/rculist.h>
#include <linux/nodemask.h>
#include <linux/moduleparam.h>
#include <linux/uaccess.h>

#include "workqueue_internal.h"

enum {
	/*
	 * worker_pool flags
	 *
	 * A bound pool is either associated or disassociated with its CPU.
	 * While associated (!DISASSOCIATED), all workers are bound to the
	 * CPU and none has %WORKER_UNBOUND set and concurrency management
	 * is in effect.
	 *
	 * While DISASSOCIATED, the cpu may be offline and all workers have
	 * %WORKER_UNBOUND set and concurrency management disabled, and may
	 * be executing on any CPU.  The pool behaves as an unbound one.
	 *
	 * Note that DISASSOCIATED should be flipped only while holding
	 * attach_mutex to avoid changing binding state while
	 * worker_attach_to_pool() is in progress.
	 */
	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */

	/* worker flags */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_PREP		= 1 << 3,	/* preparing to run works */
	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,

	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */

	UNBOUND_POOL_HASH_ORDER	= 6,		/* hashed by pool->attrs */
	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */

	MAX_IDLE_WORKERS_RATIO	= 4,		/* 1/4 of busy can be idle */
	IDLE_WORKER_TIMEOUT	= 300 * HZ,	/* keep idle ones for 5 mins */

	MAYDAY_INITIAL_TIMEOUT	= HZ / 100 >= 2 ? HZ / 100 : 2,
						/* call for help after 10ms
						   (min two ticks) */
	MAYDAY_INTERVAL		= HZ / 10,	/* and then every 100ms */
	CREATE_COOLDOWN		= HZ,		/* time to breathe after fail */

	/*
	 * Rescue workers are used only on emergencies and shared by
	 * all cpus.  Give them MIN_NICE.
	 */
	RESCUER_NICE_LEVEL	= MIN_NICE,
	HIGHPRI_NICE_LEVEL	= MIN_NICE,

	WQ_NAME_LEN		= 24,
};

/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Modifiable by initialization/destruction paths and read-only for
 *    everyone else.
 *
 * P: Preemption protected.  Disabling preemption is enough and should
 *    only be modified and accessed from the local cpu.
 *
 * L: pool->lock protected.  Access with pool->lock held.
 *
 * X: During normal operation, modification requires pool->lock and may
 *    be done only from the local cpu.  Either disabling preemption on the
 *    local cpu or grabbing pool->lock is enough for read access.  If
 *    POOL_DISASSOCIATED is set, it's identical to L.
 *
 * A: pool->attach_mutex protected.
 *
 * PL: wq_pool_mutex protected.
 *
 * PR: wq_pool_mutex protected for writes.  Sched-RCU protected for reads.
 *
 * WQ: wq->mutex protected.
 *
 * WR: wq->mutex protected for writes.  Sched-RCU protected for reads.
 *
 * MD: wq_mayday_lock protected.
 */

struct worker_pool {
	spinlock_t		lock;		/* the pool lock */
	int			cpu;		/* I: the associated cpu */
	int			node;		/* I: the associated node ID */
	int			id;		/* I: pool ID */
	unsigned int		flags;		/* X: flags */

	struct list_head	worklist;	/* L: list of pending works */
	int			nr_workers;	/* L: total number of workers */

	/* nr_idle includes the ones off idle_list for rebinding */
	int			nr_idle;	/* L: currently idle ones */

	struct list_head	idle_list;	/* X: list of idle workers */
	struct timer_list	idle_timer;	/* L: worker idle timeout */
	struct timer_list	mayday_timer;	/* L: SOS timer for workers */

	/* a worker is either on busy_hash or idle_list, or the manager */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
						/* L: hash of busy workers */

	/* see manage_workers() for details on manager arbitration */
	struct mutex		manager_arb;	/* manager arbitration */
	struct mutex		attach_mutex;	/* attach/detach exclusion */
	struct list_head	workers;	/* A: attached workers */
	struct completion	*detach_completion; /* all workers detached */

	struct ida		worker_ida;	/* worker IDs for task name */

	struct workqueue_attrs	*attrs;		/* I: worker attributes */
	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
	int			refcnt;		/* PL: refcnt for unbound pools */

	/*
	 * The current concurrency level.  As it's likely to be accessed
	 * from other CPUs during try_to_wake_up(), put it in a separate
	 * cacheline.
	 */
	atomic_t		nr_running ____cacheline_aligned_in_smp;

	/*
	 * Destruction of pool is sched-RCU protected to allow dereferences
	 * from get_work_pool().
	 */
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

/*
 * The per-pool workqueue.  While queued, the lower WORK_STRUCT_FLAG_BITS
 * of work_struct->data are used for flags and the remaining high bits
 * point to the pwq; thus, pwqs need to be aligned at two's power of the
 * number of flag bits.
 */
struct pool_workqueue {
	struct worker_pool	*pool;		/* I: the associated pool */
	struct workqueue_struct *wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			refcnt;		/* L: reference count */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
	struct list_head	mayday_node;	/* MD: node on wq->maydays */

	/*
	 * Release of an unbound pwq is punted to system_wq; see put_pwq()
	 * for details.  pool_workqueue itself is also sched-RCU protected
	 * so that the first pwq can be determined without grabbing
	 * wq->mutex.
	 */
	struct work_struct	unbound_release_work;
	struct rcu_head		rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);

/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* WQ: list of flushers */
	int			flush_color;	/* WQ: flush color waiting for */
	struct completion	done;		/* flush completion */
};

struct wq_device;

/*
 * The externally visible workqueue.  It relays the issued work items to
 * the appropriate worker_pool through its pool_workqueues.
 */
struct workqueue_struct {
	struct list_head	pwqs;		/* WR: all pwqs of this wq */
	struct list_head	list;		/* PL: list of all workqueues */

	struct mutex		mutex;		/* protects this wq */
	int			work_color;	/* WQ: current work color */
	int			flush_color;	/* WQ: current flush color */
	atomic_t		nr_pwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* WQ: first flusher */
	struct list_head	flusher_queue;	/* WQ: flush waiters */
	struct list_head	flusher_overflow; /* WQ: flush overflow list */

	struct list_head	maydays;	/* MD: pwqs requesting rescue */
	struct worker		*rescuer;	/* I: rescue worker */

	int			nr_drainers;	/* WQ: drain in progress */
	int			saved_max_active; /* WQ: saved pwq max_active */

	struct workqueue_attrs	*unbound_attrs;	/* WQ: only for unbound wqs */
	struct pool_workqueue	*dfl_pwq;	/* WQ: only for unbound wqs */

#ifdef CONFIG_SYSFS
	struct wq_device	*wq_dev;	/* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
	char			name[WQ_NAME_LEN]; /* I: workqueue name */

	/* hot fields used during command issue, aligned to cacheline */
	unsigned int		flags ____cacheline_aligned; /* WQ: WQ_* flags */
	struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
	struct pool_workqueue __rcu *numa_pwq_tbl[]; /* unbound pwqs indexed by node */
};

static struct kmem_cache *pwq_cache;

static cpumask_var_t *wq_numa_possible_cpumask;
					/* possible CPUs of each node */

static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);

/* see the comment above the definition of WQ_POWER_EFFICIENT */
#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
static bool wq_power_efficient = true;
#else
static bool wq_power_efficient;
#endif

module_param_named(power_efficient, wq_power_efficient, bool, 0444);

static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */

/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;

static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */

static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
static bool workqueue_freezing;		/* PL: have wqs started freezing? */

/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
				     cpu_worker_pools);

static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */

/* PL: hash of all unbound pools keyed by pool->attrs */
static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);

/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];

/* I: attributes used when instantiating ordered pools on demand */
static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];

struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_highpri_wq);
struct workqueue_struct *system_long_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_long_wq);
struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
struct workqueue_struct *system_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_power_efficient_wq);
struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);

static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
				 const struct workqueue_attrs *from);

#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>

#define assert_rcu_or_pool_mutex()					\
	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
			   lockdep_is_held(&wq_pool_mutex),		\
			   "sched RCU or wq_pool_mutex should be held")

#define assert_rcu_or_wq_mutex(wq)					\
	rcu_lockdep_assert(rcu_read_lock_sched_held() ||		\
			   lockdep_is_held(&wq->mutex),			\
			   "sched RCU or wq->mutex should be held")

#define for_each_cpu_worker_pool(pool, cpu)				\
	for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0];		\
	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
	     (pool)++)

/**
 * for_each_pool - iterate through all worker_pools in the system
 * @pool: iteration cursor
 * @pi: integer used for iteration
 *
 * This must be called either with wq_pool_mutex held or sched RCU read
 * locked.  If the pool needs to be used beyond the locking in effect, the
 * caller is responsible for guaranteeing that the pool stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool(pool, pi)						\
	idr_for_each_entry(&worker_pool_idr, pool, pi)			\
		if (({ assert_rcu_or_pool_mutex(); false; })) { }	\
		else

/**
 * for_each_pool_worker - iterate through all workers of a worker_pool
 * @worker: iteration cursor
 * @pool: worker_pool to iterate workers of
 *
 * This must be called with @pool->attach_mutex held.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pool_worker(worker, pool)				\
	list_for_each_entry((worker), &(pool)->workers, node)		\
		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
		else

/**
 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
 * @pwq: iteration cursor
 * @wq: the target workqueue
 *
 * This must be called either with wq->mutex held or sched RCU read locked.
 * If the pwq needs to be used beyond the locking in effect, the caller is
 * responsible for guaranteeing that the pwq stays online.
 *
 * The if/else clause exists only for the lockdep assertion and can be
 * ignored.
 */
#define for_each_pwq(pwq, wq)						\
	list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node)		\
		if (({ assert_rcu_or_wq_mutex(wq); false; })) { }	\
		else

395#ifdef CONFIG_DEBUG_OBJECTS_WORK
396
397static struct debug_obj_descr work_debug_descr;
398
399static void *work_debug_hint(void *addr)
400{
401 return ((struct work_struct *) addr)->func;
402}
403
404
405
406
407
408static int work_fixup_init(void *addr, enum debug_obj_state state)
409{
410 struct work_struct *work = addr;
411
412 switch (state) {
413 case ODEBUG_STATE_ACTIVE:
414 cancel_work_sync(work);
415 debug_object_init(work, &work_debug_descr);
416 return 1;
417 default:
418 return 0;
419 }
420}
421
422
423
424
425
426
427static int work_fixup_activate(void *addr, enum debug_obj_state state)
428{
429 struct work_struct *work = addr;
430
431 switch (state) {
432
433 case ODEBUG_STATE_NOTAVAILABLE:
434
435
436
437
438
439 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
440 debug_object_init(work, &work_debug_descr);
441 debug_object_activate(work, &work_debug_descr);
442 return 0;
443 }
444 WARN_ON_ONCE(1);
445 return 0;
446
447 case ODEBUG_STATE_ACTIVE:
448 WARN_ON(1);
449
450 default:
451 return 0;
452 }
453}
454
455
456
457
458
459static int work_fixup_free(void *addr, enum debug_obj_state state)
460{
461 struct work_struct *work = addr;
462
463 switch (state) {
464 case ODEBUG_STATE_ACTIVE:
465 cancel_work_sync(work);
466 debug_object_free(work, &work_debug_descr);
467 return 1;
468 default:
469 return 0;
470 }
471}
472
473static struct debug_obj_descr work_debug_descr = {
474 .name = "work_struct",
475 .debug_hint = work_debug_hint,
476 .fixup_init = work_fixup_init,
477 .fixup_activate = work_fixup_activate,
478 .fixup_free = work_fixup_free,
479};
480
481static inline void debug_work_activate(struct work_struct *work)
482{
483 debug_object_activate(work, &work_debug_descr);
484}
485
486static inline void debug_work_deactivate(struct work_struct *work)
487{
488 debug_object_deactivate(work, &work_debug_descr);
489}
490
491void __init_work(struct work_struct *work, int onstack)
492{
493 if (onstack)
494 debug_object_init_on_stack(work, &work_debug_descr);
495 else
496 debug_object_init(work, &work_debug_descr);
497}
498EXPORT_SYMBOL_GPL(__init_work);
499
500void destroy_work_on_stack(struct work_struct *work)
501{
502 debug_object_free(work, &work_debug_descr);
503}
504EXPORT_SYMBOL_GPL(destroy_work_on_stack);
505
506void destroy_delayed_work_on_stack(struct delayed_work *work)
507{
508 destroy_timer_on_stack(&work->timer);
509 debug_object_free(&work->work, &work_debug_descr);
510}
511EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
512
513#else
514static inline void debug_work_activate(struct work_struct *work) { }
515static inline void debug_work_deactivate(struct work_struct *work) { }
516#endif
517
518
519
520
521
522
523
524
525static int worker_pool_assign_id(struct worker_pool *pool)
526{
527 int ret;
528
529 lockdep_assert_held(&wq_pool_mutex);
530
531 ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
532 GFP_KERNEL);
533 if (ret >= 0) {
534 pool->id = ret;
535 return 0;
536 }
537 return ret;
538}
539
540
541
542
543
544
545
546
547
548
549
550
551static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
552 int node)
553{
554 assert_rcu_or_wq_mutex(wq);
555 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
556}
557
558static unsigned int work_color_to_flags(int color)
559{
560 return color << WORK_STRUCT_COLOR_SHIFT;
561}
562
563static int get_work_color(struct work_struct *work)
564{
565 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
566 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
567}
568
569static int work_next_color(int color)
570{
571 return (color + 1) % WORK_NR_COLORS;
572}
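
/*
 * Illustrative sketch (not part of the original file) of how the color
 * helpers above compose.  A queued work item carries its color in the
 * flag word of work->data, and the flush machinery cycles through the
 * WORK_NR_COLORS values (one value is reserved as WORK_NO_COLOR):
 *
 *	unsigned int flags = work_color_to_flags(3);
 *	// ... flags are ORed into work->data when the work is queued ...
 *	int color = get_work_color(work);	// yields 3 again
 *	int next  = work_next_color(color);	// 4, wrapping at WORK_NR_COLORS
 */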
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594static inline void set_work_data(struct work_struct *work, unsigned long data,
595 unsigned long flags)
596{
597 WARN_ON_ONCE(!work_pending(work));
598 atomic_long_set(&work->data, data | flags | work_static(work));
599}
600
601static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
602 unsigned long extra_flags)
603{
604 set_work_data(work, (unsigned long)pwq,
605 WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
606}
607
608static void set_work_pool_and_keep_pending(struct work_struct *work,
609 int pool_id)
610{
611 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
612 WORK_STRUCT_PENDING);
613}
614
615static void set_work_pool_and_clear_pending(struct work_struct *work,
616 int pool_id)
617{
618
619
620
621
622
623
624 smp_wmb();
625 set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
626}
627
628static void clear_work_data(struct work_struct *work)
629{
630 smp_wmb();
631 set_work_data(work, WORK_STRUCT_NO_POOL, 0);
632}
633
634static struct pool_workqueue *get_work_pwq(struct work_struct *work)
635{
636 unsigned long data = atomic_long_read(&work->data);
637
638 if (data & WORK_STRUCT_PWQ)
639 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
640 else
641 return NULL;
642}
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659static struct worker_pool *get_work_pool(struct work_struct *work)
660{
661 unsigned long data = atomic_long_read(&work->data);
662 int pool_id;
663
664 assert_rcu_or_pool_mutex();
665
666 if (data & WORK_STRUCT_PWQ)
667 return ((struct pool_workqueue *)
668 (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
669
670 pool_id = data >> WORK_OFFQ_POOL_SHIFT;
671 if (pool_id == WORK_OFFQ_POOL_NONE)
672 return NULL;
673
674 return idr_find(&worker_pool_idr, pool_id);
675}
676
677
678
679
680
681
682
683
684static int get_work_pool_id(struct work_struct *work)
685{
686 unsigned long data = atomic_long_read(&work->data);
687
688 if (data & WORK_STRUCT_PWQ)
689 return ((struct pool_workqueue *)
690 (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
691
692 return data >> WORK_OFFQ_POOL_SHIFT;
693}
694
695static void mark_work_canceling(struct work_struct *work)
696{
697 unsigned long pool_id = get_work_pool_id(work);
698
699 pool_id <<= WORK_OFFQ_POOL_SHIFT;
700 set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
701}
702
703static bool work_is_canceling(struct work_struct *work)
704{
705 unsigned long data = atomic_long_read(&work->data);
706
707 return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
708}
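
/*
 * Summary sketch of the work->data encoding used by the helpers above
 * (illustrative, not part of the original file):
 *
 *	while queued:	 [ pool_workqueue pointer | WORK_STRUCT_PWQ | flags ]
 *	while off-queue: [ pool_id << WORK_OFFQ_POOL_SHIFT | OFFQ/STRUCT flags ]
 *
 * get_work_pwq()/get_work_pool() pick the right interpretation by testing
 * WORK_STRUCT_PWQ, and WORK_OFFQ_POOL_NONE marks "no pool recorded".
 */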
709
710
711
712
713
714
715
716static bool __need_more_worker(struct worker_pool *pool)
717{
718 return !atomic_read(&pool->nr_running);
719}
720
721
722
723
724
725
726
727
728
729static bool need_more_worker(struct worker_pool *pool)
730{
731 return !list_empty(&pool->worklist) && __need_more_worker(pool);
732}
733
734
735static bool may_start_working(struct worker_pool *pool)
736{
737 return pool->nr_idle;
738}
739
740
741static bool keep_working(struct worker_pool *pool)
742{
743 return !list_empty(&pool->worklist) &&
744 atomic_read(&pool->nr_running) <= 1;
745}
746
747
748static bool need_to_create_worker(struct worker_pool *pool)
749{
750 return need_more_worker(pool) && !may_start_working(pool);
751}
752
753
754static bool too_many_workers(struct worker_pool *pool)
755{
756 bool managing = mutex_is_locked(&pool->manager_arb);
757 int nr_idle = pool->nr_idle + managing;
758 int nr_busy = pool->nr_workers - nr_idle;
759
760 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
761}
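
/*
 * Worked example for the ratio check above (illustrative, not from the
 * original file): with MAX_IDLE_WORKERS_RATIO == 4, a pool tolerates at
 * most two idle workers plus one quarter of the busy ones.  E.g. with
 * nr_workers == 12 and nr_idle == 5, nr_busy is 7 and
 * (5 - 2) * 4 == 12 >= 7, so too_many_workers() returns true and the idle
 * timer starts reaping excess idle workers after IDLE_WORKER_TIMEOUT.
 */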
762
763
764
765
766
767
768static struct worker *first_idle_worker(struct worker_pool *pool)
769{
770 if (unlikely(list_empty(&pool->idle_list)))
771 return NULL;
772
773 return list_first_entry(&pool->idle_list, struct worker, entry);
774}
775
776
777
778
779
780
781
782
783
784
785static void wake_up_worker(struct worker_pool *pool)
786{
787 struct worker *worker = first_idle_worker(pool);
788
789 if (likely(worker))
790 wake_up_process(worker->task);
791}
792
793
794
795
796
797
798
799
800
801
802
803
804void wq_worker_waking_up(struct task_struct *task, int cpu)
805{
806 struct worker *worker = kthread_data(task);
807
808 if (!(worker->flags & WORKER_NOT_RUNNING)) {
809 WARN_ON_ONCE(worker->pool->cpu != cpu);
810 atomic_inc(&worker->pool->nr_running);
811 }
812}
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
830{
831 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
832 struct worker_pool *pool;
833
834
835
836
837
838
839 if (worker->flags & WORKER_NOT_RUNNING)
840 return NULL;
841
842 pool = worker->pool;
843
844
845 if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
846 return NULL;
847
848
849
850
851
852
853
854
855
856
857
858
859 if (atomic_dec_and_test(&pool->nr_running) &&
860 !list_empty(&pool->worklist))
861 to_wakeup = first_idle_worker(pool);
862 return to_wakeup ? to_wakeup->task : NULL;
863}
864
865
866
867
868
869
870
871
872
873
874
875static inline void worker_set_flags(struct worker *worker, unsigned int flags)
876{
877 struct worker_pool *pool = worker->pool;
878
879 WARN_ON_ONCE(worker->task != current);
880
881
882 if ((flags & WORKER_NOT_RUNNING) &&
883 !(worker->flags & WORKER_NOT_RUNNING)) {
884 atomic_dec(&pool->nr_running);
885 }
886
887 worker->flags |= flags;
888}
889
890
891
892
893
894
895
896
897
898
899
900static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
901{
902 struct worker_pool *pool = worker->pool;
903 unsigned int oflags = worker->flags;
904
905 WARN_ON_ONCE(worker->task != current);
906
907 worker->flags &= ~flags;
908
909
910
911
912
913
914 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
915 if (!(worker->flags & WORKER_NOT_RUNNING))
916 atomic_inc(&pool->nr_running);
917}
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952static struct worker *find_worker_executing_work(struct worker_pool *pool,
953 struct work_struct *work)
954{
955 struct worker *worker;
956
957 hash_for_each_possible(pool->busy_hash, worker, hentry,
958 (unsigned long)work)
959 if (worker->current_work == work &&
960 worker->current_func == work->func)
961 return worker;
962
963 return NULL;
964}
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983static void move_linked_works(struct work_struct *work, struct list_head *head,
984 struct work_struct **nextp)
985{
986 struct work_struct *n;
987
988
989
990
991
992 list_for_each_entry_safe_from(work, n, NULL, entry) {
993 list_move_tail(&work->entry, head);
994 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
995 break;
996 }
997
998
999
1000
1001
1002
1003 if (nextp)
1004 *nextp = n;
1005}
1006
1007
1008
1009
1010
1011
1012
1013
1014static void get_pwq(struct pool_workqueue *pwq)
1015{
1016 lockdep_assert_held(&pwq->pool->lock);
1017 WARN_ON_ONCE(pwq->refcnt <= 0);
1018 pwq->refcnt++;
1019}
1020
1021
1022
1023
1024
1025
1026
1027
1028static void put_pwq(struct pool_workqueue *pwq)
1029{
1030 lockdep_assert_held(&pwq->pool->lock);
1031 if (likely(--pwq->refcnt))
1032 return;
1033 if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1034 return;
1035
1036
1037
1038
1039
1040
1041
1042
1043 schedule_work(&pwq->unbound_release_work);
1044}
1045
1046
1047
1048
1049
1050
1051
1052static void put_pwq_unlocked(struct pool_workqueue *pwq)
1053{
1054 if (pwq) {
1055
1056
1057
1058
1059 spin_lock_irq(&pwq->pool->lock);
1060 put_pwq(pwq);
1061 spin_unlock_irq(&pwq->pool->lock);
1062 }
1063}
1064
1065static void pwq_activate_delayed_work(struct work_struct *work)
1066{
1067 struct pool_workqueue *pwq = get_work_pwq(work);
1068
1069 trace_workqueue_activate_work(work);
1070 move_linked_works(work, &pwq->pool->worklist, NULL);
1071 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1072 pwq->nr_active++;
1073}
1074
1075static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
1076{
1077 struct work_struct *work = list_first_entry(&pwq->delayed_works,
1078 struct work_struct, entry);
1079
1080 pwq_activate_delayed_work(work);
1081}
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
1095{
1096
1097 if (color == WORK_NO_COLOR)
1098 goto out_put;
1099
1100 pwq->nr_in_flight[color]--;
1101
1102 pwq->nr_active--;
1103 if (!list_empty(&pwq->delayed_works)) {
1104
1105 if (pwq->nr_active < pwq->max_active)
1106 pwq_activate_first_delayed(pwq);
1107 }
1108
1109
1110 if (likely(pwq->flush_color != color))
1111 goto out_put;
1112
1113
1114 if (pwq->nr_in_flight[color])
1115 goto out_put;
1116
1117
1118 pwq->flush_color = -1;
1119
1120
1121
1122
1123
1124 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1125 complete(&pwq->wq->first_flusher->done);
1126out_put:
1127 put_pwq(pwq);
1128}
1129

/**
 * try_to_grab_pending - steal work item from worklist and disable irq
 * @work: work item to steal
 * @is_dwork: @work is a delayed_work
 * @flags: place to store irq state
 *
 * Try to grab PENDING bit of @work.  This function can handle @work in any
 * stable state - idle, on timer or on worklist.
 *
 * Return:
 *  1		if @work was pending and we successfully stole PENDING
 *  0		if @work was idle and we claimed PENDING
 *  -EAGAIN	if PENDING couldn't be grabbed at the moment, safe to busy-retry
 *  -ENOENT	if someone else is canceling @work, this state may persist
 *		for arbitrarily long
 *
 * Note:
 * On >= 0 return, the caller owns @work's PENDING bit.  To avoid getting
 * interrupted while holding PENDING and @work off queue, irq must be
 * disabled on entry.
 *
 * On successful return, >= 0, irq is disabled and the caller is
 * responsible for releasing it using local_irq_restore(*@flags).
 *
 * This function is safe to call from any context including IRQ handler.
 */
1157static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1158 unsigned long *flags)
1159{
1160 struct worker_pool *pool;
1161 struct pool_workqueue *pwq;
1162
1163 local_irq_save(*flags);
1164
1165
1166 if (is_dwork) {
1167 struct delayed_work *dwork = to_delayed_work(work);
1168
1169
1170
1171
1172
1173
1174 if (likely(del_timer(&dwork->timer)))
1175 return 1;
1176 }
1177
1178
1179 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1180 return 0;
1181
1182
1183
1184
1185
1186 pool = get_work_pool(work);
1187 if (!pool)
1188 goto fail;
1189
1190 spin_lock(&pool->lock);
1191
1192
1193
1194
1195
1196
1197
1198
1199 pwq = get_work_pwq(work);
1200 if (pwq && pwq->pool == pool) {
1201 debug_work_deactivate(work);
1202
1203
1204
1205
1206
1207
1208
1209
1210 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
1211 pwq_activate_delayed_work(work);
1212
1213 list_del_init(&work->entry);
1214 pwq_dec_nr_in_flight(pwq, get_work_color(work));
1215
1216
1217 set_work_pool_and_keep_pending(work, pool->id);
1218
1219 spin_unlock(&pool->lock);
1220 return 1;
1221 }
1222 spin_unlock(&pool->lock);
1223fail:
1224 local_irq_restore(*flags);
1225 if (work_is_canceling(work))
1226 return -ENOENT;
1227 cpu_relax();
1228 return -EAGAIN;
1229}
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1245 struct list_head *head, unsigned int extra_flags)
1246{
1247 struct worker_pool *pool = pwq->pool;
1248
1249
1250 set_work_pwq(work, pwq, extra_flags);
1251 list_add_tail(&work->entry, head);
1252 get_pwq(pwq);
1253
1254
1255
1256
1257
1258
1259 smp_mb();
1260
1261 if (__need_more_worker(pool))
1262 wake_up_worker(pool);
1263}
1264
1265
1266
1267
1268
1269static bool is_chained_work(struct workqueue_struct *wq)
1270{
1271 struct worker *worker;
1272
1273 worker = current_wq_worker();
1274
1275
1276
1277
1278 return worker && worker->current_pwq->wq == wq;
1279}
1280
1281static void __queue_work(int cpu, struct workqueue_struct *wq,
1282 struct work_struct *work)
1283{
1284 struct pool_workqueue *pwq;
1285 struct worker_pool *last_pool;
1286 struct list_head *worklist;
1287 unsigned int work_flags;
1288 unsigned int req_cpu = cpu;
1289
1290
1291
1292
1293
1294
1295
1296 WARN_ON_ONCE(!irqs_disabled());
1297
1298 debug_work_activate(work);
1299
1300
1301 if (unlikely(wq->flags & __WQ_DRAINING) &&
1302 WARN_ON_ONCE(!is_chained_work(wq)))
1303 return;
1304retry:
1305 if (req_cpu == WORK_CPU_UNBOUND)
1306 cpu = raw_smp_processor_id();
1307
1308
1309 if (!(wq->flags & WQ_UNBOUND))
1310 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1311 else
1312 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1313
1314
1315
1316
1317
1318
1319 last_pool = get_work_pool(work);
1320 if (last_pool && last_pool != pwq->pool) {
1321 struct worker *worker;
1322
1323 spin_lock(&last_pool->lock);
1324
1325 worker = find_worker_executing_work(last_pool, work);
1326
1327 if (worker && worker->current_pwq->wq == wq) {
1328 pwq = worker->current_pwq;
1329 } else {
1330
1331 spin_unlock(&last_pool->lock);
1332 spin_lock(&pwq->pool->lock);
1333 }
1334 } else {
1335 spin_lock(&pwq->pool->lock);
1336 }
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346 if (unlikely(!pwq->refcnt)) {
1347 if (wq->flags & WQ_UNBOUND) {
1348 spin_unlock(&pwq->pool->lock);
1349 cpu_relax();
1350 goto retry;
1351 }
1352
1353 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1354 wq->name, cpu);
1355 }
1356
1357
1358 trace_workqueue_queue_work(req_cpu, pwq, work);
1359
1360 if (WARN_ON(!list_empty(&work->entry))) {
1361 spin_unlock(&pwq->pool->lock);
1362 return;
1363 }
1364
1365 pwq->nr_in_flight[pwq->work_color]++;
1366 work_flags = work_color_to_flags(pwq->work_color);
1367
1368 if (likely(pwq->nr_active < pwq->max_active)) {
1369 trace_workqueue_activate_work(work);
1370 pwq->nr_active++;
1371 worklist = &pwq->pool->worklist;
1372 } else {
1373 work_flags |= WORK_STRUCT_DELAYED;
1374 worklist = &pwq->delayed_works;
1375 }
1376
1377 insert_work(pwq, work, worklist, work_flags);
1378
1379 spin_unlock(&pwq->pool->lock);
1380}

/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 *
 * Return: %false if @work was already on a queue, %true otherwise.
 */
1393bool queue_work_on(int cpu, struct workqueue_struct *wq,
1394 struct work_struct *work)
1395{
1396 bool ret = false;
1397 unsigned long flags;
1398
1399 local_irq_save(flags);
1400
1401 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1402 __queue_work(cpu, wq, work);
1403 ret = true;
1404 }
1405
1406 local_irq_restore(flags);
1407 return ret;
1408}
1409EXPORT_SYMBOL(queue_work_on);
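
/*
 * Minimal usage sketch for queue_work_on() (illustrative, not part of the
 * original file; the my_* names are hypothetical):
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("running on cpu %d\n", smp_processor_id());
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	// Pin execution to CPU 1's per-cpu pool of system_wq.  The caller
 *	// must guarantee CPU 1 can't go away meanwhile; when the CPU
 *	// doesn't matter, plain queue_work()/schedule_work() is preferred.
 *	queue_work_on(1, system_wq, &my_work);
 */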
1410
1411void delayed_work_timer_fn(unsigned long __data)
1412{
1413 struct delayed_work *dwork = (struct delayed_work *)__data;
1414
1415
1416 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1417}
1418EXPORT_SYMBOL(delayed_work_timer_fn);
1419
1420static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1421 struct delayed_work *dwork, unsigned long delay)
1422{
1423 struct timer_list *timer = &dwork->timer;
1424 struct work_struct *work = &dwork->work;
1425
1426 WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
1427 timer->data != (unsigned long)dwork);
1428 WARN_ON_ONCE(timer_pending(timer));
1429 WARN_ON_ONCE(!list_empty(&work->entry));
1430
1431
1432
1433
1434
1435
1436
1437 if (!delay) {
1438 __queue_work(cpu, wq, &dwork->work);
1439 return;
1440 }
1441
1442 timer_stats_timer_set_start_info(&dwork->timer);
1443
1444 dwork->wq = wq;
1445 dwork->cpu = cpu;
1446 timer->expires = jiffies + delay;
1447
1448 if (unlikely(cpu != WORK_CPU_UNBOUND))
1449 add_timer_on(timer, cpu);
1450 else
1451 add_timer(timer);
1452}

/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Return: %false if @dwork was already on a queue, %true otherwise.  If
 * @delay is zero and @dwork is idle, it will be scheduled for immediate
 * execution.
 */
1465bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1466 struct delayed_work *dwork, unsigned long delay)
1467{
1468 struct work_struct *work = &dwork->work;
1469 bool ret = false;
1470 unsigned long flags;
1471
1472
1473 local_irq_save(flags);
1474
1475 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1476 __queue_delayed_work(cpu, wq, dwork, delay);
1477 ret = true;
1478 }
1479
1480 local_irq_restore(flags);
1481 return ret;
1482}
1483EXPORT_SYMBOL(queue_delayed_work_on);
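
/*
 * Minimal usage sketch for the delayed variant (illustrative, not part of
 * the original file; my_dwork/my_dwork_fn are hypothetical):
 *
 *	static void my_dwork_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		// ... container_of(dwork, struct my_ctx, dwork) etc. ...
 *	}
 *	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);
 *
 *	// run roughly 100ms from now on whichever CPU is local then
 *	queue_delayed_work(system_wq, &my_dwork, msecs_to_jiffies(100));
 */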

/**
 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * If @dwork is idle, this is equivalent to queue_delayed_work_on();
 * otherwise, modify @dwork's timer so that it expires after @delay.  If
 * @delay is zero, @dwork is guaranteed to be scheduled immediately
 * regardless of its current state.
 *
 * Return: %false if @dwork was idle and queued, %true if @dwork was
 * pending and its timer was modified.
 *
 * This function is safe to call from any context including IRQ handler.
 * See try_to_grab_pending() for details.
 */
1503bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1504 struct delayed_work *dwork, unsigned long delay)
1505{
1506 unsigned long flags;
1507 int ret;
1508
1509 do {
1510 ret = try_to_grab_pending(&dwork->work, true, &flags);
1511 } while (unlikely(ret == -EAGAIN));
1512
1513 if (likely(ret >= 0)) {
1514 __queue_delayed_work(cpu, wq, dwork, delay);
1515 local_irq_restore(flags);
1516 }
1517
1518
1519 return ret;
1520}
1521EXPORT_SYMBOL_GPL(mod_delayed_work_on);
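
/*
 * A typical use of mod_delayed_work() is debouncing: every call pushes the
 * timeout out again and only the last one actually runs the handler.
 * Illustrative sketch (not part of the original file; names hypothetical):
 *
 *	// called on every input event
 *	static void my_event(struct my_dev *dev)
 *	{
 *		mod_delayed_work(system_wq, &dev->flush_dwork,
 *				 msecs_to_jiffies(50));
 *	}
 *
 * Unlike queue_delayed_work(), this requeues even if the work is already
 * pending, so it never needs a cancel-then-queue dance.
 */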
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533static void worker_enter_idle(struct worker *worker)
1534{
1535 struct worker_pool *pool = worker->pool;
1536
1537 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1538 WARN_ON_ONCE(!list_empty(&worker->entry) &&
1539 (worker->hentry.next || worker->hentry.pprev)))
1540 return;
1541
1542
1543 worker->flags |= WORKER_IDLE;
1544 pool->nr_idle++;
1545 worker->last_active = jiffies;
1546
1547
1548 list_add(&worker->entry, &pool->idle_list);
1549
1550 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1551 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1552
1553
1554
1555
1556
1557
1558
1559 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1560 pool->nr_workers == pool->nr_idle &&
1561 atomic_read(&pool->nr_running));
1562}
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573static void worker_leave_idle(struct worker *worker)
1574{
1575 struct worker_pool *pool = worker->pool;
1576
1577 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
1578 return;
1579 worker_clr_flags(worker, WORKER_IDLE);
1580 pool->nr_idle--;
1581 list_del_init(&worker->entry);
1582}
1583
1584static struct worker *alloc_worker(int node)
1585{
1586 struct worker *worker;
1587
1588 worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
1589 if (worker) {
1590 INIT_LIST_HEAD(&worker->entry);
1591 INIT_LIST_HEAD(&worker->scheduled);
1592 INIT_LIST_HEAD(&worker->node);
1593
1594 worker->flags = WORKER_PREP;
1595 }
1596 return worker;
1597}
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608static void worker_attach_to_pool(struct worker *worker,
1609 struct worker_pool *pool)
1610{
1611 mutex_lock(&pool->attach_mutex);
1612
1613
1614
1615
1616
1617 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1618
1619
1620
1621
1622
1623
1624 if (pool->flags & POOL_DISASSOCIATED)
1625 worker->flags |= WORKER_UNBOUND;
1626
1627 list_add_tail(&worker->node, &pool->workers);
1628
1629 mutex_unlock(&pool->attach_mutex);
1630}
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641static void worker_detach_from_pool(struct worker *worker,
1642 struct worker_pool *pool)
1643{
1644 struct completion *detach_completion = NULL;
1645
1646 mutex_lock(&pool->attach_mutex);
1647 list_del(&worker->node);
1648 if (list_empty(&pool->workers))
1649 detach_completion = pool->detach_completion;
1650 mutex_unlock(&pool->attach_mutex);
1651
1652
1653 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
1654
1655 if (detach_completion)
1656 complete(detach_completion);
1657}
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671static struct worker *create_worker(struct worker_pool *pool)
1672{
1673 struct worker *worker = NULL;
1674 int id = -1;
1675 char id_buf[16];
1676
1677
1678 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1679 if (id < 0)
1680 goto fail;
1681
1682 worker = alloc_worker(pool->node);
1683 if (!worker)
1684 goto fail;
1685
1686 worker->pool = pool;
1687 worker->id = id;
1688
1689 if (pool->cpu >= 0)
1690 snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
1691 pool->attrs->nice < 0 ? "H" : "");
1692 else
1693 snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
1694
1695 worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
1696 "kworker/%s", id_buf);
1697 if (IS_ERR(worker->task))
1698 goto fail;
1699
1700 set_user_nice(worker->task, pool->attrs->nice);
1701
1702
1703 worker->task->flags |= PF_NO_SETAFFINITY;
1704
1705
1706 worker_attach_to_pool(worker, pool);
1707
1708
1709 spin_lock_irq(&pool->lock);
1710 worker->pool->nr_workers++;
1711 worker_enter_idle(worker);
1712 wake_up_process(worker->task);
1713 spin_unlock_irq(&pool->lock);
1714
1715 return worker;
1716
1717fail:
1718 if (id >= 0)
1719 ida_simple_remove(&pool->worker_ida, id);
1720 kfree(worker);
1721 return NULL;
1722}
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734static void destroy_worker(struct worker *worker)
1735{
1736 struct worker_pool *pool = worker->pool;
1737
1738 lockdep_assert_held(&pool->lock);
1739
1740
1741 if (WARN_ON(worker->current_work) ||
1742 WARN_ON(!list_empty(&worker->scheduled)) ||
1743 WARN_ON(!(worker->flags & WORKER_IDLE)))
1744 return;
1745
1746 pool->nr_workers--;
1747 pool->nr_idle--;
1748
1749 list_del_init(&worker->entry);
1750 worker->flags |= WORKER_DIE;
1751 wake_up_process(worker->task);
1752}
1753
1754static void idle_worker_timeout(unsigned long __pool)
1755{
1756 struct worker_pool *pool = (void *)__pool;
1757
1758 spin_lock_irq(&pool->lock);
1759
1760 while (too_many_workers(pool)) {
1761 struct worker *worker;
1762 unsigned long expires;
1763
1764
1765 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1766 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1767
1768 if (time_before(jiffies, expires)) {
1769 mod_timer(&pool->idle_timer, expires);
1770 break;
1771 }
1772
1773 destroy_worker(worker);
1774 }
1775
1776 spin_unlock_irq(&pool->lock);
1777}
1778
1779static void send_mayday(struct work_struct *work)
1780{
1781 struct pool_workqueue *pwq = get_work_pwq(work);
1782 struct workqueue_struct *wq = pwq->wq;
1783
1784 lockdep_assert_held(&wq_mayday_lock);
1785
1786 if (!wq->rescuer)
1787 return;
1788
1789
1790 if (list_empty(&pwq->mayday_node)) {
1791
1792
1793
1794
1795
1796 get_pwq(pwq);
1797 list_add_tail(&pwq->mayday_node, &wq->maydays);
1798 wake_up_process(wq->rescuer->task);
1799 }
1800}
1801
1802static void pool_mayday_timeout(unsigned long __pool)
1803{
1804 struct worker_pool *pool = (void *)__pool;
1805 struct work_struct *work;
1806
1807 spin_lock_irq(&wq_mayday_lock);
1808 spin_lock(&pool->lock);
1809
1810 if (need_to_create_worker(pool)) {
1811
1812
1813
1814
1815
1816
1817 list_for_each_entry(work, &pool->worklist, entry)
1818 send_mayday(work);
1819 }
1820
1821 spin_unlock(&pool->lock);
1822 spin_unlock_irq(&wq_mayday_lock);
1823
1824 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1825}
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849static bool maybe_create_worker(struct worker_pool *pool)
1850__releases(&pool->lock)
1851__acquires(&pool->lock)
1852{
1853 if (!need_to_create_worker(pool))
1854 return false;
1855restart:
1856 spin_unlock_irq(&pool->lock);
1857
1858
1859 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1860
1861 while (true) {
1862 if (create_worker(pool) || !need_to_create_worker(pool))
1863 break;
1864
1865 schedule_timeout_interruptible(CREATE_COOLDOWN);
1866
1867 if (!need_to_create_worker(pool))
1868 break;
1869 }
1870
1871 del_timer_sync(&pool->mayday_timer);
1872 spin_lock_irq(&pool->lock);
1873
1874
1875
1876
1877
1878 if (need_to_create_worker(pool))
1879 goto restart;
1880 return true;
1881}
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906static bool manage_workers(struct worker *worker)
1907{
1908 struct worker_pool *pool = worker->pool;
1909 bool ret = false;
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921 if (!mutex_trylock(&pool->manager_arb))
1922 return ret;
1923
1924 ret |= maybe_create_worker(pool);
1925
1926 mutex_unlock(&pool->manager_arb);
1927 return ret;
1928}
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944static void process_one_work(struct worker *worker, struct work_struct *work)
1945__releases(&pool->lock)
1946__acquires(&pool->lock)
1947{
1948 struct pool_workqueue *pwq = get_work_pwq(work);
1949 struct worker_pool *pool = worker->pool;
1950 bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
1951 int work_color;
1952 struct worker *collision;
1953#ifdef CONFIG_LOCKDEP
1954
1955
1956
1957
1958
1959
1960
1961 struct lockdep_map lockdep_map;
1962
1963 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
1964#endif
1965
1966 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
1967 raw_smp_processor_id() != pool->cpu);
1968
1969
1970
1971
1972
1973
1974
1975 collision = find_worker_executing_work(pool, work);
1976 if (unlikely(collision)) {
1977 move_linked_works(work, &collision->scheduled, NULL);
1978 return;
1979 }
1980
1981
1982 debug_work_deactivate(work);
1983 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
1984 worker->current_work = work;
1985 worker->current_func = work->func;
1986 worker->current_pwq = pwq;
1987 work_color = get_work_color(work);
1988
1989 list_del_init(&work->entry);
1990
1991
1992
1993
1994
1995
1996
1997 if (unlikely(cpu_intensive))
1998 worker_set_flags(worker, WORKER_CPU_INTENSIVE);
1999
2000
2001
2002
2003
2004
2005
2006
2007 if (need_more_worker(pool))
2008 wake_up_worker(pool);
2009
2010
2011
2012
2013
2014
2015
2016 set_work_pool_and_clear_pending(work, pool->id);
2017
2018 spin_unlock_irq(&pool->lock);
2019
2020 lock_map_acquire_read(&pwq->wq->lockdep_map);
2021 lock_map_acquire(&lockdep_map);
2022 trace_workqueue_execute_start(work);
2023 worker->current_func(work);
2024
2025
2026
2027
2028 trace_workqueue_execute_end(work);
2029 lock_map_release(&lockdep_map);
2030 lock_map_release(&pwq->wq->lockdep_map);
2031
2032 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2033 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2034 " last function: %pf\n",
2035 current->comm, preempt_count(), task_pid_nr(current),
2036 worker->current_func);
2037 debug_show_held_locks(current);
2038 dump_stack();
2039 }
2040
2041
2042
2043
2044
2045
2046
2047
2048 cond_resched();
2049
2050 spin_lock_irq(&pool->lock);
2051
2052
2053 if (unlikely(cpu_intensive))
2054 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2055
2056
2057 hash_del(&worker->hentry);
2058 worker->current_work = NULL;
2059 worker->current_func = NULL;
2060 worker->current_pwq = NULL;
2061 worker->desc_valid = false;
2062 pwq_dec_nr_in_flight(pwq, work_color);
2063}
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077static void process_scheduled_works(struct worker *worker)
2078{
2079 while (!list_empty(&worker->scheduled)) {
2080 struct work_struct *work = list_first_entry(&worker->scheduled,
2081 struct work_struct, entry);
2082 process_one_work(worker, work);
2083 }
2084}
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098static int worker_thread(void *__worker)
2099{
2100 struct worker *worker = __worker;
2101 struct worker_pool *pool = worker->pool;
2102
2103
2104 worker->task->flags |= PF_WQ_WORKER;
2105woke_up:
2106 spin_lock_irq(&pool->lock);
2107
2108
2109 if (unlikely(worker->flags & WORKER_DIE)) {
2110 spin_unlock_irq(&pool->lock);
2111 WARN_ON_ONCE(!list_empty(&worker->entry));
2112 worker->task->flags &= ~PF_WQ_WORKER;
2113
2114 set_task_comm(worker->task, "kworker/dying");
2115 ida_simple_remove(&pool->worker_ida, worker->id);
2116 worker_detach_from_pool(worker, pool);
2117 kfree(worker);
2118 return 0;
2119 }
2120
2121 worker_leave_idle(worker);
2122recheck:
2123
2124 if (!need_more_worker(pool))
2125 goto sleep;
2126
2127
2128 if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2129 goto recheck;
2130
2131
2132
2133
2134
2135
2136 WARN_ON_ONCE(!list_empty(&worker->scheduled));
2137
2138
2139
2140
2141
2142
2143
2144
2145 worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2146
2147 do {
2148 struct work_struct *work =
2149 list_first_entry(&pool->worklist,
2150 struct work_struct, entry);
2151
2152 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2153
2154 process_one_work(worker, work);
2155 if (unlikely(!list_empty(&worker->scheduled)))
2156 process_scheduled_works(worker);
2157 } else {
2158 move_linked_works(work, &worker->scheduled, NULL);
2159 process_scheduled_works(worker);
2160 }
2161 } while (keep_working(pool));
2162
2163 worker_set_flags(worker, WORKER_PREP);
2164sleep:
2165
2166
2167
2168
2169
2170
2171
2172 worker_enter_idle(worker);
2173 __set_current_state(TASK_INTERRUPTIBLE);
2174 spin_unlock_irq(&pool->lock);
2175 schedule();
2176 goto woke_up;
2177}
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200static int rescuer_thread(void *__rescuer)
2201{
2202 struct worker *rescuer = __rescuer;
2203 struct workqueue_struct *wq = rescuer->rescue_wq;
2204 struct list_head *scheduled = &rescuer->scheduled;
2205 bool should_stop;
2206
2207 set_user_nice(current, RESCUER_NICE_LEVEL);
2208
2209
2210
2211
2212
2213 rescuer->task->flags |= PF_WQ_WORKER;
2214repeat:
2215 set_current_state(TASK_INTERRUPTIBLE);
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225 should_stop = kthread_should_stop();
2226
2227
2228 spin_lock_irq(&wq_mayday_lock);
2229
2230 while (!list_empty(&wq->maydays)) {
2231 struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2232 struct pool_workqueue, mayday_node);
2233 struct worker_pool *pool = pwq->pool;
2234 struct work_struct *work, *n;
2235
2236 __set_current_state(TASK_RUNNING);
2237 list_del_init(&pwq->mayday_node);
2238
2239 spin_unlock_irq(&wq_mayday_lock);
2240
2241 worker_attach_to_pool(rescuer, pool);
2242
2243 spin_lock_irq(&pool->lock);
2244 rescuer->pool = pool;
2245
2246
2247
2248
2249
2250 WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
2251 list_for_each_entry_safe(work, n, &pool->worklist, entry)
2252 if (get_work_pwq(work) == pwq)
2253 move_linked_works(work, scheduled, &n);
2254
2255 process_scheduled_works(rescuer);
2256
2257
2258
2259
2260
2261 put_pwq(pwq);
2262
2263
2264
2265
2266
2267
2268 if (need_more_worker(pool))
2269 wake_up_worker(pool);
2270
2271 rescuer->pool = NULL;
2272 spin_unlock_irq(&pool->lock);
2273
2274 worker_detach_from_pool(rescuer, pool);
2275
2276 spin_lock_irq(&wq_mayday_lock);
2277 }
2278
2279 spin_unlock_irq(&wq_mayday_lock);
2280
2281 if (should_stop) {
2282 __set_current_state(TASK_RUNNING);
2283 rescuer->task->flags &= ~PF_WQ_WORKER;
2284 return 0;
2285 }
2286
2287
2288 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2289 schedule();
2290 goto repeat;
2291}
2292
2293struct wq_barrier {
2294 struct work_struct work;
2295 struct completion done;
2296};
2297
2298static void wq_barrier_func(struct work_struct *work)
2299{
2300 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2301 complete(&barr->done);
2302}
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328static void insert_wq_barrier(struct pool_workqueue *pwq,
2329 struct wq_barrier *barr,
2330 struct work_struct *target, struct worker *worker)
2331{
2332 struct list_head *head;
2333 unsigned int linked = 0;
2334
2335
2336
2337
2338
2339
2340
2341 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2342 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2343 init_completion(&barr->done);
2344
2345
2346
2347
2348
2349 if (worker)
2350 head = worker->scheduled.next;
2351 else {
2352 unsigned long *bits = work_data_bits(target);
2353
2354 head = target->entry.next;
2355
2356 linked = *bits & WORK_STRUCT_LINKED;
2357 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2358 }
2359
2360 debug_work_activate(&barr->work);
2361 insert_work(pwq, &barr->work, head,
2362 work_color_to_flags(WORK_NO_COLOR) | linked);
2363}
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
2397 int flush_color, int work_color)
2398{
2399 bool wait = false;
2400 struct pool_workqueue *pwq;
2401
2402 if (flush_color >= 0) {
2403 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
2404 atomic_set(&wq->nr_pwqs_to_flush, 1);
2405 }
2406
2407 for_each_pwq(pwq, wq) {
2408 struct worker_pool *pool = pwq->pool;
2409
2410 spin_lock_irq(&pool->lock);
2411
2412 if (flush_color >= 0) {
2413 WARN_ON_ONCE(pwq->flush_color != -1);
2414
2415 if (pwq->nr_in_flight[flush_color]) {
2416 pwq->flush_color = flush_color;
2417 atomic_inc(&wq->nr_pwqs_to_flush);
2418 wait = true;
2419 }
2420 }
2421
2422 if (work_color >= 0) {
2423 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
2424 pwq->work_color = work_color;
2425 }
2426
2427 spin_unlock_irq(&pool->lock);
2428 }
2429
2430 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
2431 complete(&wq->first_flusher->done);
2432
2433 return wait;
2434}

/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * This function sleeps until all work items which were queued on entry
 * have finished execution, but it is not livelocked by new incoming ones.
 */
2443void flush_workqueue(struct workqueue_struct *wq)
2444{
2445 struct wq_flusher this_flusher = {
2446 .list = LIST_HEAD_INIT(this_flusher.list),
2447 .flush_color = -1,
2448 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2449 };
2450 int next_color;
2451
2452 lock_map_acquire(&wq->lockdep_map);
2453 lock_map_release(&wq->lockdep_map);
2454
2455 mutex_lock(&wq->mutex);
2456
2457
2458
2459
2460 next_color = work_next_color(wq->work_color);
2461
2462 if (next_color != wq->flush_color) {
2463
2464
2465
2466
2467
2468 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
2469 this_flusher.flush_color = wq->work_color;
2470 wq->work_color = next_color;
2471
2472 if (!wq->first_flusher) {
2473
2474 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2475
2476 wq->first_flusher = &this_flusher;
2477
2478 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
2479 wq->work_color)) {
2480
2481 wq->flush_color = next_color;
2482 wq->first_flusher = NULL;
2483 goto out_unlock;
2484 }
2485 } else {
2486
2487 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
2488 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2489 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2490 }
2491 } else {
2492
2493
2494
2495
2496
2497 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2498 }
2499
2500 mutex_unlock(&wq->mutex);
2501
2502 wait_for_completion(&this_flusher.done);
2503
2504
2505
2506
2507
2508
2509
2510 if (wq->first_flusher != &this_flusher)
2511 return;
2512
2513 mutex_lock(&wq->mutex);
2514
2515
2516 if (wq->first_flusher != &this_flusher)
2517 goto out_unlock;
2518
2519 wq->first_flusher = NULL;
2520
2521 WARN_ON_ONCE(!list_empty(&this_flusher.list));
2522 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
2523
2524 while (true) {
2525 struct wq_flusher *next, *tmp;
2526
2527
2528 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2529 if (next->flush_color != wq->flush_color)
2530 break;
2531 list_del_init(&next->list);
2532 complete(&next->done);
2533 }
2534
2535 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
2536 wq->flush_color != work_next_color(wq->work_color));
2537
2538
2539 wq->flush_color = work_next_color(wq->flush_color);
2540
2541
2542 if (!list_empty(&wq->flusher_overflow)) {
2543
2544
2545
2546
2547
2548
2549 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2550 tmp->flush_color = wq->work_color;
2551
2552 wq->work_color = work_next_color(wq->work_color);
2553
2554 list_splice_tail_init(&wq->flusher_overflow,
2555 &wq->flusher_queue);
2556 flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
2557 }
2558
2559 if (list_empty(&wq->flusher_queue)) {
2560 WARN_ON_ONCE(wq->flush_color != wq->work_color);
2561 break;
2562 }
2563
2564
2565
2566
2567
2568 WARN_ON_ONCE(wq->flush_color == wq->work_color);
2569 WARN_ON_ONCE(wq->flush_color != next->flush_color);
2570
2571 list_del_init(&next->list);
2572 wq->first_flusher = next;
2573
2574 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
2575 break;
2576
2577
2578
2579
2580
2581 wq->first_flusher = NULL;
2582 }
2583
2584out_unlock:
2585 mutex_unlock(&wq->mutex);
2586}
2587EXPORT_SYMBOL_GPL(flush_workqueue);
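
/*
 * Illustrative sketch (not part of the original file): a driver with its
 * own workqueue typically flushes it before freeing the objects its work
 * items touch.  The names are hypothetical:
 *
 *	wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
 *	queue_work(wq, &obj->work);
 *	...
 *	flush_workqueue(wq);	// all previously queued items have finished
 *	destroy_workqueue(wq);	// drains and then frees the workqueue
 */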

/**
 * drain_workqueue - drain a workqueue
 * @wq: workqueue to drain
 *
 * Wait until the workqueue becomes empty.  While draining is in progress,
 * only chain queueing is allowed.  IOW, only currently pending or running
 * work items on @wq can queue further work items on it.  @wq is flushed
 * repeatedly until it becomes empty.  The number of flushes is determined
 * by the depth of chaining and should be relatively short.  Whine if it
 * takes too long.
 */
2600void drain_workqueue(struct workqueue_struct *wq)
2601{
2602 unsigned int flush_cnt = 0;
2603 struct pool_workqueue *pwq;
2604
2605
2606
2607
2608
2609
2610 mutex_lock(&wq->mutex);
2611 if (!wq->nr_drainers++)
2612 wq->flags |= __WQ_DRAINING;
2613 mutex_unlock(&wq->mutex);
2614reflush:
2615 flush_workqueue(wq);
2616
2617 mutex_lock(&wq->mutex);
2618
2619 for_each_pwq(pwq, wq) {
2620 bool drained;
2621
2622 spin_lock_irq(&pwq->pool->lock);
2623 drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
2624 spin_unlock_irq(&pwq->pool->lock);
2625
2626 if (drained)
2627 continue;
2628
2629 if (++flush_cnt == 10 ||
2630 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2631 pr_warn("workqueue %s: drain_workqueue() isn't complete after %u tries\n",
2632 wq->name, flush_cnt);
2633
2634 mutex_unlock(&wq->mutex);
2635 goto reflush;
2636 }
2637
2638 if (!--wq->nr_drainers)
2639 wq->flags &= ~__WQ_DRAINING;
2640 mutex_unlock(&wq->mutex);
2641}
2642EXPORT_SYMBOL_GPL(drain_workqueue);
2643
2644static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
2645{
2646 struct worker *worker = NULL;
2647 struct worker_pool *pool;
2648 struct pool_workqueue *pwq;
2649
2650 might_sleep();
2651
2652 local_irq_disable();
2653 pool = get_work_pool(work);
2654 if (!pool) {
2655 local_irq_enable();
2656 return false;
2657 }
2658
2659 spin_lock(&pool->lock);
2660
2661 pwq = get_work_pwq(work);
2662 if (pwq) {
2663 if (unlikely(pwq->pool != pool))
2664 goto already_gone;
2665 } else {
2666 worker = find_worker_executing_work(pool, work);
2667 if (!worker)
2668 goto already_gone;
2669 pwq = worker->current_pwq;
2670 }
2671
2672 insert_wq_barrier(pwq, barr, work, worker);
2673 spin_unlock_irq(&pool->lock);
2674
2675
2676
2677
2678
2679
2680
2681 if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
2682 lock_map_acquire(&pwq->wq->lockdep_map);
2683 else
2684 lock_map_acquire_read(&pwq->wq->lockdep_map);
2685 lock_map_release(&pwq->wq->lockdep_map);
2686
2687 return true;
2688already_gone:
2689 spin_unlock_irq(&pool->lock);
2690 return false;
2691}

/**
 * flush_work - wait for a work to finish executing the last queueing instance
 * @work: the work to flush
 *
 * Wait until @work has finished execution.  @work is guaranteed to be idle
 * on return if it hasn't been requeued since flush started.
 *
 * Return:
 * %true if flush_work() waited for the work to finish execution,
 * %false if it was already idle.
 */
2704bool flush_work(struct work_struct *work)
2705{
2706 struct wq_barrier barr;
2707
2708 lock_map_acquire(&work->lockdep_map);
2709 lock_map_release(&work->lockdep_map);
2710
2711 if (start_flush_work(work, &barr)) {
2712 wait_for_completion(&barr.done);
2713 destroy_work_on_stack(&barr.work);
2714 return true;
2715 } else {
2716 return false;
2717 }
2718}
2719EXPORT_SYMBOL_GPL(flush_work);
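
/*
 * Illustrative sketch (not part of the original file): flush_work() waits
 * for the last queueing instance of one specific item, which is usually
 * cheaper than flushing the whole workqueue:
 *
 *	queue_work(system_wq, &obj->work);
 *	...
 *	if (flush_work(&obj->work))
 *		pr_debug("waited for obj->work to finish\n");
 *	// obj->work is now idle unless someone requeued it after the
 *	// flush started
 */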
2720
2721static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
2722{
2723 unsigned long flags;
2724 int ret;
2725
2726 do {
2727 ret = try_to_grab_pending(work, is_dwork, &flags);
2728
2729
2730
2731
2732 if (unlikely(ret == -ENOENT))
2733 flush_work(work);
2734 } while (unlikely(ret < 0));
2735
2736
2737 mark_work_canceling(work);
2738 local_irq_restore(flags);
2739
2740 flush_work(work);
2741 clear_work_data(work);
2742 return ret;
2743}

/**
 * cancel_work_sync - cancel a work and wait for it to finish
 * @work: the work to cancel
 *
 * Cancel @work and wait for its execution to finish.  This function
 * can be used even if the work re-queues itself or migrates to
 * another workqueue.  On return from this function, @work is
 * guaranteed to be not pending or executing on any CPU.
 *
 * cancel_work_sync(&delayed_work->work) must not be used for
 * delayed_work's.  Use cancel_delayed_work_sync() instead.
 *
 * The caller must ensure that the workqueue on which @work was last
 * queued can't be destroyed before this function returns.
 *
 * Return:
 * %true if @work was pending, %false otherwise.
 */
2763bool cancel_work_sync(struct work_struct *work)
2764{
2765 return __cancel_work_timer(work, false);
2766}
2767EXPORT_SYMBOL_GPL(cancel_work_sync);
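
/*
 * Illustrative teardown sketch (not part of the original file; names
 * hypothetical): cancel_work_sync() is the usual last reference point for
 * a work item embedded in an object that is about to be freed:
 *
 *	static void my_dev_remove(struct my_dev *dev)
 *	{
 *		// after this returns, dev->work is neither pending nor
 *		// running anywhere, so freeing dev is safe
 *		cancel_work_sync(&dev->work);
 *		kfree(dev);
 *	}
 */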
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781bool flush_delayed_work(struct delayed_work *dwork)
2782{
2783 local_irq_disable();
2784 if (del_timer_sync(&dwork->timer))
2785 __queue_work(dwork->cpu, dwork->wq, &dwork->work);
2786 local_irq_enable();
2787 return flush_work(&dwork->work);
2788}
2789EXPORT_SYMBOL(flush_delayed_work);

/**
 * cancel_delayed_work - cancel a delayed work
 * @dwork: delayed_work to cancel
 *
 * Kill off a pending delayed_work.
 *
 * Return: %true if @dwork was pending and canceled; %false if it wasn't
 * pending.
 *
 * Note:
 * The work callback function may still be running on return, unless
 * it returns %true and the work doesn't re-arm itself.  Explicitly flush or
 * use cancel_delayed_work_sync() to wait on it.
 *
 * This function is safe to call from any context including IRQ handler.
 */
2807bool cancel_delayed_work(struct delayed_work *dwork)
2808{
2809 unsigned long flags;
2810 int ret;
2811
2812 do {
2813 ret = try_to_grab_pending(&dwork->work, true, &flags);
2814 } while (unlikely(ret == -EAGAIN));
2815
2816 if (unlikely(ret < 0))
2817 return false;
2818
2819 set_work_pool_and_clear_pending(&dwork->work,
2820 get_work_pool_id(&dwork->work));
2821 local_irq_restore(flags);
2822 return ret;
2823}
2824EXPORT_SYMBOL(cancel_delayed_work);
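
/*
 * Illustrative sketch (not part of the original file): the non-_sync
 * cancel is cheap and irq-safe but does not wait, so it is only enough
 * when the handler can tolerate one more late run:
 *
 *	cancel_delayed_work(&dev->poll_dwork);		// may still be running
 *	cancel_delayed_work_sync(&dev->poll_dwork);	// guaranteed finished
 *
 * Use the _sync variant below before freeing anything the handler
 * dereferences.
 */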

/**
 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
 * @dwork: the delayed work to cancel
 *
 * This is cancel_work_sync() for delayed works.
 *
 * Return:
 * %true if @dwork was pending, %false otherwise.
 */
2835bool cancel_delayed_work_sync(struct delayed_work *dwork)
2836{
2837 return __cancel_work_timer(&dwork->work, true);
2838}
2839EXPORT_SYMBOL(cancel_delayed_work_sync);

/**
 * schedule_on_each_cpu - execute a function synchronously on each online CPU
 * @func: the function to call
 *
 * schedule_on_each_cpu() executes @func on each online CPU using the
 * system workqueue and blocks until all CPUs have completed.
 * schedule_on_each_cpu() is very slow.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
2852int schedule_on_each_cpu(work_func_t func)
2853{
2854 int cpu;
2855 struct work_struct __percpu *works;
2856
2857 works = alloc_percpu(struct work_struct);
2858 if (!works)
2859 return -ENOMEM;
2860
2861 get_online_cpus();
2862
2863 for_each_online_cpu(cpu) {
2864 struct work_struct *work = per_cpu_ptr(works, cpu);
2865
2866 INIT_WORK(work, func);
2867 schedule_work_on(cpu, work);
2868 }
2869
2870 for_each_online_cpu(cpu)
2871 flush_work(per_cpu_ptr(works, cpu));
2872
2873 put_online_cpus();
2874 free_percpu(works);
2875 return 0;
2876}
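
/*
 * Illustrative sketch (not part of the original file; my_percpu_sync is
 * hypothetical): schedule_on_each_cpu() is a blocking "run this everywhere"
 * helper, e.g. to nudge per-cpu state from process context:
 *
 *	static void my_percpu_sync(struct work_struct *work)
 *	{
 *		// runs once on every online CPU, in process context
 *	}
 *
 *	int ret = schedule_on_each_cpu(my_percpu_sync);
 *	if (ret)
 *		pr_warn("per-cpu sync failed: %d\n", ret);
 */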

/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
2902void flush_scheduled_work(void)
2903{
2904 flush_workqueue(system_wq);
2905}
2906EXPORT_SYMBOL(flush_scheduled_work);

/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Return:	0 - function was executed
 *		1 - function was scheduled for execution
 */
2920int execute_in_process_context(work_func_t fn, struct execute_work *ew)
2921{
2922 if (!in_interrupt()) {
2923 fn(&ew->work);
2924 return 0;
2925 }
2926
2927 INIT_WORK(&ew->work, fn);
2928 schedule_work(&ew->work);
2929
2930 return 1;
2931}
2932EXPORT_SYMBOL_GPL(execute_in_process_context);
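
/*
 * Illustrative sketch (not part of the original file; names hypothetical):
 * callers that may or may not be in interrupt context can use
 * execute_in_process_context() instead of open-coding the in_interrupt()
 * check.  @ew must live at least until the callback runs, so it is usually
 * embedded in a longer-lived object:
 *
 *	static void my_release(struct work_struct *work)
 *	{
 *		struct my_obj *obj = container_of(work, struct my_obj,
 *						  ew.work);
 *		kfree(obj);
 *	}
 *
 *	execute_in_process_context(my_release, &obj->ew);
 */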
2933
2934#ifdef CONFIG_SYSFS
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949struct wq_device {
2950 struct workqueue_struct *wq;
2951 struct device dev;
2952};
2953
2954static struct workqueue_struct *dev_to_wq(struct device *dev)
2955{
2956 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
2957
2958 return wq_dev->wq;
2959}
2960
2961static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
2962 char *buf)
2963{
2964 struct workqueue_struct *wq = dev_to_wq(dev);
2965
2966 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
2967}
2968static DEVICE_ATTR_RO(per_cpu);
2969
2970static ssize_t max_active_show(struct device *dev,
2971 struct device_attribute *attr, char *buf)
2972{
2973 struct workqueue_struct *wq = dev_to_wq(dev);
2974
2975 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
2976}
2977
2978static ssize_t max_active_store(struct device *dev,
2979 struct device_attribute *attr, const char *buf,
2980 size_t count)
2981{
2982 struct workqueue_struct *wq = dev_to_wq(dev);
2983 int val;
2984
2985 if (sscanf(buf, "%d", &val) != 1 || val <= 0)
2986 return -EINVAL;
2987
2988 workqueue_set_max_active(wq, val);
2989 return count;
2990}
2991static DEVICE_ATTR_RW(max_active);
2992
2993static struct attribute *wq_sysfs_attrs[] = {
2994 &dev_attr_per_cpu.attr,
2995 &dev_attr_max_active.attr,
2996 NULL,
2997};
2998ATTRIBUTE_GROUPS(wq_sysfs);
2999
3000static ssize_t wq_pool_ids_show(struct device *dev,
3001 struct device_attribute *attr, char *buf)
3002{
3003 struct workqueue_struct *wq = dev_to_wq(dev);
3004 const char *delim = "";
3005 int node, written = 0;
3006
3007 rcu_read_lock_sched();
3008 for_each_node(node) {
3009 written += scnprintf(buf + written, PAGE_SIZE - written,
3010 "%s%d:%d", delim, node,
3011 unbound_pwq_by_node(wq, node)->pool->id);
3012 delim = " ";
3013 }
3014 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3015 rcu_read_unlock_sched();
3016
3017 return written;
3018}
3019
3020static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
3021 char *buf)
3022{
3023 struct workqueue_struct *wq = dev_to_wq(dev);
3024 int written;
3025
3026 mutex_lock(&wq->mutex);
3027 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
3028 mutex_unlock(&wq->mutex);
3029
3030 return written;
3031}
3032
3033
3034static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
3035{
3036 struct workqueue_attrs *attrs;
3037
3038 attrs = alloc_workqueue_attrs(GFP_KERNEL);
3039 if (!attrs)
3040 return NULL;
3041
3042 mutex_lock(&wq->mutex);
3043 copy_workqueue_attrs(attrs, wq->unbound_attrs);
3044 mutex_unlock(&wq->mutex);
3045 return attrs;
3046}
3047
3048static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
3049 const char *buf, size_t count)
3050{
3051 struct workqueue_struct *wq = dev_to_wq(dev);
3052 struct workqueue_attrs *attrs;
3053 int ret;
3054
3055 attrs = wq_sysfs_prep_attrs(wq);
3056 if (!attrs)
3057 return -ENOMEM;
3058
3059 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
3060 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
3061 ret = apply_workqueue_attrs(wq, attrs);
3062 else
3063 ret = -EINVAL;
3064
3065 free_workqueue_attrs(attrs);
3066 return ret ?: count;
3067}
3068
3069static ssize_t wq_cpumask_show(struct device *dev,
3070 struct device_attribute *attr, char *buf)
3071{
3072 struct workqueue_struct *wq = dev_to_wq(dev);
3073 int written;
3074
3075 mutex_lock(&wq->mutex);
3076 written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask);
3077 mutex_unlock(&wq->mutex);
3078
3079 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3080 return written;
3081}
3082
3083static ssize_t wq_cpumask_store(struct device *dev,
3084 struct device_attribute *attr,
3085 const char *buf, size_t count)
3086{
3087 struct workqueue_struct *wq = dev_to_wq(dev);
3088 struct workqueue_attrs *attrs;
3089 int ret;
3090
3091 attrs = wq_sysfs_prep_attrs(wq);
3092 if (!attrs)
3093 return -ENOMEM;
3094
3095 ret = cpumask_parse(buf, attrs->cpumask);
3096 if (!ret)
3097 ret = apply_workqueue_attrs(wq, attrs);
3098
3099 free_workqueue_attrs(attrs);
3100 return ret ?: count;
3101}
3102
3103static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
3104 char *buf)
3105{
3106 struct workqueue_struct *wq = dev_to_wq(dev);
3107 int written;
3108
3109 mutex_lock(&wq->mutex);
3110 written = scnprintf(buf, PAGE_SIZE, "%d\n",
3111 !wq->unbound_attrs->no_numa);
3112 mutex_unlock(&wq->mutex);
3113
3114 return written;
3115}
3116
3117static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
3118 const char *buf, size_t count)
3119{
3120 struct workqueue_struct *wq = dev_to_wq(dev);
3121 struct workqueue_attrs *attrs;
3122 int v, ret;
3123
3124 attrs = wq_sysfs_prep_attrs(wq);
3125 if (!attrs)
3126 return -ENOMEM;
3127
3128 ret = -EINVAL;
3129 if (sscanf(buf, "%d", &v) == 1) {
3130 attrs->no_numa = !v;
3131 ret = apply_workqueue_attrs(wq, attrs);
3132 }
3133
3134 free_workqueue_attrs(attrs);
3135 return ret ?: count;
3136}
3137
3138static struct device_attribute wq_sysfs_unbound_attrs[] = {
3139 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
3140 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
3141 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
3142 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
3143 __ATTR_NULL,
3144};
3145
3146static struct bus_type wq_subsys = {
3147 .name = "workqueue",
3148 .dev_groups = wq_sysfs_groups,
3149};
3150
3151static int __init wq_sysfs_init(void)
3152{
3153 return subsys_virtual_register(&wq_subsys, NULL);
3154}
3155core_initcall(wq_sysfs_init);
3156
3157static void wq_device_release(struct device *dev)
3158{
3159 struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
3160
3161 kfree(wq_dev);
3162}
3163
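/**
 * workqueue_sysfs_register - make a workqueue visible in sysfs
 * @wq: the workqueue to register
 *
 * Expose @wq in sysfs under /sys/bus/workqueue/devices.  For unbound
 * workqueues, the pool_ids, nice, cpumask and numa attribute files are
 * created as well.  The uevent is suppressed until all attribute files
 * are in place.  Ordered workqueues are rejected with -EINVAL.
 *
 * Return: 0 on success, -errno on failure.
 */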
3179int workqueue_sysfs_register(struct workqueue_struct *wq)
3180{
3181 struct wq_device *wq_dev;
3182 int ret;
3183
3184
3185
3186
3187
3188
3189 if (WARN_ON(wq->flags & __WQ_ORDERED))
3190 return -EINVAL;
3191
3192 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
3193 if (!wq_dev)
3194 return -ENOMEM;
3195
3196 wq_dev->wq = wq;
3197 wq_dev->dev.bus = &wq_subsys;
3198 wq_dev->dev.init_name = wq->name;
3199 wq_dev->dev.release = wq_device_release;
3200
3201
3202
3203
3204
3205 dev_set_uevent_suppress(&wq_dev->dev, true);
3206
3207 ret = device_register(&wq_dev->dev);
3208 if (ret) {
3209 kfree(wq_dev);
3210 wq->wq_dev = NULL;
3211 return ret;
3212 }
3213
3214 if (wq->flags & WQ_UNBOUND) {
3215 struct device_attribute *attr;
3216
3217 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
3218 ret = device_create_file(&wq_dev->dev, attr);
3219 if (ret) {
3220 device_unregister(&wq_dev->dev);
3221 wq->wq_dev = NULL;
3222 return ret;
3223 }
3224 }
3225 }
3226
3227 dev_set_uevent_suppress(&wq_dev->dev, false);
3228 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
3229 return 0;
3230}
3231
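/**
 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
 * @wq: the workqueue to unregister
 *
 * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
 */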
3238static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
3239{
3240 struct wq_device *wq_dev = wq->wq_dev;
3241
3242 if (!wq->wq_dev)
3243 return;
3244
3245 wq->wq_dev = NULL;
3246 device_unregister(&wq_dev->dev);
3247}
3248#else
3249static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
3250#endif
3251
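/**
 * free_workqueue_attrs - free a workqueue_attrs
 * @attrs: workqueue_attrs to free
 *
 * Undo alloc_workqueue_attrs().  Passing %NULL is a no-op.
 */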
3258void free_workqueue_attrs(struct workqueue_attrs *attrs)
3259{
3260 if (attrs) {
3261 free_cpumask_var(attrs->cpumask);
3262 kfree(attrs);
3263 }
3264}
3265
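/**
 * alloc_workqueue_attrs - allocate a workqueue_attrs
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new workqueue_attrs, initialize with default settings and
 * return it.
 *
 * Return: the allocated attrs on success, %NULL on failure.
 */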
3275struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
3276{
3277 struct workqueue_attrs *attrs;
3278
3279 attrs = kzalloc(sizeof(*attrs), gfp_mask);
3280 if (!attrs)
3281 goto fail;
3282 if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask))
3283 goto fail;
3284
3285 cpumask_copy(attrs->cpumask, cpu_possible_mask);
3286 return attrs;
3287fail:
3288 free_workqueue_attrs(attrs);
3289 return NULL;
3290}
3291
3292static void copy_workqueue_attrs(struct workqueue_attrs *to,
3293 const struct workqueue_attrs *from)
3294{
3295 to->nice = from->nice;
3296 cpumask_copy(to->cpumask, from->cpumask);
3297
3298
3299
3300
3301
3302 to->no_numa = from->no_numa;
3303}
3304
3305
3306static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3307{
3308 u32 hash = 0;
3309
3310 hash = jhash_1word(attrs->nice, hash);
3311 hash = jhash(cpumask_bits(attrs->cpumask),
3312 BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3313 return hash;
3314}
3315
3316
3317static bool wqattrs_equal(const struct workqueue_attrs *a,
3318 const struct workqueue_attrs *b)
3319{
3320 if (a->nice != b->nice)
3321 return false;
3322 if (!cpumask_equal(a->cpumask, b->cpumask))
3323 return false;
3324 return true;
3325}
3326
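/**
 * init_worker_pool - initialize a newly zalloc'd worker_pool
 * @pool: worker_pool to initialize
 *
 * Initialize a newly zalloc'd @pool.  This also allocates @pool->attrs.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.  Even on failure,
 * all fields inside @pool proper are initialized and put_unbound_pool()
 * can be called on @pool safely to release it.
 */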
3337static int init_worker_pool(struct worker_pool *pool)
3338{
3339 spin_lock_init(&pool->lock);
3340 pool->id = -1;
3341 pool->cpu = -1;
3342 pool->node = NUMA_NO_NODE;
3343 pool->flags |= POOL_DISASSOCIATED;
3344 INIT_LIST_HEAD(&pool->worklist);
3345 INIT_LIST_HEAD(&pool->idle_list);
3346 hash_init(pool->busy_hash);
3347
3348 init_timer_deferrable(&pool->idle_timer);
3349 pool->idle_timer.function = idle_worker_timeout;
3350 pool->idle_timer.data = (unsigned long)pool;
3351
3352 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3353 (unsigned long)pool);
3354
3355 mutex_init(&pool->manager_arb);
3356 mutex_init(&pool->attach_mutex);
3357 INIT_LIST_HEAD(&pool->workers);
3358
3359 ida_init(&pool->worker_ida);
3360 INIT_HLIST_NODE(&pool->hash_node);
3361 pool->refcnt = 1;
3362
3363
3364 pool->attrs = alloc_workqueue_attrs(GFP_KERNEL);
3365 if (!pool->attrs)
3366 return -ENOMEM;
3367 return 0;
3368}
3369
3370static void rcu_free_pool(struct rcu_head *rcu)
3371{
3372 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3373
3374 ida_destroy(&pool->worker_ida);
3375 free_workqueue_attrs(pool->attrs);
3376 kfree(pool);
3377}
3378
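/**
 * put_unbound_pool - put a worker_pool
 * @pool: worker_pool to put
 *
 * Put @pool.  If its refcnt reaches zero, it gets destroyed in a
 * sched-RCU safe manner.  This function is also used on the failure path
 * of get_unbound_pool() and must be able to release partially
 * initialized pools.
 *
 * Should be called with wq_pool_mutex held.
 */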
3390static void put_unbound_pool(struct worker_pool *pool)
3391{
3392 DECLARE_COMPLETION_ONSTACK(detach_completion);
3393 struct worker *worker;
3394
3395 lockdep_assert_held(&wq_pool_mutex);
3396
3397 if (--pool->refcnt)
3398 return;
3399
3400
3401 if (WARN_ON(!(pool->cpu < 0)) ||
3402 WARN_ON(!list_empty(&pool->worklist)))
3403 return;
3404
3405
3406 if (pool->id >= 0)
3407 idr_remove(&worker_pool_idr, pool->id);
3408 hash_del(&pool->hash_node);
3409
3410
3411
3412
3413
3414
3415 mutex_lock(&pool->manager_arb);
3416
3417 spin_lock_irq(&pool->lock);
3418 while ((worker = first_idle_worker(pool)))
3419 destroy_worker(worker);
3420 WARN_ON(pool->nr_workers || pool->nr_idle);
3421 spin_unlock_irq(&pool->lock);
3422
3423 mutex_lock(&pool->attach_mutex);
3424 if (!list_empty(&pool->workers))
3425 pool->detach_completion = &detach_completion;
3426 mutex_unlock(&pool->attach_mutex);
3427
3428 if (pool->detach_completion)
3429 wait_for_completion(pool->detach_completion);
3430
3431 mutex_unlock(&pool->manager_arb);
3432
3433
3434 del_timer_sync(&pool->idle_timer);
3435 del_timer_sync(&pool->mayday_timer);
3436
3437
3438 call_rcu_sched(&pool->rcu, rcu_free_pool);
3439}
3440
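/**
 * get_unbound_pool - get a worker_pool with the specified attributes
 * @attrs: the attributes of the worker_pool to get
 *
 * Obtain a worker_pool which has the same attributes as @attrs, bump the
 * reference count and return it.  If there already is a matching
 * worker_pool, it is reused; otherwise, a new one is created.
 *
 * Should be called with wq_pool_mutex held.
 *
 * Return: on success, a worker_pool with the same attributes as @attrs.
 * On failure, %NULL.
 */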
3455static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3456{
3457 u32 hash = wqattrs_hash(attrs);
3458 struct worker_pool *pool;
3459 int node;
3460
3461 lockdep_assert_held(&wq_pool_mutex);
3462
3463
3464 hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3465 if (wqattrs_equal(pool->attrs, attrs)) {
3466 pool->refcnt++;
3467 return pool;
3468 }
3469 }
3470
3471
3472 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
3473 if (!pool || init_worker_pool(pool) < 0)
3474 goto fail;
3475
3476 lockdep_set_subclass(&pool->lock, 1);
3477 copy_workqueue_attrs(pool->attrs, attrs);
3478
3479
3480
3481
3482
3483 pool->attrs->no_numa = false;
3484
3485
3486 if (wq_numa_enabled) {
3487 for_each_node(node) {
3488 if (cpumask_subset(pool->attrs->cpumask,
3489 wq_numa_possible_cpumask[node])) {
3490 pool->node = node;
3491 break;
3492 }
3493 }
3494 }
3495
3496 if (worker_pool_assign_id(pool) < 0)
3497 goto fail;
3498
3499
3500 if (!create_worker(pool))
3501 goto fail;
3502
3503
3504 hash_add(unbound_pool_hash, &pool->hash_node, hash);
3505
3506 return pool;
3507fail:
3508 if (pool)
3509 put_unbound_pool(pool);
3510 return NULL;
3511}
3512
3513static void rcu_free_pwq(struct rcu_head *rcu)
3514{
3515 kmem_cache_free(pwq_cache,
3516 container_of(rcu, struct pool_workqueue, rcu));
3517}
3518
3519
3520
3521
3522
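/*
 * Release of an unbound pwq: drop the pool reference, free the pwq after
 * a sched-RCU grace period and, if this was the workqueue's last pwq,
 * free the workqueue itself.
 */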
3523static void pwq_unbound_release_workfn(struct work_struct *work)
3524{
3525 struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
3526 unbound_release_work);
3527 struct workqueue_struct *wq = pwq->wq;
3528 struct worker_pool *pool = pwq->pool;
3529 bool is_last;
3530
3531 if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
3532 return;
3533
3534 mutex_lock(&wq->mutex);
3535 list_del_rcu(&pwq->pwqs_node);
3536 is_last = list_empty(&wq->pwqs);
3537 mutex_unlock(&wq->mutex);
3538
3539 mutex_lock(&wq_pool_mutex);
3540 put_unbound_pool(pool);
3541 mutex_unlock(&wq_pool_mutex);
3542
3543 call_rcu_sched(&pwq->rcu, rcu_free_pwq);
3544
3545
3546
3547
3548
3549 if (is_last) {
3550 free_workqueue_attrs(wq->unbound_attrs);
3551 kfree(wq);
3552 }
3553}
3554
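/**
 * pwq_adjust_max_active - update a pwq's max_active to the current setting
 * @pwq: target pool_workqueue
 *
 * If @pwq isn't freezing, set @pwq->max_active to the associated
 * workqueue's saved_max_active and activate delayed work items
 * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
 */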
3563static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3564{
3565 struct workqueue_struct *wq = pwq->wq;
3566 bool freezable = wq->flags & WQ_FREEZABLE;
3567
3568
3569 lockdep_assert_held(&wq->mutex);
3570
3571
3572 if (!freezable && pwq->max_active == wq->saved_max_active)
3573 return;
3574
3575 spin_lock_irq(&pwq->pool->lock);
3576
3577
3578
3579
3580
3581
3582 if (!freezable || !workqueue_freezing) {
3583 pwq->max_active = wq->saved_max_active;
3584
3585 while (!list_empty(&pwq->delayed_works) &&
3586 pwq->nr_active < pwq->max_active)
3587 pwq_activate_first_delayed(pwq);
3588
3589
3590
3591
3592
3593 wake_up_worker(pwq->pool);
3594 } else {
3595 pwq->max_active = 0;
3596 }
3597
3598 spin_unlock_irq(&pwq->pool->lock);
3599}
3600
3601
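/* initialize newly allocated @pwq which is associated with @wq and @pool */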
3602static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
3603 struct worker_pool *pool)
3604{
3605 BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
3606
3607 memset(pwq, 0, sizeof(*pwq));
3608
3609 pwq->pool = pool;
3610 pwq->wq = wq;
3611 pwq->flush_color = -1;
3612 pwq->refcnt = 1;
3613 INIT_LIST_HEAD(&pwq->delayed_works);
3614 INIT_LIST_HEAD(&pwq->pwqs_node);
3615 INIT_LIST_HEAD(&pwq->mayday_node);
3616 INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
3617}
3618
3619
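/* sync @pwq with the current state of its associated wq and link it in */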
3620static void link_pwq(struct pool_workqueue *pwq)
3621{
3622 struct workqueue_struct *wq = pwq->wq;
3623
3624 lockdep_assert_held(&wq->mutex);
3625
3626
3627 if (!list_empty(&pwq->pwqs_node))
3628 return;
3629
3630
3631 pwq->work_color = wq->work_color;
3632
3633
3634 pwq_adjust_max_active(pwq);
3635
3636
3637 list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
3638}
3639
3640
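/* obtain a pool matching @attrs and create a pwq associating the pool and @wq */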
3641static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3642 const struct workqueue_attrs *attrs)
3643{
3644 struct worker_pool *pool;
3645 struct pool_workqueue *pwq;
3646
3647 lockdep_assert_held(&wq_pool_mutex);
3648
3649 pool = get_unbound_pool(attrs);
3650 if (!pool)
3651 return NULL;
3652
3653 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
3654 if (!pwq) {
3655 put_unbound_pool(pool);
3656 return NULL;
3657 }
3658
3659 init_pwq(pwq, wq, pool);
3660 return pwq;
3661}
3662
3663
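/* undo alloc_unbound_pwq(), used only in the error path */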
3664static void free_unbound_pwq(struct pool_workqueue *pwq)
3665{
3666 lockdep_assert_held(&wq_pool_mutex);
3667
3668 if (pwq) {
3669 put_unbound_pool(pwq->pool);
3670 kmem_cache_free(pwq_cache, pwq);
3671 }
3672}
3673
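/**
 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
 * @attrs: the wq_attrs of interest
 * @node: the target NUMA node
 * @cpu_going_down: if >= 0, the CPU to consider as offline
 * @cpumask: outarg, the resulting cpumask
 *
 * Calculate the cpumask a workqueue with @attrs should use on @node.  If
 * @cpu_going_down is >= 0, that cpu is considered offline during the
 * calculation.  If NUMA affinity is disabled or @node has no online CPU
 * requested by @attrs, @attrs->cpumask is used as-is.
 *
 * Return: %true if the resulting @cpumask differs from @attrs->cpumask,
 * %false if they're equal.
 */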
3696static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3697 int cpu_going_down, cpumask_t *cpumask)
3698{
3699 if (!wq_numa_enabled || attrs->no_numa)
3700 goto use_dfl;
3701
3702
3703 cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
3704 if (cpu_going_down >= 0)
3705 cpumask_clear_cpu(cpu_going_down, cpumask);
3706
3707 if (cpumask_empty(cpumask))
3708 goto use_dfl;
3709
3710
3711 cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
3712 return !cpumask_equal(cpumask, attrs->cpumask);
3713
3714use_dfl:
3715 cpumask_copy(cpumask, attrs->cpumask);
3716 return false;
3717}
3718
3719
3720static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3721 int node,
3722 struct pool_workqueue *pwq)
3723{
3724 struct pool_workqueue *old_pwq;
3725
3726 lockdep_assert_held(&wq->mutex);
3727
3728
3729 link_pwq(pwq);
3730
3731 old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
3732 rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
3733 return old_pwq;
3734}
3735
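/**
 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
 * @wq: the target workqueue
 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
 *
 * Apply @attrs to an unbound workqueue @wq.  Unless disabled, on NUMA
 * machines a separate pwq is mapped to each NUMA node which has possible
 * CPUs in @attrs->cpumask so that work items are affine to the node they
 * were issued on.  Old pwqs are released as in-flight work items finish.
 *
 * Performs GFP_KERNEL allocations.
 *
 * Return: 0 on success, -errno on failure.
 */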
3752int apply_workqueue_attrs(struct workqueue_struct *wq,
3753 const struct workqueue_attrs *attrs)
3754{
3755 struct workqueue_attrs *new_attrs, *tmp_attrs;
3756 struct pool_workqueue **pwq_tbl, *dfl_pwq;
3757 int node, ret;
3758
3759
3760 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3761 return -EINVAL;
3762
3763
3764 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3765 return -EINVAL;
3766
3767 pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
3768 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3769 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3770 if (!pwq_tbl || !new_attrs || !tmp_attrs)
3771 goto enomem;
3772
3773
3774 copy_workqueue_attrs(new_attrs, attrs);
3775 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3776
3777
3778
3779
3780
3781
3782 copy_workqueue_attrs(tmp_attrs, new_attrs);
3783
3784
3785
3786
3787
3788
3789 get_online_cpus();
3790
3791 mutex_lock(&wq_pool_mutex);
3792
3793
3794
3795
3796
3797
3798 dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3799 if (!dfl_pwq)
3800 goto enomem_pwq;
3801
3802 for_each_node(node) {
3803 if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
3804 pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3805 if (!pwq_tbl[node])
3806 goto enomem_pwq;
3807 } else {
3808 dfl_pwq->refcnt++;
3809 pwq_tbl[node] = dfl_pwq;
3810 }
3811 }
3812
3813 mutex_unlock(&wq_pool_mutex);
3814
3815
3816 mutex_lock(&wq->mutex);
3817
3818 copy_workqueue_attrs(wq->unbound_attrs, new_attrs);
3819
3820
3821 for_each_node(node)
3822 pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]);
3823
3824
3825 link_pwq(dfl_pwq);
3826 swap(wq->dfl_pwq, dfl_pwq);
3827
3828 mutex_unlock(&wq->mutex);
3829
3830
3831 for_each_node(node)
3832 put_pwq_unlocked(pwq_tbl[node]);
3833 put_pwq_unlocked(dfl_pwq);
3834
3835 put_online_cpus();
3836 ret = 0;
3837
3838out_free:
3839 free_workqueue_attrs(tmp_attrs);
3840 free_workqueue_attrs(new_attrs);
3841 kfree(pwq_tbl);
3842 return ret;
3843
3844enomem_pwq:
3845 free_unbound_pwq(dfl_pwq);
3846 for_each_node(node)
3847 if (pwq_tbl && pwq_tbl[node] != dfl_pwq)
3848 free_unbound_pwq(pwq_tbl[node]);
3849 mutex_unlock(&wq_pool_mutex);
3850 put_online_cpus();
3851enomem:
3852 ret = -ENOMEM;
3853 goto out_free;
3854}
3855
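/**
 * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
 * @wq: the target workqueue
 * @cpu: the CPU coming up or going down
 * @online: whether @cpu is coming up or going down
 *
 * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
 * %CPU_DOWN_FAILED.  @cpu is being hot[un]plugged; update the NUMA
 * affinity of @wq's pwq for @cpu's node accordingly.  If a node-specific
 * pwq can't be allocated, the workqueue falls back to @wq->dfl_pwq which
 * may not be optimal but is always correct.
 *
 * Called with wq_pool_mutex held.
 */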
3878static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3879 bool online)
3880{
3881 int node = cpu_to_node(cpu);
3882 int cpu_off = online ? -1 : cpu;
3883 struct pool_workqueue *old_pwq = NULL, *pwq;
3884 struct workqueue_attrs *target_attrs;
3885 cpumask_t *cpumask;
3886
3887 lockdep_assert_held(&wq_pool_mutex);
3888
3889 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
3890 return;
3891
3892
3893
3894
3895
3896
3897 target_attrs = wq_update_unbound_numa_attrs_buf;
3898 cpumask = target_attrs->cpumask;
3899
3900 mutex_lock(&wq->mutex);
3901 if (wq->unbound_attrs->no_numa)
3902 goto out_unlock;
3903
3904 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3905 pwq = unbound_pwq_by_node(wq, node);
3906
3907
3908
3909
3910
3911
3912
3913 if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
3914 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3915 goto out_unlock;
3916 } else {
3917 goto use_dfl_pwq;
3918 }
3919
3920 mutex_unlock(&wq->mutex);
3921
3922
3923 pwq = alloc_unbound_pwq(wq, target_attrs);
3924 if (!pwq) {
3925 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3926 wq->name);
3927 mutex_lock(&wq->mutex);
3928 goto use_dfl_pwq;
3929 }
3930
3931
3932
3933
3934
3935
3936
3937 mutex_lock(&wq->mutex);
3938 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3939 goto out_unlock;
3940
3941use_dfl_pwq:
3942 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3943 get_pwq(wq->dfl_pwq);
3944 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
3945 old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
3946out_unlock:
3947 mutex_unlock(&wq->mutex);
3948 put_pwq_unlocked(old_pwq);
3949}
3950
3951static int alloc_and_link_pwqs(struct workqueue_struct *wq)
3952{
3953 bool highpri = wq->flags & WQ_HIGHPRI;
3954 int cpu, ret;
3955
3956 if (!(wq->flags & WQ_UNBOUND)) {
3957 wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
3958 if (!wq->cpu_pwqs)
3959 return -ENOMEM;
3960
3961 for_each_possible_cpu(cpu) {
3962 struct pool_workqueue *pwq =
3963 per_cpu_ptr(wq->cpu_pwqs, cpu);
3964 struct worker_pool *cpu_pools =
3965 per_cpu(cpu_worker_pools, cpu);
3966
3967 init_pwq(pwq, wq, &cpu_pools[highpri]);
3968
3969 mutex_lock(&wq->mutex);
3970 link_pwq(pwq);
3971 mutex_unlock(&wq->mutex);
3972 }
3973 return 0;
3974 } else if (wq->flags & __WQ_ORDERED) {
3975 ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
3976
3977 WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
3978 wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
3979 "ordering guarantee broken for workqueue %s\n", wq->name);
3980 return ret;
3981 } else {
3982 return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
3983 }
3984}
3985
3986static int wq_clamp_max_active(int max_active, unsigned int flags,
3987 const char *name)
3988{
3989 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
3990
3991 if (max_active < 1 || max_active > lim)
3992 pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
3993 max_active, name, 1, lim);
3994
3995 return clamp_val(max_active, 1, lim);
3996}
3997
3998struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3999 unsigned int flags,
4000 int max_active,
4001 struct lock_class_key *key,
4002 const char *lock_name, ...)
4003{
4004 size_t tbl_size = 0;
4005 va_list args;
4006 struct workqueue_struct *wq;
4007 struct pool_workqueue *pwq;
4008
4009
4010 if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4011 flags |= WQ_UNBOUND;
4012
4013
4014 if (flags & WQ_UNBOUND)
4015 tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4016
4017 wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4018 if (!wq)
4019 return NULL;
4020
4021 if (flags & WQ_UNBOUND) {
4022 wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL);
4023 if (!wq->unbound_attrs)
4024 goto err_free_wq;
4025 }
4026
4027 va_start(args, lock_name);
4028 vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4029 va_end(args);
4030
4031 max_active = max_active ?: WQ_DFL_ACTIVE;
4032 max_active = wq_clamp_max_active(max_active, flags, wq->name);
4033
4034
4035 wq->flags = flags;
4036 wq->saved_max_active = max_active;
4037 mutex_init(&wq->mutex);
4038 atomic_set(&wq->nr_pwqs_to_flush, 0);
4039 INIT_LIST_HEAD(&wq->pwqs);
4040 INIT_LIST_HEAD(&wq->flusher_queue);
4041 INIT_LIST_HEAD(&wq->flusher_overflow);
4042 INIT_LIST_HEAD(&wq->maydays);
4043
4044 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
4045 INIT_LIST_HEAD(&wq->list);
4046
4047 if (alloc_and_link_pwqs(wq) < 0)
4048 goto err_free_wq;
4049
4050
4051
4052
4053
4054 if (flags & WQ_MEM_RECLAIM) {
4055 struct worker *rescuer;
4056
4057 rescuer = alloc_worker(NUMA_NO_NODE);
4058 if (!rescuer)
4059 goto err_destroy;
4060
4061 rescuer->rescue_wq = wq;
4062 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
4063 wq->name);
4064 if (IS_ERR(rescuer->task)) {
4065 kfree(rescuer);
4066 goto err_destroy;
4067 }
4068
4069 wq->rescuer = rescuer;
4070 rescuer->task->flags |= PF_NO_SETAFFINITY;
4071 wake_up_process(rescuer->task);
4072 }
4073
4074 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4075 goto err_destroy;
4076
4077
4078
4079
4080
4081
4082 mutex_lock(&wq_pool_mutex);
4083
4084 mutex_lock(&wq->mutex);
4085 for_each_pwq(pwq, wq)
4086 pwq_adjust_max_active(pwq);
4087 mutex_unlock(&wq->mutex);
4088
4089 list_add(&wq->list, &workqueues);
4090
4091 mutex_unlock(&wq_pool_mutex);
4092
4093 return wq;
4094
4095err_free_wq:
4096 free_workqueue_attrs(wq->unbound_attrs);
4097 kfree(wq);
4098 return NULL;
4099err_destroy:
4100 destroy_workqueue(wq);
4101 return NULL;
4102}
4103EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
4104
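/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue.  All work currently pending is drained
 * first.  The destruction is aborted with a warning if any work item or
 * pwq is found to be still in use.
 */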
4111void destroy_workqueue(struct workqueue_struct *wq)
4112{
4113 struct pool_workqueue *pwq;
4114 int node;
4115
4116
4117 drain_workqueue(wq);
4118
4119
4120 mutex_lock(&wq->mutex);
4121 for_each_pwq(pwq, wq) {
4122 int i;
4123
4124 for (i = 0; i < WORK_NR_COLORS; i++) {
4125 if (WARN_ON(pwq->nr_in_flight[i])) {
4126 mutex_unlock(&wq->mutex);
4127 return;
4128 }
4129 }
4130
4131 if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
4132 WARN_ON(pwq->nr_active) ||
4133 WARN_ON(!list_empty(&pwq->delayed_works))) {
4134 mutex_unlock(&wq->mutex);
4135 return;
4136 }
4137 }
4138 mutex_unlock(&wq->mutex);
4139
4140
4141
4142
4143
4144 mutex_lock(&wq_pool_mutex);
4145 list_del_init(&wq->list);
4146 mutex_unlock(&wq_pool_mutex);
4147
4148 workqueue_sysfs_unregister(wq);
4149
4150 if (wq->rescuer) {
4151 kthread_stop(wq->rescuer->task);
4152 kfree(wq->rescuer);
4153 wq->rescuer = NULL;
4154 }
4155
4156 if (!(wq->flags & WQ_UNBOUND)) {
4157
4158
4159
4160
4161 free_percpu(wq->cpu_pwqs);
4162 kfree(wq);
4163 } else {
4164
4165
4166
4167
4168
4169 for_each_node(node) {
4170 pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4171 RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4172 put_pwq_unlocked(pwq);
4173 }
4174
4175
4176
4177
4178
4179 pwq = wq->dfl_pwq;
4180 wq->dfl_pwq = NULL;
4181 put_pwq_unlocked(pwq);
4182 }
4183}
4184EXPORT_SYMBOL_GPL(destroy_workqueue);
4185
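/**
 * workqueue_set_max_active - adjust max_active of a workqueue
 * @wq: target workqueue
 * @max_active: new max_active value
 *
 * Set max_active of @wq to @max_active.  The value is clamped to a sane
 * range and propagated to all of @wq's pwqs.  Ordered workqueues are
 * rejected with a warning.
 */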
4196void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4197{
4198 struct pool_workqueue *pwq;
4199
4200
4201 if (WARN_ON(wq->flags & __WQ_ORDERED))
4202 return;
4203
4204 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4205
4206 mutex_lock(&wq->mutex);
4207
4208 wq->saved_max_active = max_active;
4209
4210 for_each_pwq(pwq, wq)
4211 pwq_adjust_max_active(pwq);
4212
4213 mutex_unlock(&wq->mutex);
4214}
4215EXPORT_SYMBOL_GPL(workqueue_set_max_active);
4216
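/**
 * current_is_workqueue_rescuer - is %current a workqueue rescuer?
 *
 * Determine whether %current is a workqueue rescuer.  Can be used from
 * work functions to determine whether it's being run off the rescuer task.
 *
 * Return: %true if %current is a workqueue rescuer.  %false otherwise.
 */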
4225bool current_is_workqueue_rescuer(void)
4226{
4227 struct worker *worker = current_wq_worker();
4228
4229 return worker && worker->rescue_wq;
4230}
4231
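/**
 * workqueue_congested - test whether a workqueue is congested
 * @cpu: CPU in question
 * @wq: target workqueue
 *
 * Test whether @wq's pwq for @cpu is congested, i.e. has delayed work
 * items waiting for activation.  There is no synchronization around this
 * function and the result is unreliable and only useful as advisory
 * hints or for debugging.
 *
 * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
 *
 * Return: %true if congested, %false otherwise.
 */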
4250bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4251{
4252 struct pool_workqueue *pwq;
4253 bool ret;
4254
4255 rcu_read_lock_sched();
4256
4257 if (cpu == WORK_CPU_UNBOUND)
4258 cpu = smp_processor_id();
4259
4260 if (!(wq->flags & WQ_UNBOUND))
4261 pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4262 else
4263 pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4264
4265 ret = !list_empty(&pwq->delayed_works);
4266 rcu_read_unlock_sched();
4267
4268 return ret;
4269}
4270EXPORT_SYMBOL_GPL(workqueue_congested);
4271
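/**
 * work_busy - test whether a work is currently pending or running
 * @work: the work to be tested
 *
 * Test whether @work is currently pending or running.  There is no
 * synchronization around this function and the result is unreliable and
 * only useful as advisory hints or for debugging.
 *
 * Return: OR'd bitmask of WORK_BUSY_PENDING and WORK_BUSY_RUNNING.
 */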
4283unsigned int work_busy(struct work_struct *work)
4284{
4285 struct worker_pool *pool;
4286 unsigned long flags;
4287 unsigned int ret = 0;
4288
4289 if (work_pending(work))
4290 ret |= WORK_BUSY_PENDING;
4291
4292 local_irq_save(flags);
4293 pool = get_work_pool(work);
4294 if (pool) {
4295 spin_lock(&pool->lock);
4296 if (find_worker_executing_work(pool, work))
4297 ret |= WORK_BUSY_RUNNING;
4298 spin_unlock(&pool->lock);
4299 }
4300 local_irq_restore(flags);
4301
4302 return ret;
4303}
4304EXPORT_SYMBOL_GPL(work_busy);
4305
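/**
 * set_worker_desc - set description for the current work item
 * @fmt: printf-style format string
 * @...: arguments for the format string
 *
 * This function can be called by a running work function to describe what
 * the work item is about.  If the worker task gets dumped, this
 * information is printed out together to help debugging.  The description
 * can be at most WORKER_DESC_LEN including the trailing '\0'.
 */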
4316void set_worker_desc(const char *fmt, ...)
4317{
4318 struct worker *worker = current_wq_worker();
4319 va_list args;
4320
4321 if (worker) {
4322 va_start(args, fmt);
4323 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4324 va_end(args);
4325 worker->desc_valid = true;
4326 }
4327}
4328
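/**
 * print_worker_info - print out worker information and description
 * @log_lvl: the log level to use when printing
 * @task: target task
 *
 * If @task is a worker and currently executing a work item, print out the
 * name of the workqueue being serviced and the worker description set
 * with set_worker_desc() by the currently executing work item.
 *
 * This function can be safely called on any task as long as the
 * task_struct itself is accessible; it isn't synchronized and may print
 * out mixed-up or stale information.
 */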
4342void print_worker_info(const char *log_lvl, struct task_struct *task)
4343{
4344 work_func_t *fn = NULL;
4345 char name[WQ_NAME_LEN] = { };
4346 char desc[WORKER_DESC_LEN] = { };
4347 struct pool_workqueue *pwq = NULL;
4348 struct workqueue_struct *wq = NULL;
4349 bool desc_valid = false;
4350 struct worker *worker;
4351
4352 if (!(task->flags & PF_WQ_WORKER))
4353 return;
4354
4355
4356
4357
4358
4359 worker = probe_kthread_data(task);
4360
4361
4362
4363
4364
4365 probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
4366 probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
4367 probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
4368 probe_kernel_read(name, wq->name, sizeof(name) - 1);
4369
4370
4371 probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
4372 if (desc_valid)
4373 probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
4374
4375 if (fn || name[0] || desc[0]) {
4376 printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
4377 if (desc[0])
4378 pr_cont(" (%s)", desc);
4379 pr_cont("\n");
4380 }
4381}
4382
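/*
 * CPU hotplug.
 *
 * Rather than migrating pending work items off a dying CPU, its per-cpu
 * worker pools are disassociated: wq_unbind_fn() runs on the CPU being
 * taken down, flags every worker WORKER_UNBOUND and marks the pools
 * POOL_DISASSOCIATED, after which the workers may run anywhere and no
 * longer participate in concurrency management.  rebind_workers()
 * reverses this when the CPU comes back online.
 */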
4398static void wq_unbind_fn(struct work_struct *work)
4399{
4400 int cpu = smp_processor_id();
4401 struct worker_pool *pool;
4402 struct worker *worker;
4403
4404 for_each_cpu_worker_pool(pool, cpu) {
4405 mutex_lock(&pool->attach_mutex);
4406 spin_lock_irq(&pool->lock);
4407
4408
4409
4410
4411
4412
4413
4414
4415 for_each_pool_worker(worker, pool)
4416 worker->flags |= WORKER_UNBOUND;
4417
4418 pool->flags |= POOL_DISASSOCIATED;
4419
4420 spin_unlock_irq(&pool->lock);
4421 mutex_unlock(&pool->attach_mutex);
4422
4423
4424
4425
4426
4427
4428
4429 schedule();
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439 atomic_set(&pool->nr_running, 0);
4440
4441
4442
4443
4444
4445
4446 spin_lock_irq(&pool->lock);
4447 wake_up_worker(pool);
4448 spin_unlock_irq(&pool->lock);
4449 }
4450}
4451
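/*
 * rebind_workers - rebind all workers of a pool to the associated CPU
 * @pool: pool of interest
 *
 * @pool->cpu is coming online.  Restore the CPU affinity of all of
 * @pool's workers and clear POOL_DISASSOCIATED so that concurrency
 * management takes effect again.
 */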
4458static void rebind_workers(struct worker_pool *pool)
4459{
4460 struct worker *worker;
4461
4462 lockdep_assert_held(&pool->attach_mutex);
4463
4464
4465
4466
4467
4468
4469
4470
4471 for_each_pool_worker(worker, pool)
4472 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4473 pool->attrs->cpumask) < 0);
4474
4475 spin_lock_irq(&pool->lock);
4476 pool->flags &= ~POOL_DISASSOCIATED;
4477
4478 for_each_pool_worker(worker, pool) {
4479 unsigned int worker_flags = worker->flags;
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489 if (worker_flags & WORKER_IDLE)
4490 wake_up_process(worker->task);
4491
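		/*
		 * Clear UNBOUND without touching nr_running directly:
		 * atomically replace UNBOUND with REBOUND, another
		 * NOT_RUNNING flag, and let the worker itself complete
		 * the transition the next time it updates its flags.
		 */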
4507 WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
4508 worker_flags |= WORKER_REBOUND;
4509 worker_flags &= ~WORKER_UNBOUND;
4510 ACCESS_ONCE(worker->flags) = worker_flags;
4511 }
4512
4513 spin_unlock_irq(&pool->lock);
4514}
4515
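/*
 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 * @pool: unbound pool of interest
 * @cpu: the CPU which is coming up
 *
 * If @cpu is the first CPU in @pool's cpumask to come online, restore the
 * CPU affinity of all of @pool's workers to @pool->attrs->cpumask, which
 * may have been lost while none of the pool's CPUs were online.
 */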
4526static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4527{
4528 static cpumask_t cpumask;
4529 struct worker *worker;
4530
4531 lockdep_assert_held(&pool->attach_mutex);
4532
4533
4534 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
4535 return;
4536
4537
4538 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
4539 if (cpumask_weight(&cpumask) != 1)
4540 return;
4541
4542
4543 for_each_pool_worker(worker, pool)
4544 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4545 pool->attrs->cpumask) < 0);
4546}
4547
4548
4549
4550
4551
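/*
 * CPU hotplug callback registered at CPU_PRI_WORKQUEUE_UP: make sure each
 * per-cpu pool of an onlining CPU has a worker, rebind workers of per-cpu
 * pools, restore unbound pool cpumasks and update NUMA affinity.
 */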
4552static int workqueue_cpu_up_callback(struct notifier_block *nfb,
4553 unsigned long action,
4554 void *hcpu)
4555{
4556 int cpu = (unsigned long)hcpu;
4557 struct worker_pool *pool;
4558 struct workqueue_struct *wq;
4559 int pi;
4560
4561 switch (action & ~CPU_TASKS_FROZEN) {
4562 case CPU_UP_PREPARE:
4563 for_each_cpu_worker_pool(pool, cpu) {
4564 if (pool->nr_workers)
4565 continue;
4566 if (!create_worker(pool))
4567 return NOTIFY_BAD;
4568 }
4569 break;
4570
4571 case CPU_DOWN_FAILED:
4572 case CPU_ONLINE:
4573 mutex_lock(&wq_pool_mutex);
4574
4575 for_each_pool(pool, pi) {
4576 mutex_lock(&pool->attach_mutex);
4577
4578 if (pool->cpu == cpu)
4579 rebind_workers(pool);
4580 else if (pool->cpu < 0)
4581 restore_unbound_workers_cpumask(pool, cpu);
4582
4583 mutex_unlock(&pool->attach_mutex);
4584 }
4585
4586
4587 list_for_each_entry(wq, &workqueues, list)
4588 wq_update_unbound_numa(wq, cpu, true);
4589
4590 mutex_unlock(&wq_pool_mutex);
4591 break;
4592 }
4593 return NOTIFY_OK;
4594}
4595
4596
4597
4598
4599
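/*
 * CPU hotplug callback registered at CPU_PRI_WORKQUEUE_DOWN: on
 * CPU_DOWN_PREPARE, unbind the CPU's per-cpu pools via wq_unbind_fn()
 * and update NUMA affinity of unbound workqueues.
 */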
4600static int workqueue_cpu_down_callback(struct notifier_block *nfb,
4601 unsigned long action,
4602 void *hcpu)
4603{
4604 int cpu = (unsigned long)hcpu;
4605 struct work_struct unbind_work;
4606 struct workqueue_struct *wq;
4607
4608 switch (action & ~CPU_TASKS_FROZEN) {
4609 case CPU_DOWN_PREPARE:
4610
4611 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
4612 queue_work_on(cpu, system_highpri_wq, &unbind_work);
4613
4614
4615 mutex_lock(&wq_pool_mutex);
4616 list_for_each_entry(wq, &workqueues, list)
4617 wq_update_unbound_numa(wq, cpu, false);
4618 mutex_unlock(&wq_pool_mutex);
4619
4620
4621 flush_work(&unbind_work);
4622 destroy_work_on_stack(&unbind_work);
4623 break;
4624 }
4625 return NOTIFY_OK;
4626}
4627
4628#ifdef CONFIG_SMP
4629
4630struct work_for_cpu {
4631 struct work_struct work;
4632 long (*fn)(void *);
4633 void *arg;
4634 long ret;
4635};
4636
4637static void work_for_cpu_fn(struct work_struct *work)
4638{
4639 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
4640
4641 wfc->ret = wfc->fn(wfc->arg);
4642}
4643
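/**
 * work_on_cpu - run a function in thread context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from
 * completing.
 *
 * Return: the value @fn returns.
 */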
4655long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
4656{
4657 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
4658
4659 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
4660 schedule_work_on(cpu, &wfc.work);
4661 flush_work(&wfc.work);
4662 destroy_work_on_stack(&wfc.work);
4663 return wfc.ret;
4664}
4665EXPORT_SYMBOL_GPL(work_on_cpu);
4666#endif
4667
4668#ifdef CONFIG_FREEZER
4669
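/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  workqueue_freezing is set and max_active of
 * every freezable workqueue's pwqs is clamped to zero so that no new work
 * item starts execution until the system is thawed.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */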
4680void freeze_workqueues_begin(void)
4681{
4682 struct workqueue_struct *wq;
4683 struct pool_workqueue *pwq;
4684
4685 mutex_lock(&wq_pool_mutex);
4686
4687 WARN_ON_ONCE(workqueue_freezing);
4688 workqueue_freezing = true;
4689
4690 list_for_each_entry(wq, &workqueues, list) {
4691 mutex_lock(&wq->mutex);
4692 for_each_pwq(pwq, wq)
4693 pwq_adjust_max_active(pwq);
4694 mutex_unlock(&wq->mutex);
4695 }
4696
4697 mutex_unlock(&wq_pool_mutex);
4698}
4699
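/**
 * freeze_workqueues_busy - are freezable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex.
 *
 * Return: %true if some freezable workqueues are still busy.  %false if
 * freezing is complete.
 */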
4713bool freeze_workqueues_busy(void)
4714{
4715 bool busy = false;
4716 struct workqueue_struct *wq;
4717 struct pool_workqueue *pwq;
4718
4719 mutex_lock(&wq_pool_mutex);
4720
4721 WARN_ON_ONCE(!workqueue_freezing);
4722
4723 list_for_each_entry(wq, &workqueues, list) {
4724 if (!(wq->flags & WQ_FREEZABLE))
4725 continue;
4726
4727
4728
4729
4730 rcu_read_lock_sched();
4731 for_each_pwq(pwq, wq) {
4732 WARN_ON_ONCE(pwq->nr_active < 0);
4733 if (pwq->nr_active) {
4734 busy = true;
4735 rcu_read_unlock_sched();
4736 goto out_unlock;
4737 }
4738 }
4739 rcu_read_unlock_sched();
4740 }
4741out_unlock:
4742 mutex_unlock(&wq_pool_mutex);
4743 return busy;
4744}
4745
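/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected frozen
 * work items are transferred to their respective pool worklists.
 *
 * CONTEXT:
 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
 */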
4755void thaw_workqueues(void)
4756{
4757 struct workqueue_struct *wq;
4758 struct pool_workqueue *pwq;
4759
4760 mutex_lock(&wq_pool_mutex);
4761
4762 if (!workqueue_freezing)
4763 goto out_unlock;
4764
4765 workqueue_freezing = false;
4766
4767
4768 list_for_each_entry(wq, &workqueues, list) {
4769 mutex_lock(&wq->mutex);
4770 for_each_pwq(pwq, wq)
4771 pwq_adjust_max_active(pwq);
4772 mutex_unlock(&wq->mutex);
4773 }
4774
4775out_unlock:
4776 mutex_unlock(&wq_pool_mutex);
4777}
4778#endif
4779
4780static void __init wq_numa_init(void)
4781{
4782 cpumask_var_t *tbl;
4783 int node, cpu;
4784
4785 if (num_possible_nodes() <= 1)
4786 return;
4787
4788 if (wq_disable_numa) {
4789 pr_info("workqueue: NUMA affinity support disabled\n");
4790 return;
4791 }
4792
4793 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
4794 BUG_ON(!wq_update_unbound_numa_attrs_buf);
4795
4796
4797
4798
4799
4800
4801 tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
4802 BUG_ON(!tbl);
4803
4804 for_each_node(node)
4805 BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
4806 node_online(node) ? node : NUMA_NO_NODE));
4807
4808 for_each_possible_cpu(cpu) {
4809 node = cpu_to_node(cpu);
4810 if (WARN_ON(node == NUMA_NO_NODE)) {
4811 pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
4812
4813 return;
4814 }
4815 cpumask_set_cpu(cpu, tbl[node]);
4816 }
4817
4818 wq_numa_possible_cpumask = tbl;
4819 wq_numa_enabled = true;
4820}
4821
4822static int __init init_workqueues(void)
4823{
4824 int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
4825 int i, cpu;
4826
4827 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
4828
4829 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
4830
4831 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
4832 hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
4833
4834 wq_numa_init();
4835
4836
4837 for_each_possible_cpu(cpu) {
4838 struct worker_pool *pool;
4839
4840 i = 0;
4841 for_each_cpu_worker_pool(pool, cpu) {
4842 BUG_ON(init_worker_pool(pool));
4843 pool->cpu = cpu;
4844 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
4845 pool->attrs->nice = std_nice[i++];
4846 pool->node = cpu_to_node(cpu);
4847
4848
4849 mutex_lock(&wq_pool_mutex);
4850 BUG_ON(worker_pool_assign_id(pool));
4851 mutex_unlock(&wq_pool_mutex);
4852 }
4853 }
4854
4855
4856 for_each_online_cpu(cpu) {
4857 struct worker_pool *pool;
4858
4859 for_each_cpu_worker_pool(pool, cpu) {
4860 pool->flags &= ~POOL_DISASSOCIATED;
4861 BUG_ON(!create_worker(pool));
4862 }
4863 }
4864
4865
4866 for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
4867 struct workqueue_attrs *attrs;
4868
4869 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
4870 attrs->nice = std_nice[i];
4871 unbound_std_wq_attrs[i] = attrs;
4872
4873
4874
4875
4876
4877
4878 BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
4879 attrs->nice = std_nice[i];
4880 attrs->no_numa = true;
4881 ordered_wq_attrs[i] = attrs;
4882 }
4883
4884 system_wq = alloc_workqueue("events", 0, 0);
4885 system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
4886 system_long_wq = alloc_workqueue("events_long", 0, 0);
4887 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
4888 WQ_UNBOUND_MAX_ACTIVE);
4889 system_freezable_wq = alloc_workqueue("events_freezable",
4890 WQ_FREEZABLE, 0);
4891 system_power_efficient_wq = alloc_workqueue("events_power_efficient",
4892 WQ_POWER_EFFICIENT, 0);
4893 system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
4894 WQ_FREEZABLE | WQ_POWER_EFFICIENT,
4895 0);
4896 BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
4897 !system_unbound_wq || !system_freezable_wq ||
4898 !system_power_efficient_wq ||
4899 !system_freezable_power_efficient_wq);
4900 return 0;
4901}
4902early_initcall(init_workqueues);
4903