1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38#define DEBUG_SUBSYSTEM S_LNET
39
40#include "../../include/linux/libcfs/libcfs.h"
41
#define CFS_WS_NAME_LEN 16

/*
 * A workitem scheduler: queues of workitems served by a pool of kernel
 * threads, optionally bound to one CPU partition.
 */
struct cfs_wi_sched {
	/* chain on cfs_wi_data.wi_scheds; protected by wi_glock */
	struct list_head ws_list;
	/* serializes the queues and workitem state bits below */
	spinlock_t ws_lock;
	/* scheduler threads sleep here while there is nothing to run */
	wait_queue_head_t ws_waitq;
	/* workitems queued and waiting to run */
	struct list_head ws_runq;
	/* workitems rescheduled while they were still running; moved back
	 * to ws_runq by cfs_wi_scheduler() once the action returns */
	struct list_head ws_rerunq;
	/* CPU partition table the threads bind to, or NULL for no binding */
	struct cfs_cpt_table *ws_cptab;
	/* CPT id passed to cfs_cpt_bind() (may be CFS_CPT_ANY/negative) */
	int ws_cpt;
	/* number of workitems currently on ws_runq + ws_rerunq */
	int ws_nscheduled;
	/* number of live scheduler threads; protected by wi_glock */
	unsigned int ws_nthreads:30;
	/* scheduler is being torn down */
	unsigned int ws_stopping:1;
	/* one thread is mid-startup (hand-shake with cfs_wi_sched_create()) */
	unsigned int ws_starting:1;
	/* scheduler name; also used as the thread-name prefix */
	char ws_name[CFS_WS_NAME_LEN];
};
76
/* Global state shared by all workitem schedulers. */
static struct cfs_workitem_data {
	/* serializes wi_scheds, wi_init/wi_stopping and per-sched
	 * ws_nthreads/ws_starting counters */
	spinlock_t wi_glock;
	/* list of all schedulers (linked via cfs_wi_sched::ws_list) */
	struct list_head wi_scheds;
	/* set once cfs_wi_startup() has run */
	int wi_init;
	/* set while cfs_wi_shutdown() is tearing everything down */
	int wi_stopping;
} cfs_wi_data;
87
88static inline int
89cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
90{
91 spin_lock(&sched->ws_lock);
92 if (sched->ws_stopping) {
93 spin_unlock(&sched->ws_lock);
94 return 0;
95 }
96
97 if (!list_empty(&sched->ws_runq)) {
98 spin_unlock(&sched->ws_lock);
99 return 0;
100 }
101 spin_unlock(&sched->ws_lock);
102 return 1;
103}
104
105
106
107
108
/**
 * cfs_wi_exit - mark a running workitem as finished for good
 * @sched: scheduler @wi belongs to
 * @wi:    workitem; must be running (i.e. called from within wi_action)
 *
 * Cancels any pending reschedule of @wi, then leaves wi_scheduled set
 * while wi_list is empty.  That combination makes any later
 * cfs_wi_schedule() of @wi trip its final LASSERT, catching
 * use-after-exit at the call site.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* plain (non-irq) spinlock below */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	LASSERT(wi->wi_running);
	if (wi->wi_scheduled) { /* cancel a pending reschedule (on ws_rerunq) */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;
	}

	LASSERT(list_empty(&wi->wi_list));

	/* poison: scheduled-but-unlisted blocks any further scheduling */
	wi->wi_scheduled = 1;
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_exit);
132
133
134
135
/**
 * cfs_wi_deschedule - try to cancel a queued workitem
 * @sched: scheduler the workitem was scheduled on
 * @wi:    workitem to cancel
 *
 * Removes @wi from the run/rerun queue if it is still pending.
 *
 * Return: 1 if @wi was not running when sampled under ws_lock (so after
 * this call it is guaranteed not to run again); 0 if it was running at
 * that moment, in which case the current invocation may still be in
 * progress or may reschedule itself.
 */
int
cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	int rc;

	LASSERT(!in_interrupt()); /* plain (non-irq) spinlock below */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	/* sample "was it running?" atomically with the dequeue below */
	rc = !(wi->wi_running);

	if (wi->wi_scheduled) { /* cancel the pending schedule */
		LASSERT(!list_empty(&wi->wi_list));
		list_del_init(&wi->wi_list);

		LASSERT(sched->ws_nscheduled > 0);
		sched->ws_nscheduled--;

		wi->wi_scheduled = 0;
	}

	LASSERT(list_empty(&wi->wi_list));

	spin_unlock(&sched->ws_lock);
	return rc;
}
EXPORT_SYMBOL(cfs_wi_deschedule);
169
170
171
172
173
174
175
176
/**
 * cfs_wi_schedule - queue a workitem for execution
 * @sched: scheduler to run @wi on
 * @wi:    workitem to queue
 *
 * A workitem is scheduled at most once: scheduling an already-scheduled
 * item is a no-op.  If @wi is currently running, it is parked on
 * ws_rerunq and cfs_wi_scheduler() will move it back onto ws_runq after
 * the current invocation returns, so it runs exactly once more.
 */
void
cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
	LASSERT(!in_interrupt()); /* plain (non-irq) spinlock below */
	LASSERT(!sched->ws_stopping);

	spin_lock(&sched->ws_lock);

	if (!wi->wi_scheduled) {
		LASSERT(list_empty(&wi->wi_list));

		wi->wi_scheduled = 1;
		sched->ws_nscheduled++;
		if (!wi->wi_running) {
			/* ready to run: queue it and wake a thread */
			list_add_tail(&wi->wi_list, &sched->ws_runq);
			wake_up(&sched->ws_waitq);
		} else {
			/* running right now: defer to the rerun queue */
			list_add(&wi->wi_list, &sched->ws_rerunq);
		}
	}

	/* also fires if @wi was retired via cfs_wi_exit() and rescheduled */
	LASSERT(!list_empty(&wi->wi_list));
	spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_schedule);
202
/*
 * Scheduler thread body: repeatedly pulls workitems off ws_runq and runs
 * their actions until the scheduler is stopped.
 */
static int cfs_wi_scheduler(void *arg)
{
	struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;

	cfs_block_allsigs();

	/* bind to the scheduler's CPU partition if one was given;
	 * failure is only worth a warning, the thread still runs */
	if (sched->ws_cptab)
		if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt))
			CWARN("Unable to bind %s on CPU partition %d\n",
			      sched->ws_name, sched->ws_cpt);

	spin_lock(&cfs_wi_data.wi_glock);

	/* hand-shake with cfs_wi_sched_create(): exactly one thread may
	 * be starting at a time */
	LASSERT(sched->ws_starting == 1);
	sched->ws_starting--;
	sched->ws_nthreads++;

	spin_unlock(&cfs_wi_data.wi_glock);

	spin_lock(&sched->ws_lock);

	while (!sched->ws_stopping) {
		int nloops = 0;
		int rc;
		struct cfs_workitem *wi;

		/* run up to CFS_WI_RESCHED items before yielding the CPU */
		while (!list_empty(&sched->ws_runq) &&
		       nloops < CFS_WI_RESCHED) {
			wi = list_entry(sched->ws_runq.next,
					struct cfs_workitem, wi_list);
			LASSERT(wi->wi_scheduled && !wi->wi_running);

			list_del_init(&wi->wi_list);

			LASSERT(sched->ws_nscheduled > 0);
			sched->ws_nscheduled--;

			wi->wi_running = 1;
			wi->wi_scheduled = 0;

			/* drop the lock while running the callback; the
			 * workitem may be rescheduled concurrently */
			spin_unlock(&sched->ws_lock);
			nloops++;

			rc = (*wi->wi_action)(wi);

			spin_lock(&sched->ws_lock);
			if (rc)
				/* non-zero: the action retired the workitem
				 * (see cfs_wi_exit()); *wi must not be
				 * touched again — it may already be freed */
				continue;

			wi->wi_running = 0;
			if (list_empty(&wi->wi_list))
				continue;

			LASSERT(wi->wi_scheduled);
			/* @wi was rescheduled while running: it sits on
			 * ws_rerunq now, move it to the tail of ws_runq so
			 * it runs again in a later batch */
			list_move_tail(&wi->wi_list, &sched->ws_runq);
		}

		if (!list_empty(&sched->ws_runq)) {
			spin_unlock(&sched->ws_lock);
			/* don't sleep: more work is pending, just give
			 * other threads a chance to run */
			cond_resched();
			spin_lock(&sched->ws_lock);
			continue;
		}

		/* queue drained: sleep until woken with work or stopped */
		spin_unlock(&sched->ws_lock);
		rc = wait_event_interruptible_exclusive(sched->ws_waitq,
							!cfs_wi_sched_cansleep(sched));
		spin_lock(&sched->ws_lock);
	}

	spin_unlock(&sched->ws_lock);

	/* announce exit so cfs_wi_sched_destroy()/shutdown() can proceed */
	spin_lock(&cfs_wi_data.wi_glock);
	sched->ws_nthreads--;
	spin_unlock(&cfs_wi_data.wi_glock);

	return 0;
}
288
/**
 * cfs_wi_sched_destroy - stop a scheduler's threads and free it
 * @sched: scheduler to destroy
 *
 * Marks @sched stopping, wakes its threads, polls until they have all
 * exited, then unlinks and frees the scheduler.  A second concurrent
 * call while stopping is in progress is a no-op.
 */
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
	int i;

	LASSERT(cfs_wi_data.wi_init);
	LASSERT(!cfs_wi_data.wi_stopping);

	spin_lock(&cfs_wi_data.wi_glock);
	if (sched->ws_stopping) {
		CDEBUG(D_INFO, "%s is in progress of stopping\n",
		       sched->ws_name);
		spin_unlock(&cfs_wi_data.wi_glock);
		return;
	}

	LASSERT(!list_empty(&sched->ws_list));
	sched->ws_stopping = 1;

	spin_unlock(&cfs_wi_data.wi_glock);

	/* start at 2 so the power-of-2 escalation below doesn't warn on
	 * the very first poll */
	i = 2;
	wake_up_all(&sched->ws_waitq);

	spin_lock(&cfs_wi_data.wi_glock);
	while (sched->ws_nthreads > 0) {
		/* exponential backoff on the warning severity */
		CDEBUG(is_power_of_2(++i) ? D_WARNING : D_NET,
		       "waiting for %d threads of WI sched[%s] to terminate\n",
		       sched->ws_nthreads, sched->ws_name);

		/* drop the lock and poll again after ~1/20 s */
		spin_unlock(&cfs_wi_data.wi_glock);
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1) / 20);
		spin_lock(&cfs_wi_data.wi_glock);
	}

	list_del(&sched->ws_list);

	spin_unlock(&cfs_wi_data.wi_glock);

	/* all threads gone: nothing may remain queued on this scheduler */
	LASSERT(!sched->ws_nscheduled);

	LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
333
334int
335cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
336 int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
337{
338 struct cfs_wi_sched *sched;
339 int rc;
340
341 LASSERT(cfs_wi_data.wi_init);
342 LASSERT(!cfs_wi_data.wi_stopping);
343 LASSERT(!cptab || cpt == CFS_CPT_ANY ||
344 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
345
346 LIBCFS_ALLOC(sched, sizeof(*sched));
347 if (!sched)
348 return -ENOMEM;
349
350 if (strlen(name) > sizeof(sched->ws_name) - 1) {
351 LIBCFS_FREE(sched, sizeof(*sched));
352 return -E2BIG;
353 }
354 strncpy(sched->ws_name, name, sizeof(sched->ws_name));
355
356 sched->ws_cptab = cptab;
357 sched->ws_cpt = cpt;
358
359 spin_lock_init(&sched->ws_lock);
360 init_waitqueue_head(&sched->ws_waitq);
361 INIT_LIST_HEAD(&sched->ws_runq);
362 INIT_LIST_HEAD(&sched->ws_rerunq);
363 INIT_LIST_HEAD(&sched->ws_list);
364
365 rc = 0;
366 while (nthrs > 0) {
367 char name[16];
368 struct task_struct *task;
369
370 spin_lock(&cfs_wi_data.wi_glock);
371 while (sched->ws_starting > 0) {
372 spin_unlock(&cfs_wi_data.wi_glock);
373 schedule();
374 spin_lock(&cfs_wi_data.wi_glock);
375 }
376
377 sched->ws_starting++;
378 spin_unlock(&cfs_wi_data.wi_glock);
379
380 if (sched->ws_cptab && sched->ws_cpt >= 0) {
381 snprintf(name, sizeof(name), "%s_%02d_%02u",
382 sched->ws_name, sched->ws_cpt,
383 sched->ws_nthreads);
384 } else {
385 snprintf(name, sizeof(name), "%s_%02u",
386 sched->ws_name, sched->ws_nthreads);
387 }
388
389 task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
390 if (!IS_ERR(task)) {
391 nthrs--;
392 continue;
393 }
394 rc = PTR_ERR(task);
395
396 CERROR("Failed to create thread for WI scheduler %s: %d\n",
397 name, rc);
398
399 spin_lock(&cfs_wi_data.wi_glock);
400
401
402 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
403 sched->ws_starting--;
404
405 spin_unlock(&cfs_wi_data.wi_glock);
406
407 cfs_wi_sched_destroy(sched);
408 return rc;
409 }
410 spin_lock(&cfs_wi_data.wi_glock);
411 list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
412 spin_unlock(&cfs_wi_data.wi_glock);
413
414 *sched_pp = sched;
415 return 0;
416}
417EXPORT_SYMBOL(cfs_wi_sched_create);
418
/*
 * Module init: reset the global workitem state and mark it initialized.
 * Always succeeds (returns 0).
 */
int
cfs_wi_startup(void)
{
	memset(&cfs_wi_data, 0, sizeof(cfs_wi_data));

	spin_lock_init(&cfs_wi_data.wi_glock);
	INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
	cfs_wi_data.wi_init = 1;

	return 0;
}
430
/*
 * Module cleanup: stop every scheduler, wait for all of their threads to
 * exit, then free the schedulers.
 */
void
cfs_wi_shutdown(void)
{
	struct cfs_wi_sched *sched;
	struct cfs_wi_sched *temp;

	spin_lock(&cfs_wi_data.wi_glock);
	cfs_wi_data.wi_stopping = 1;
	spin_unlock(&cfs_wi_data.wi_glock);

	/* nobody should contend on wi_scheds here: wi_stopping keeps
	 * cfs_wi_sched_create()/destroy() out (they LASSERT on it) */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		sched->ws_stopping = 1;
		wake_up_all(&sched->ws_waitq);
	}

	/* second pass: poll (~1/20 s per nap) until every scheduler's
	 * threads have exited */
	list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
		spin_lock(&cfs_wi_data.wi_glock);

		while (sched->ws_nthreads) {
			spin_unlock(&cfs_wi_data.wi_glock);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(cfs_time_seconds(1) / 20);
			spin_lock(&cfs_wi_data.wi_glock);
		}
		spin_unlock(&cfs_wi_data.wi_glock);
	}
	/* _safe variant: entries are freed while walking the list */
	list_for_each_entry_safe(sched, temp, &cfs_wi_data.wi_scheds, ws_list) {
		list_del(&sched->ws_list);
		LIBCFS_FREE(sched, sizeof(*sched));
	}

	cfs_wi_data.wi_stopping = 0;
	cfs_wi_data.wi_init = 0;
}
466