1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#define DEBUG_SUBSYSTEM S_LNET
35
36#include <linux/cpu.h>
37#include <linux/sched.h>
38#include <linux/libcfs/libcfs.h>
39
40#ifdef CONFIG_SMP
41
42
43
44
45
46
47
48
/*
 * Module parameter "cpu_npartitions": requested number of CPU partitions.
 * cfs_cpu_init() hands it to cfs_cpt_table_create() when cpu_pattern is
 * empty; a value <= 0 makes cfs_cpt_table_create() use the result of
 * cfs_cpt_num_estimate() instead.
 */
static int cpu_npartitions;
CFS_MODULE_PARM(cpu_npartitions, "i", int, 0444, "# of CPU partitions");
51
52
53
54
55
56
57
58
59
60
61
62
/*
 * Module parameter "cpu_pattern": textual description of the CPU partitions.
 * When it is non-empty, cfs_cpu_init() builds the table with
 * cfs_cpt_table_create_pattern() and cpu_npartitions is not consulted.
 * The parser accepts a sequence of "<partition id>[<number ranges>]" entries,
 * optionally prefixed by 'n' or 'N' to make the ranges NUMA node IDs instead
 * of CPU IDs.
 */
static char *cpu_pattern = "";
CFS_MODULE_PARM(cpu_pattern, "s", charp, 0444, "CPU partitions pattern");
65
/* File-private state shared by all CPU partition tables. */
struct cfs_cpt_data {
 /* taken around every read/update of cpt_version in this file */
 spinlock_t cpt_lock;
 /* incremented by cfs_cpu_notify() on CPU online/dead events */
 unsigned long cpt_version;
 /* held by every function in this file that uses cpt_cpumask */
 struct semaphore cpt_mutex;
 /* scratch mask; allocated in cfs_cpu_init(), freed in cfs_cpu_fini() */
 cpumask_t *cpt_cpumask;
};

/* the single instance, (re)initialized by cfs_cpu_init() */
static struct cfs_cpt_data cpt_data;
78
79void
80cfs_cpu_core_siblings(int cpu, cpumask_t *mask)
81{
82
83 cpumask_copy(mask, topology_core_cpumask(cpu));
84}
85EXPORT_SYMBOL(cfs_cpu_core_siblings);
86
87
88int
89cfs_cpu_core_nsiblings(int cpu)
90{
91 int num;
92
93 down(&cpt_data.cpt_mutex);
94
95 cfs_cpu_core_siblings(cpu, cpt_data.cpt_cpumask);
96 num = cpus_weight(*cpt_data.cpt_cpumask);
97
98 up(&cpt_data.cpt_mutex);
99
100 return num;
101}
102EXPORT_SYMBOL(cfs_cpu_core_nsiblings);
103
104
105void
106cfs_cpu_ht_siblings(int cpu, cpumask_t *mask)
107{
108 cpumask_copy(mask, topology_thread_cpumask(cpu));
109}
110EXPORT_SYMBOL(cfs_cpu_ht_siblings);
111
112
113int
114cfs_cpu_ht_nsiblings(int cpu)
115{
116 int num;
117
118 down(&cpt_data.cpt_mutex);
119
120 cfs_cpu_ht_siblings(cpu, cpt_data.cpt_cpumask);
121 num = cpus_weight(*cpt_data.cpt_cpumask);
122
123 up(&cpt_data.cpt_mutex);
124
125 return num;
126}
127EXPORT_SYMBOL(cfs_cpu_ht_nsiblings);
128
129void
130cfs_node_to_cpumask(int node, cpumask_t *mask)
131{
132 cpumask_copy(mask, cpumask_of_node(node));
133}
134EXPORT_SYMBOL(cfs_node_to_cpumask);
135
136void
137cfs_cpt_table_free(struct cfs_cpt_table *cptab)
138{
139 int i;
140
141 if (cptab->ctb_cpu2cpt != NULL) {
142 LIBCFS_FREE(cptab->ctb_cpu2cpt,
143 num_possible_cpus() *
144 sizeof(cptab->ctb_cpu2cpt[0]));
145 }
146
147 for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
148 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
149
150 if (part->cpt_nodemask != NULL) {
151 LIBCFS_FREE(part->cpt_nodemask,
152 sizeof(*part->cpt_nodemask));
153 }
154
155 if (part->cpt_cpumask != NULL)
156 LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
157 }
158
159 if (cptab->ctb_parts != NULL) {
160 LIBCFS_FREE(cptab->ctb_parts,
161 cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
162 }
163
164 if (cptab->ctb_nodemask != NULL)
165 LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
166 if (cptab->ctb_cpumask != NULL)
167 LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
168
169 LIBCFS_FREE(cptab, sizeof(*cptab));
170}
171EXPORT_SYMBOL(cfs_cpt_table_free);
172
173struct cfs_cpt_table *
174cfs_cpt_table_alloc(unsigned int ncpt)
175{
176 struct cfs_cpt_table *cptab;
177 int i;
178
179 LIBCFS_ALLOC(cptab, sizeof(*cptab));
180 if (cptab == NULL)
181 return NULL;
182
183 cptab->ctb_nparts = ncpt;
184
185 LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
186 LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
187
188 if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL)
189 goto failed;
190
191 LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
192 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
193 if (cptab->ctb_cpu2cpt == NULL)
194 goto failed;
195
196 memset(cptab->ctb_cpu2cpt, -1,
197 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
198
199 LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
200 if (cptab->ctb_parts == NULL)
201 goto failed;
202
203 for (i = 0; i < ncpt; i++) {
204 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
205
206 LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
207 LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
208 if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
209 goto failed;
210 }
211
212 spin_lock(&cpt_data.cpt_lock);
213
214 cptab->ctb_version = cpt_data.cpt_version;
215 spin_unlock(&cpt_data.cpt_lock);
216
217 return cptab;
218
219 failed:
220 cfs_cpt_table_free(cptab);
221 return NULL;
222}
223EXPORT_SYMBOL(cfs_cpt_table_alloc);
224
225int
226cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
227{
228 char *tmp = buf;
229 int rc = 0;
230 int i;
231 int j;
232
233 for (i = 0; i < cptab->ctb_nparts; i++) {
234 if (len > 0) {
235 rc = snprintf(tmp, len, "%d\t: ", i);
236 len -= rc;
237 }
238
239 if (len <= 0) {
240 rc = -EFBIG;
241 goto out;
242 }
243
244 tmp += rc;
245 for_each_cpu_mask(j, *cptab->ctb_parts[i].cpt_cpumask) {
246 rc = snprintf(tmp, len, "%d ", j);
247 len -= rc;
248 if (len <= 0) {
249 rc = -EFBIG;
250 goto out;
251 }
252 tmp += rc;
253 }
254
255 *tmp = '\n';
256 tmp++;
257 len--;
258 }
259
260 out:
261 if (rc < 0)
262 return rc;
263
264 return tmp - buf;
265}
266EXPORT_SYMBOL(cfs_cpt_table_print);
267
268int
269cfs_cpt_number(struct cfs_cpt_table *cptab)
270{
271 return cptab->ctb_nparts;
272}
273EXPORT_SYMBOL(cfs_cpt_number);
274
275int
276cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
277{
278 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
279
280 return cpt == CFS_CPT_ANY ?
281 cpus_weight(*cptab->ctb_cpumask) :
282 cpus_weight(*cptab->ctb_parts[cpt].cpt_cpumask);
283}
284EXPORT_SYMBOL(cfs_cpt_weight);
285
286int
287cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
288{
289 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
290
291 return cpt == CFS_CPT_ANY ?
292 any_online_cpu(*cptab->ctb_cpumask) != NR_CPUS :
293 any_online_cpu(*cptab->ctb_parts[cpt].cpt_cpumask) != NR_CPUS;
294}
295EXPORT_SYMBOL(cfs_cpt_online);
296
297cpumask_t *
298cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
299{
300 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
301
302 return cpt == CFS_CPT_ANY ?
303 cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
304}
305EXPORT_SYMBOL(cfs_cpt_cpumask);
306
307nodemask_t *
308cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
309{
310 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
311
312 return cpt == CFS_CPT_ANY ?
313 cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
314}
315EXPORT_SYMBOL(cfs_cpt_nodemask);
316
317int
318cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
319{
320 int node;
321
322 LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
323
324 if (cpu < 0 || cpu >= NR_CPUS || !cpu_online(cpu)) {
325 CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
326 return 0;
327 }
328
329 if (cptab->ctb_cpu2cpt[cpu] != -1) {
330 CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
331 cpu, cptab->ctb_cpu2cpt[cpu]);
332 return 0;
333 }
334
335 cptab->ctb_cpu2cpt[cpu] = cpt;
336
337 LASSERT(!cpu_isset(cpu, *cptab->ctb_cpumask));
338 LASSERT(!cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
339
340 cpu_set(cpu, *cptab->ctb_cpumask);
341 cpu_set(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
342
343 node = cpu_to_node(cpu);
344
345
346 if (!node_isset(node, *cptab->ctb_nodemask))
347 node_set(node, *cptab->ctb_nodemask);
348
349
350 if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
351 node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
352
353 return 1;
354}
355EXPORT_SYMBOL(cfs_cpt_set_cpu);
356
357void
358cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
359{
360 int node;
361 int i;
362
363 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
364
365 if (cpu < 0 || cpu >= NR_CPUS) {
366 CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
367 return;
368 }
369
370 if (cpt == CFS_CPT_ANY) {
371
372 cpt = cptab->ctb_cpu2cpt[cpu];
373 if (cpt < 0) {
374 CDEBUG(D_INFO, "Try to unset cpu %d which is "
375 "not in CPT-table %p\n", cpt, cptab);
376 return;
377 }
378
379 } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
380 CDEBUG(D_INFO,
381 "CPU %d is not in cpu-partition %d\n", cpu, cpt);
382 return;
383 }
384
385 LASSERT(cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
386 LASSERT(cpu_isset(cpu, *cptab->ctb_cpumask));
387
388 cpu_clear(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
389 cpu_clear(cpu, *cptab->ctb_cpumask);
390 cptab->ctb_cpu2cpt[cpu] = -1;
391
392 node = cpu_to_node(cpu);
393
394 LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
395 LASSERT(node_isset(node, *cptab->ctb_nodemask));
396
397 for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask) {
398
399 if (cpu_to_node(i) == node)
400 break;
401 }
402
403 if (i == NR_CPUS)
404 node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
405
406 for_each_cpu_mask(i, *cptab->ctb_cpumask) {
407
408 if (cpu_to_node(i) == node)
409 break;
410 }
411
412 if (i == NR_CPUS)
413 node_clear(node, *cptab->ctb_nodemask);
414
415 return;
416}
417EXPORT_SYMBOL(cfs_cpt_unset_cpu);
418
419int
420cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
421{
422 int i;
423
424 if (cpus_weight(*mask) == 0 || any_online_cpu(*mask) == NR_CPUS) {
425 CDEBUG(D_INFO, "No online CPU is found in the CPU mask "
426 "for CPU partition %d\n", cpt);
427 return 0;
428 }
429
430 for_each_cpu_mask(i, *mask) {
431 if (!cfs_cpt_set_cpu(cptab, cpt, i))
432 return 0;
433 }
434
435 return 1;
436}
437EXPORT_SYMBOL(cfs_cpt_set_cpumask);
438
439void
440cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
441{
442 int i;
443
444 for_each_cpu_mask(i, *mask)
445 cfs_cpt_unset_cpu(cptab, cpt, i);
446}
447EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
448
449int
450cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
451{
452 cpumask_t *mask;
453 int rc;
454
455 if (node < 0 || node >= MAX_NUMNODES) {
456 CDEBUG(D_INFO,
457 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
458 return 0;
459 }
460
461 down(&cpt_data.cpt_mutex);
462
463 mask = cpt_data.cpt_cpumask;
464 cfs_node_to_cpumask(node, mask);
465
466 rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
467
468 up(&cpt_data.cpt_mutex);
469
470 return rc;
471}
472EXPORT_SYMBOL(cfs_cpt_set_node);
473
474void
475cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
476{
477 cpumask_t *mask;
478
479 if (node < 0 || node >= MAX_NUMNODES) {
480 CDEBUG(D_INFO,
481 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
482 return;
483 }
484
485 down(&cpt_data.cpt_mutex);
486
487 mask = cpt_data.cpt_cpumask;
488 cfs_node_to_cpumask(node, mask);
489
490 cfs_cpt_unset_cpumask(cptab, cpt, mask);
491
492 up(&cpt_data.cpt_mutex);
493}
494EXPORT_SYMBOL(cfs_cpt_unset_node);
495
496int
497cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
498{
499 int i;
500
501 for_each_node_mask(i, *mask) {
502 if (!cfs_cpt_set_node(cptab, cpt, i))
503 return 0;
504 }
505
506 return 1;
507}
508EXPORT_SYMBOL(cfs_cpt_set_nodemask);
509
510void
511cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
512{
513 int i;
514
515 for_each_node_mask(i, *mask)
516 cfs_cpt_unset_node(cptab, cpt, i);
517}
518EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
519
520void
521cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
522{
523 int last;
524 int i;
525
526 if (cpt == CFS_CPT_ANY) {
527 last = cptab->ctb_nparts - 1;
528 cpt = 0;
529 } else {
530 last = cpt;
531 }
532
533 for (; cpt <= last; cpt++) {
534 for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask)
535 cfs_cpt_unset_cpu(cptab, cpt, i);
536 }
537}
538EXPORT_SYMBOL(cfs_cpt_clear);
539
540int
541cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
542{
543 nodemask_t *mask;
544 int weight;
545 int rotor;
546 int node;
547
548
549
550 if (cpt < 0 || cpt >= cptab->ctb_nparts) {
551 mask = cptab->ctb_nodemask;
552 rotor = cptab->ctb_spread_rotor++;
553 } else {
554 mask = cptab->ctb_parts[cpt].cpt_nodemask;
555 rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
556 }
557
558 weight = nodes_weight(*mask);
559 LASSERT(weight > 0);
560
561 rotor %= weight;
562
563 for_each_node_mask(node, *mask) {
564 if (rotor-- == 0)
565 return node;
566 }
567
568 LBUG();
569 return 0;
570}
571EXPORT_SYMBOL(cfs_cpt_spread_node);
572
573int
574cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
575{
576 int cpu = smp_processor_id();
577 int cpt = cptab->ctb_cpu2cpt[cpu];
578
579 if (cpt < 0) {
580 if (!remap)
581 return cpt;
582
583
584
585 cpt = cpu % cptab->ctb_nparts;
586 }
587
588 return cpt;
589}
590EXPORT_SYMBOL(cfs_cpt_current);
591
592int
593cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
594{
595 LASSERT(cpu >= 0 && cpu < NR_CPUS);
596
597 return cptab->ctb_cpu2cpt[cpu];
598}
599EXPORT_SYMBOL(cfs_cpt_of_cpu);
600
601int
602cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
603{
604 cpumask_t *cpumask;
605 nodemask_t *nodemask;
606 int rc;
607 int i;
608
609 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
610
611 if (cpt == CFS_CPT_ANY) {
612 cpumask = cptab->ctb_cpumask;
613 nodemask = cptab->ctb_nodemask;
614 } else {
615 cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
616 nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
617 }
618
619 if (any_online_cpu(*cpumask) == NR_CPUS) {
620 CERROR("No online CPU found in CPU partition %d, did someone "
621 "do CPU hotplug on system? You might need to reload "
622 "Lustre modules to keep system working well.\n", cpt);
623 return -EINVAL;
624 }
625
626 for_each_online_cpu(i) {
627 if (cpu_isset(i, *cpumask))
628 continue;
629
630 rc = set_cpus_allowed_ptr(current, cpumask);
631 set_mems_allowed(*nodemask);
632 if (rc == 0)
633 schedule();
634
635 return rc;
636 }
637
638
639 return 0;
640}
641EXPORT_SYMBOL(cfs_cpt_bind);
642
643
644
645
646
647static int
648cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
649 cpumask_t *node, int number)
650{
651 cpumask_t *socket = NULL;
652 cpumask_t *core = NULL;
653 int rc = 0;
654 int cpu;
655
656 LASSERT(number > 0);
657
658 if (number >= cpus_weight(*node)) {
659 while (!cpus_empty(*node)) {
660 cpu = first_cpu(*node);
661
662 rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
663 if (!rc)
664 return -EINVAL;
665 cpu_clear(cpu, *node);
666 }
667 return 0;
668 }
669
670
671 LIBCFS_ALLOC(socket, cpumask_size());
672 LIBCFS_ALLOC(core, cpumask_size());
673 if (socket == NULL || core == NULL) {
674 rc = -ENOMEM;
675 goto out;
676 }
677
678 while (!cpus_empty(*node)) {
679 cpu = first_cpu(*node);
680
681
682 cfs_cpu_core_siblings(cpu, socket);
683 cpus_and(*socket, *socket, *node);
684
685 LASSERT(!cpus_empty(*socket));
686
687 while (!cpus_empty(*socket)) {
688 int i;
689
690
691 cfs_cpu_ht_siblings(cpu, core);
692 cpus_and(*core, *core, *node);
693
694 LASSERT(!cpus_empty(*core));
695
696 for_each_cpu_mask(i, *core) {
697 cpu_clear(i, *socket);
698 cpu_clear(i, *node);
699
700 rc = cfs_cpt_set_cpu(cptab, cpt, i);
701 if (!rc) {
702 rc = -EINVAL;
703 goto out;
704 }
705
706 if (--number == 0)
707 goto out;
708 }
709 cpu = first_cpu(*socket);
710 }
711 }
712
713 out:
714 if (socket != NULL)
715 LIBCFS_FREE(socket, cpumask_size());
716 if (core != NULL)
717 LIBCFS_FREE(core, cpumask_size());
718 return rc;
719}
720
/* at or below this many online CPUs a single partition is used */
#define CPT_WEIGHT_MIN 4u

/**
 * Estimate a partition count from the number of online CPUs and NUMA nodes.
 *
 * - Up to CPT_WEIGHT_MIN CPUs: one partition.
 * - Otherwise start from the smallest power of two ncpt >= 2 with
 *   ncpu <= 2 * ncpt * ncpt, then scale the node count by halving or
 *   doubling it towards that value and use the scaled node count.
 * - On 32-bit builds the result is capped at 2.
 * - Finally the count is decreased until it divides the CPU count.
 */
static unsigned int
cfs_cpt_num_estimate(void)
{
	unsigned nnode = num_online_nodes();
	unsigned ncpu = num_online_cpus();
	unsigned ncpt = 1;

	if (ncpu > CPT_WEIGHT_MIN) {
		ncpt = 2;
		while (ncpu > 2 * ncpt * ncpt)
			ncpt <<= 1;

		if (ncpt <= nnode) {
			/* fewer partitions than nodes: halve the node count */
			while (nnode > ncpt)
				nnode >>= 1;
		} else {
			/* more partitions than nodes: double the node count */
			while ((nnode << 1) <= ncpt)
				nnode <<= 1;
		}

		ncpt = nnode;
	}

#if (BITS_PER_LONG == 32)
	/* 32-bit build: never use more than two partitions */
	ncpt = min(2U, ncpt);
#endif
	/* the CPU count must be a multiple of the partition count */
	while (ncpu % ncpt != 0)
		ncpt--;

	return ncpt;
}
762
763static struct cfs_cpt_table *
764cfs_cpt_table_create(int ncpt)
765{
766 struct cfs_cpt_table *cptab = NULL;
767 cpumask_t *mask = NULL;
768 int cpt = 0;
769 int num;
770 int rc;
771 int i;
772
773 rc = cfs_cpt_num_estimate();
774 if (ncpt <= 0)
775 ncpt = rc;
776
777 if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
778 CWARN("CPU partition number %d is larger than suggested "
779 "value (%d), your system may have performance"
780 "issue or run out of memory while under pressure\n",
781 ncpt, rc);
782 }
783
784 if (num_online_cpus() % ncpt != 0) {
785 CERROR("CPU number %d is not multiple of cpu_npartition %d, "
786 "please try different cpu_npartitions value or"
787 "set pattern string by cpu_pattern=STRING\n",
788 (int)num_online_cpus(), ncpt);
789 goto failed;
790 }
791
792 cptab = cfs_cpt_table_alloc(ncpt);
793 if (cptab == NULL) {
794 CERROR("Failed to allocate CPU map(%d)\n", ncpt);
795 goto failed;
796 }
797
798 num = num_online_cpus() / ncpt;
799 if (num == 0) {
800 CERROR("CPU changed while setting CPU partition\n");
801 goto failed;
802 }
803
804 LIBCFS_ALLOC(mask, cpumask_size());
805 if (mask == NULL) {
806 CERROR("Failed to allocate scratch cpumask\n");
807 goto failed;
808 }
809
810 for_each_online_node(i) {
811 cfs_node_to_cpumask(i, mask);
812
813 while (!cpus_empty(*mask)) {
814 struct cfs_cpu_partition *part;
815 int n;
816
817 if (cpt >= ncpt)
818 goto failed;
819
820 part = &cptab->ctb_parts[cpt];
821
822 n = num - cpus_weight(*part->cpt_cpumask);
823 LASSERT(n > 0);
824
825 rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
826 if (rc < 0)
827 goto failed;
828
829 LASSERT(num >= cpus_weight(*part->cpt_cpumask));
830 if (num == cpus_weight(*part->cpt_cpumask))
831 cpt++;
832 }
833 }
834
835 if (cpt != ncpt ||
836 num != cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
837 CERROR("Expect %d(%d) CPU partitions but got %d(%d), "
838 "CPU hotplug/unplug while setting?\n",
839 cptab->ctb_nparts, num, cpt,
840 cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask));
841 goto failed;
842 }
843
844 LIBCFS_FREE(mask, cpumask_size());
845
846 return cptab;
847
848 failed:
849 CERROR("Failed to setup CPU-partition-table with %d "
850 "CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
851 ncpt, num_online_nodes(), num_online_cpus());
852
853 if (mask != NULL)
854 LIBCFS_FREE(mask, cpumask_size());
855
856 if (cptab != NULL)
857 cfs_cpt_table_free(cptab);
858
859 return NULL;
860}
861
/*
 * Build a CPU partition table from the textual description \a pattern.
 *
 * Accepted form, as implemented below: an optional leading 'n' or 'N',
 * followed by one "<partition id>[<number ranges>]" entry per partition.
 * With the 'n'/'N' prefix the numbers are NUMA node IDs and are added with
 * cfs_cpt_set_node(); without it they are CPU IDs and are added with
 * cfs_cpt_set_cpu().  The number of partitions is the number of '['
 * characters in the pattern; every partition id must be in range, may be
 * given only once, and must end up containing at least one online CPU.
 *
 * Returns the new table, or NULL on any parse or allocation error.
 */
static struct cfs_cpt_table *
cfs_cpt_table_create_pattern(char *pattern)
{
 struct cfs_cpt_table *cptab;
 char *str = pattern;
 int node = 0;
 int high;
 int ncpt;
 int c;

 /* count the '[' characters: one per partition */
 for (ncpt = 0;; ncpt++) {
 str = strchr(str, '[');
 if (str == NULL)
 break;
 str++;
 }

 /* optional 'n'/'N' prefix: ranges are NUMA node IDs */
 str = cfs_trimwhite(pattern);
 if (*str == 'n' || *str == 'N') {
 pattern = str + 1;
 node = 1;
 }

 if (ncpt == 0 ||
 (node && ncpt > num_online_nodes()) ||
 (!node && ncpt > num_online_cpus())) {
 CERROR("Invalid pattern %s, or too many partitions %d\n",
 pattern, ncpt);
 return NULL;
 }

 /* largest number accepted inside the brackets */
 high = node ? MAX_NUMNODES - 1 : NR_CPUS - 1;

 cptab = cfs_cpt_table_alloc(ncpt);
 if (cptab == NULL) {
 CERROR("Failed to allocate cpu partition table\n");
 return NULL;
 }

 /* one iteration per "<cpt>[...]" entry; c counts the entries parsed */
 for (str = cfs_trimwhite(pattern), c = 0;; c++) {
 struct cfs_range_expr *range;
 struct cfs_expr_list *el;
 char *bracket = strchr(str, '[');
 int cpt;
 int rc;
 int i;
 int n;

 if (bracket == NULL) {
 /* no more entries: only trailing whitespace is acceptable */
 if (*str != 0) {
 CERROR("Invalid pattern %s\n", str);
 goto failed;
 } else if (c != ncpt) {
 CERROR("expect %d partitions but found %d\n",
 ncpt, c);
 goto failed;
 }
 break;
 }

 /* partition id; n receives the number of characters consumed */
 if (sscanf(str, "%u%n", &cpt, &n) < 1) {
 CERROR("Invalid cpu pattern %s\n", str);
 goto failed;
 }

 if (cpt < 0 || cpt >= ncpt) {
 CERROR("Invalid partition id %d, total partitions %d\n",
 cpt, ncpt);
 goto failed;
 }

 if (cfs_cpt_weight(cptab, cpt) != 0) {
 CERROR("Partition %d has already been set.\n", cpt);
 goto failed;
 }

 /* nothing but whitespace may sit between the id and its '[' */
 str = cfs_trimwhite(str + n);
 if (str != bracket) {
 CERROR("Invalid pattern %s\n", str);
 goto failed;
 }

 bracket = strchr(str, ']');
 if (bracket == NULL) {
 CERROR("missing right bracket for cpt %d, %s\n",
 cpt, str);
 goto failed;
 }

 /* parse "[...]" including both brackets, numbers limited to 0..high */
 if (cfs_expr_list_parse(str, (bracket - str) + 1,
 0, high, &el) != 0) {
 CERROR("Can't parse number range: %s\n", str);
 goto failed;
 }

 list_for_each_entry(range, &el->el_exprs, re_link) {
 for (i = range->re_lo; i <= range->re_hi; i++) {
 /* honour the stride of the range expression */
 if ((i - range->re_lo) % range->re_stride != 0)
 continue;

 rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
 cfs_cpt_set_cpu(cptab, cpt, i);
 if (!rc) {
 cfs_expr_list_free(el);
 goto failed;
 }
 }
 }

 cfs_expr_list_free(el);

 if (!cfs_cpt_online(cptab, cpt)) {
 CERROR("No online CPU is found on partition %d\n", cpt);
 goto failed;
 }

 /* continue after the closing bracket */
 str = cfs_trimwhite(bracket + 1);
 }

 return cptab;

 failed:
 cfs_cpt_table_free(cptab);
 return NULL;
}
987
988#ifdef CONFIG_HOTPLUG_CPU
989static int
990cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
991{
992 unsigned int cpu = (unsigned long)hcpu;
993
994 switch (action) {
995 case CPU_DEAD:
996 case CPU_DEAD_FROZEN:
997 case CPU_ONLINE:
998 case CPU_ONLINE_FROZEN:
999 spin_lock(&cpt_data.cpt_lock);
1000 cpt_data.cpt_version++;
1001 spin_unlock(&cpt_data.cpt_lock);
1002 default:
1003 CWARN("Lustre: can't support CPU hotplug well now, "
1004 "performance and stability could be impacted"
1005 "[CPU %u notify: %lx]\n", cpu, action);
1006 }
1007
1008 return NOTIFY_OK;
1009}
1010
/*
 * Hotplug notifier block wrapping cfs_cpu_notify(); registered in
 * cfs_cpu_init() and unregistered in cfs_cpu_fini().
 */
static struct notifier_block cfs_cpu_notifier = {
 .notifier_call = cfs_cpu_notify,
 .priority = 0
};
1015
1016#endif
1017
1018void
1019cfs_cpu_fini(void)
1020{
1021 if (cfs_cpt_table != NULL)
1022 cfs_cpt_table_free(cfs_cpt_table);
1023
1024#ifdef CONFIG_HOTPLUG_CPU
1025 unregister_hotcpu_notifier(&cfs_cpu_notifier);
1026#endif
1027 if (cpt_data.cpt_cpumask != NULL)
1028 LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
1029}
1030
1031int
1032cfs_cpu_init(void)
1033{
1034 LASSERT(cfs_cpt_table == NULL);
1035
1036 memset(&cpt_data, 0, sizeof(cpt_data));
1037
1038 LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
1039 if (cpt_data.cpt_cpumask == NULL) {
1040 CERROR("Failed to allocate scratch buffer\n");
1041 return -1;
1042 }
1043
1044 spin_lock_init(&cpt_data.cpt_lock);
1045 sema_init(&cpt_data.cpt_mutex, 1);
1046
1047#ifdef CONFIG_HOTPLUG_CPU
1048 register_hotcpu_notifier(&cfs_cpu_notifier);
1049#endif
1050
1051 if (*cpu_pattern != 0) {
1052 cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
1053 if (cfs_cpt_table == NULL) {
1054 CERROR("Failed to create cptab from pattern %s\n",
1055 cpu_pattern);
1056 goto failed;
1057 }
1058
1059 } else {
1060 cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
1061 if (cfs_cpt_table == NULL) {
1062 CERROR("Failed to create ptable with npartitions %d\n",
1063 cpu_npartitions);
1064 goto failed;
1065 }
1066 }
1067
1068 spin_lock(&cpt_data.cpt_lock);
1069 if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
1070 spin_unlock(&cpt_data.cpt_lock);
1071 CERROR("CPU hotplug/unplug during setup\n");
1072 goto failed;
1073 }
1074 spin_unlock(&cpt_data.cpt_lock);
1075
1076 LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
1077 num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
1078 return 0;
1079
1080 failed:
1081 cfs_cpu_fini();
1082 return -1;
1083}
1084
1085#endif
1086