1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#define DEBUG_SUBSYSTEM S_LNET
35
36#include <linux/cpu.h>
37#include <linux/sched.h>
38#include "../../../include/linux/libcfs/libcfs.h"
39
40#ifdef CONFIG_SMP
41
42
43
44
45
46
47
48
49static int cpu_npartitions;
50module_param(cpu_npartitions, int, 0444);
51MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
52
53
54
55
56
57
58
59
60
61
62
63
64static char *cpu_pattern = "";
65module_param(cpu_pattern, charp, 0444);
66MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
67
68struct cfs_cpt_data {
69
70 spinlock_t cpt_lock;
71
72 unsigned long cpt_version;
73
74 struct mutex cpt_mutex;
75
76 cpumask_t *cpt_cpumask;
77};
78
79static struct cfs_cpt_data cpt_data;
80
81static void cfs_cpu_core_siblings(int cpu, cpumask_t *mask)
82{
83
84 cpumask_copy(mask, topology_core_cpumask(cpu));
85}
86
87
88static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask)
89{
90 cpumask_copy(mask, topology_sibling_cpumask(cpu));
91}
92
93static void cfs_node_to_cpumask(int node, cpumask_t *mask)
94{
95 cpumask_copy(mask, cpumask_of_node(node));
96}
97
98void
99cfs_cpt_table_free(struct cfs_cpt_table *cptab)
100{
101 int i;
102
103 if (cptab->ctb_cpu2cpt != NULL) {
104 LIBCFS_FREE(cptab->ctb_cpu2cpt,
105 num_possible_cpus() *
106 sizeof(cptab->ctb_cpu2cpt[0]));
107 }
108
109 for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
110 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
111
112 if (part->cpt_nodemask != NULL) {
113 LIBCFS_FREE(part->cpt_nodemask,
114 sizeof(*part->cpt_nodemask));
115 }
116
117 if (part->cpt_cpumask != NULL)
118 LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
119 }
120
121 if (cptab->ctb_parts != NULL) {
122 LIBCFS_FREE(cptab->ctb_parts,
123 cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
124 }
125
126 if (cptab->ctb_nodemask != NULL)
127 LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
128 if (cptab->ctb_cpumask != NULL)
129 LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
130
131 LIBCFS_FREE(cptab, sizeof(*cptab));
132}
133EXPORT_SYMBOL(cfs_cpt_table_free);
134
135struct cfs_cpt_table *
136cfs_cpt_table_alloc(unsigned int ncpt)
137{
138 struct cfs_cpt_table *cptab;
139 int i;
140
141 LIBCFS_ALLOC(cptab, sizeof(*cptab));
142 if (cptab == NULL)
143 return NULL;
144
145 cptab->ctb_nparts = ncpt;
146
147 LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
148 LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
149
150 if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL)
151 goto failed;
152
153 LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
154 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
155 if (cptab->ctb_cpu2cpt == NULL)
156 goto failed;
157
158 memset(cptab->ctb_cpu2cpt, -1,
159 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
160
161 LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
162 if (cptab->ctb_parts == NULL)
163 goto failed;
164
165 for (i = 0; i < ncpt; i++) {
166 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
167
168 LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
169 LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
170 if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
171 goto failed;
172 }
173
174 spin_lock(&cpt_data.cpt_lock);
175
176 cptab->ctb_version = cpt_data.cpt_version;
177 spin_unlock(&cpt_data.cpt_lock);
178
179 return cptab;
180
181 failed:
182 cfs_cpt_table_free(cptab);
183 return NULL;
184}
185EXPORT_SYMBOL(cfs_cpt_table_alloc);
186
187int
188cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
189{
190 char *tmp = buf;
191 int rc = 0;
192 int i;
193 int j;
194
195 for (i = 0; i < cptab->ctb_nparts; i++) {
196 if (len > 0) {
197 rc = snprintf(tmp, len, "%d\t: ", i);
198 len -= rc;
199 }
200
201 if (len <= 0) {
202 rc = -EFBIG;
203 goto out;
204 }
205
206 tmp += rc;
207 for_each_cpu(j, cptab->ctb_parts[i].cpt_cpumask) {
208 rc = snprintf(tmp, len, "%d ", j);
209 len -= rc;
210 if (len <= 0) {
211 rc = -EFBIG;
212 goto out;
213 }
214 tmp += rc;
215 }
216
217 *tmp = '\n';
218 tmp++;
219 len--;
220 }
221
222 out:
223 if (rc < 0)
224 return rc;
225
226 return tmp - buf;
227}
228EXPORT_SYMBOL(cfs_cpt_table_print);
229
230int
231cfs_cpt_number(struct cfs_cpt_table *cptab)
232{
233 return cptab->ctb_nparts;
234}
235EXPORT_SYMBOL(cfs_cpt_number);
236
237int
238cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
239{
240 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
241
242 return cpt == CFS_CPT_ANY ?
243 cpumask_weight(cptab->ctb_cpumask) :
244 cpumask_weight(cptab->ctb_parts[cpt].cpt_cpumask);
245}
246EXPORT_SYMBOL(cfs_cpt_weight);
247
248int
249cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
250{
251 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
252
253 return cpt == CFS_CPT_ANY ?
254 cpumask_any_and(cptab->ctb_cpumask,
255 cpu_online_mask) < nr_cpu_ids :
256 cpumask_any_and(cptab->ctb_parts[cpt].cpt_cpumask,
257 cpu_online_mask) < nr_cpu_ids;
258}
259EXPORT_SYMBOL(cfs_cpt_online);
260
261cpumask_t *
262cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
263{
264 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
265
266 return cpt == CFS_CPT_ANY ?
267 cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
268}
269EXPORT_SYMBOL(cfs_cpt_cpumask);
270
271nodemask_t *
272cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
273{
274 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
275
276 return cpt == CFS_CPT_ANY ?
277 cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
278}
279EXPORT_SYMBOL(cfs_cpt_nodemask);
280
281int
282cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
283{
284 int node;
285
286 LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
287
288 if (cpu < 0 || cpu >= nr_cpu_ids || !cpu_online(cpu)) {
289 CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
290 return 0;
291 }
292
293 if (cptab->ctb_cpu2cpt[cpu] != -1) {
294 CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
295 cpu, cptab->ctb_cpu2cpt[cpu]);
296 return 0;
297 }
298
299 cptab->ctb_cpu2cpt[cpu] = cpt;
300
301 LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_cpumask));
302 LASSERT(!cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
303
304 cpumask_set_cpu(cpu, cptab->ctb_cpumask);
305 cpumask_set_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
306
307 node = cpu_to_node(cpu);
308
309
310 if (!node_isset(node, *cptab->ctb_nodemask))
311 node_set(node, *cptab->ctb_nodemask);
312
313
314 if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
315 node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
316
317 return 1;
318}
319EXPORT_SYMBOL(cfs_cpt_set_cpu);
320
321void
322cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
323{
324 int node;
325 int i;
326
327 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
328
329 if (cpu < 0 || cpu >= nr_cpu_ids) {
330 CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
331 return;
332 }
333
334 if (cpt == CFS_CPT_ANY) {
335
336 cpt = cptab->ctb_cpu2cpt[cpu];
337 if (cpt < 0) {
338 CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
339 cpt, cptab);
340 return;
341 }
342
343 } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
344 CDEBUG(D_INFO,
345 "CPU %d is not in cpu-partition %d\n", cpu, cpt);
346 return;
347 }
348
349 LASSERT(cpumask_test_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask));
350 LASSERT(cpumask_test_cpu(cpu, cptab->ctb_cpumask));
351
352 cpumask_clear_cpu(cpu, cptab->ctb_parts[cpt].cpt_cpumask);
353 cpumask_clear_cpu(cpu, cptab->ctb_cpumask);
354 cptab->ctb_cpu2cpt[cpu] = -1;
355
356 node = cpu_to_node(cpu);
357
358 LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
359 LASSERT(node_isset(node, *cptab->ctb_nodemask));
360
361 for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask) {
362
363 if (cpu_to_node(i) == node)
364 break;
365 }
366
367 if (i >= nr_cpu_ids)
368 node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
369
370 for_each_cpu(i, cptab->ctb_cpumask) {
371
372 if (cpu_to_node(i) == node)
373 break;
374 }
375
376 if (i >= nr_cpu_ids)
377 node_clear(node, *cptab->ctb_nodemask);
378
379 return;
380}
381EXPORT_SYMBOL(cfs_cpt_unset_cpu);
382
383int
384cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
385{
386 int i;
387
388 if (cpumask_weight(mask) == 0 ||
389 cpumask_any_and(mask, cpu_online_mask) >= nr_cpu_ids) {
390 CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
391 cpt);
392 return 0;
393 }
394
395 for_each_cpu(i, mask) {
396 if (!cfs_cpt_set_cpu(cptab, cpt, i))
397 return 0;
398 }
399
400 return 1;
401}
402EXPORT_SYMBOL(cfs_cpt_set_cpumask);
403
404void
405cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
406{
407 int i;
408
409 for_each_cpu(i, mask)
410 cfs_cpt_unset_cpu(cptab, cpt, i);
411}
412EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
413
414int
415cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
416{
417 cpumask_t *mask;
418 int rc;
419
420 if (node < 0 || node >= MAX_NUMNODES) {
421 CDEBUG(D_INFO,
422 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
423 return 0;
424 }
425
426 mutex_lock(&cpt_data.cpt_mutex);
427
428 mask = cpt_data.cpt_cpumask;
429 cfs_node_to_cpumask(node, mask);
430
431 rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
432
433 mutex_unlock(&cpt_data.cpt_mutex);
434
435 return rc;
436}
437EXPORT_SYMBOL(cfs_cpt_set_node);
438
439void
440cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
441{
442 cpumask_t *mask;
443
444 if (node < 0 || node >= MAX_NUMNODES) {
445 CDEBUG(D_INFO,
446 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
447 return;
448 }
449
450 mutex_lock(&cpt_data.cpt_mutex);
451
452 mask = cpt_data.cpt_cpumask;
453 cfs_node_to_cpumask(node, mask);
454
455 cfs_cpt_unset_cpumask(cptab, cpt, mask);
456
457 mutex_unlock(&cpt_data.cpt_mutex);
458}
459EXPORT_SYMBOL(cfs_cpt_unset_node);
460
461int
462cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
463{
464 int i;
465
466 for_each_node_mask(i, *mask) {
467 if (!cfs_cpt_set_node(cptab, cpt, i))
468 return 0;
469 }
470
471 return 1;
472}
473EXPORT_SYMBOL(cfs_cpt_set_nodemask);
474
475void
476cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
477{
478 int i;
479
480 for_each_node_mask(i, *mask)
481 cfs_cpt_unset_node(cptab, cpt, i);
482}
483EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
484
485void
486cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
487{
488 int last;
489 int i;
490
491 if (cpt == CFS_CPT_ANY) {
492 last = cptab->ctb_nparts - 1;
493 cpt = 0;
494 } else {
495 last = cpt;
496 }
497
498 for (; cpt <= last; cpt++) {
499 for_each_cpu(i, cptab->ctb_parts[cpt].cpt_cpumask)
500 cfs_cpt_unset_cpu(cptab, cpt, i);
501 }
502}
503EXPORT_SYMBOL(cfs_cpt_clear);
504
505int
506cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
507{
508 nodemask_t *mask;
509 int weight;
510 int rotor;
511 int node;
512
513
514
515 if (cpt < 0 || cpt >= cptab->ctb_nparts) {
516 mask = cptab->ctb_nodemask;
517 rotor = cptab->ctb_spread_rotor++;
518 } else {
519 mask = cptab->ctb_parts[cpt].cpt_nodemask;
520 rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
521 }
522
523 weight = nodes_weight(*mask);
524 LASSERT(weight > 0);
525
526 rotor %= weight;
527
528 for_each_node_mask(node, *mask) {
529 if (rotor-- == 0)
530 return node;
531 }
532
533 LBUG();
534 return 0;
535}
536EXPORT_SYMBOL(cfs_cpt_spread_node);
537
538int
539cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
540{
541 int cpu = smp_processor_id();
542 int cpt = cptab->ctb_cpu2cpt[cpu];
543
544 if (cpt < 0) {
545 if (!remap)
546 return cpt;
547
548
549
550 cpt = cpu % cptab->ctb_nparts;
551 }
552
553 return cpt;
554}
555EXPORT_SYMBOL(cfs_cpt_current);
556
557int
558cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
559{
560 LASSERT(cpu >= 0 && cpu < nr_cpu_ids);
561
562 return cptab->ctb_cpu2cpt[cpu];
563}
564EXPORT_SYMBOL(cfs_cpt_of_cpu);
565
566int
567cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
568{
569 cpumask_t *cpumask;
570 nodemask_t *nodemask;
571 int rc;
572 int i;
573
574 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
575
576 if (cpt == CFS_CPT_ANY) {
577 cpumask = cptab->ctb_cpumask;
578 nodemask = cptab->ctb_nodemask;
579 } else {
580 cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
581 nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
582 }
583
584 if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) {
585 CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
586 cpt);
587 return -EINVAL;
588 }
589
590 for_each_online_cpu(i) {
591 if (cpumask_test_cpu(i, cpumask))
592 continue;
593
594 rc = set_cpus_allowed_ptr(current, cpumask);
595 set_mems_allowed(*nodemask);
596 if (rc == 0)
597 schedule();
598
599 return rc;
600 }
601
602
603 return 0;
604}
605EXPORT_SYMBOL(cfs_cpt_bind);
606
607
608
609
610
611static int
612cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
613 cpumask_t *node, int number)
614{
615 cpumask_t *socket = NULL;
616 cpumask_t *core = NULL;
617 int rc = 0;
618 int cpu;
619
620 LASSERT(number > 0);
621
622 if (number >= cpumask_weight(node)) {
623 while (!cpumask_empty(node)) {
624 cpu = cpumask_first(node);
625
626 rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
627 if (!rc)
628 return -EINVAL;
629 cpumask_clear_cpu(cpu, node);
630 }
631 return 0;
632 }
633
634
635 LIBCFS_ALLOC(socket, cpumask_size());
636 LIBCFS_ALLOC(core, cpumask_size());
637 if (socket == NULL || core == NULL) {
638 rc = -ENOMEM;
639 goto out;
640 }
641
642 while (!cpumask_empty(node)) {
643 cpu = cpumask_first(node);
644
645
646 cfs_cpu_core_siblings(cpu, socket);
647 cpumask_and(socket, socket, node);
648
649 LASSERT(!cpumask_empty(socket));
650
651 while (!cpumask_empty(socket)) {
652 int i;
653
654
655 cfs_cpu_ht_siblings(cpu, core);
656 cpumask_and(core, core, node);
657
658 LASSERT(!cpumask_empty(core));
659
660 for_each_cpu(i, core) {
661 cpumask_clear_cpu(i, socket);
662 cpumask_clear_cpu(i, node);
663
664 rc = cfs_cpt_set_cpu(cptab, cpt, i);
665 if (!rc) {
666 rc = -EINVAL;
667 goto out;
668 }
669
670 if (--number == 0)
671 goto out;
672 }
673 cpu = cpumask_first(socket);
674 }
675 }
676
677 out:
678 if (socket != NULL)
679 LIBCFS_FREE(socket, cpumask_size());
680 if (core != NULL)
681 LIBCFS_FREE(core, cpumask_size());
682 return rc;
683}
684
/* With this many online CPUs or fewer, a single partition is used. */
#define CPT_WEIGHT_MIN	4u

/*
 * Suggest a partition count for the current machine.
 *
 * Up to CPT_WEIGHT_MIN online CPUs: one partition.  Otherwise start
 * from the smallest power of two ncpt >= 2 with ncpu <= 2 * ncpt^2,
 * then snap to the online node count scaled by powers of two: halve the
 * node count until it no longer exceeds ncpt, or double it as long as
 * the result does not exceed ncpt.
 *
 * On 32-bit builds the result is capped at 2.  Finally the count is
 * lowered until it divides the number of online CPUs evenly.
 */
static unsigned int
cfs_cpt_num_estimate(void)
{
	unsigned nnode = num_online_nodes();
	unsigned ncpu = num_online_cpus();
	unsigned ncpt = 1;

	if (ncpu > CPT_WEIGHT_MIN) {
		ncpt = 2;
		while (ncpu > 2 * ncpt * ncpt)
			ncpt <<= 1;

		if (ncpt <= nnode) {
			/* fewer partitions than nodes: shrink node count */
			while (nnode > ncpt)
				nnode >>= 1;
		} else {
			/* more partitions than nodes: grow node count */
			while ((nnode << 1) <= ncpt)
				nnode <<= 1;
		}

		ncpt = nnode;
	}

#if (BITS_PER_LONG == 32)
	ncpt = min(2U, ncpt);
#endif
	/* terminates at the latest with ncpt == 1 */
	while (ncpu % ncpt != 0)
		ncpt--;

	return ncpt;
}
727
728static struct cfs_cpt_table *
729cfs_cpt_table_create(int ncpt)
730{
731 struct cfs_cpt_table *cptab = NULL;
732 cpumask_t *mask = NULL;
733 int cpt = 0;
734 int num;
735 int rc;
736 int i;
737
738 rc = cfs_cpt_num_estimate();
739 if (ncpt <= 0)
740 ncpt = rc;
741
742 if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
743 CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
744 ncpt, rc);
745 }
746
747 if (num_online_cpus() % ncpt != 0) {
748 CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
749 (int)num_online_cpus(), ncpt);
750 goto failed;
751 }
752
753 cptab = cfs_cpt_table_alloc(ncpt);
754 if (cptab == NULL) {
755 CERROR("Failed to allocate CPU map(%d)\n", ncpt);
756 goto failed;
757 }
758
759 num = num_online_cpus() / ncpt;
760 if (num == 0) {
761 CERROR("CPU changed while setting CPU partition\n");
762 goto failed;
763 }
764
765 LIBCFS_ALLOC(mask, cpumask_size());
766 if (mask == NULL) {
767 CERROR("Failed to allocate scratch cpumask\n");
768 goto failed;
769 }
770
771 for_each_online_node(i) {
772 cfs_node_to_cpumask(i, mask);
773
774 while (!cpumask_empty(mask)) {
775 struct cfs_cpu_partition *part;
776 int n;
777
778 if (cpt >= ncpt)
779 goto failed;
780
781 part = &cptab->ctb_parts[cpt];
782
783 n = num - cpumask_weight(part->cpt_cpumask);
784 LASSERT(n > 0);
785
786 rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
787 if (rc < 0)
788 goto failed;
789
790 LASSERT(num >= cpumask_weight(part->cpt_cpumask));
791 if (num == cpumask_weight(part->cpt_cpumask))
792 cpt++;
793 }
794 }
795
796 if (cpt != ncpt ||
797 num != cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
798 CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
799 cptab->ctb_nparts, num, cpt,
800 cpumask_weight(cptab->ctb_parts[ncpt - 1].cpt_cpumask));
801 goto failed;
802 }
803
804 LIBCFS_FREE(mask, cpumask_size());
805
806 return cptab;
807
808 failed:
809 CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
810 ncpt, num_online_nodes(), num_online_cpus());
811
812 if (mask != NULL)
813 LIBCFS_FREE(mask, cpumask_size());
814
815 if (cptab != NULL)
816 cfs_cpt_table_free(cptab);
817
818 return NULL;
819}
820
821static struct cfs_cpt_table *
822cfs_cpt_table_create_pattern(char *pattern)
823{
824 struct cfs_cpt_table *cptab;
825 char *str = pattern;
826 int node = 0;
827 int high;
828 int ncpt;
829 int c;
830
831 for (ncpt = 0;; ncpt++) {
832 str = strchr(str, '[');
833 if (str == NULL)
834 break;
835 str++;
836 }
837
838 str = cfs_trimwhite(pattern);
839 if (*str == 'n' || *str == 'N') {
840 pattern = str + 1;
841 node = 1;
842 }
843
844 if (ncpt == 0 ||
845 (node && ncpt > num_online_nodes()) ||
846 (!node && ncpt > num_online_cpus())) {
847 CERROR("Invalid pattern %s, or too many partitions %d\n",
848 pattern, ncpt);
849 return NULL;
850 }
851
852 high = node ? MAX_NUMNODES - 1 : nr_cpu_ids - 1;
853
854 cptab = cfs_cpt_table_alloc(ncpt);
855 if (cptab == NULL) {
856 CERROR("Failed to allocate cpu partition table\n");
857 return NULL;
858 }
859
860 for (str = cfs_trimwhite(pattern), c = 0;; c++) {
861 struct cfs_range_expr *range;
862 struct cfs_expr_list *el;
863 char *bracket = strchr(str, '[');
864 int cpt;
865 int rc;
866 int i;
867 int n;
868
869 if (bracket == NULL) {
870 if (*str != 0) {
871 CERROR("Invalid pattern %s\n", str);
872 goto failed;
873 } else if (c != ncpt) {
874 CERROR("expect %d partitions but found %d\n",
875 ncpt, c);
876 goto failed;
877 }
878 break;
879 }
880
881 if (sscanf(str, "%d%n", &cpt, &n) < 1) {
882 CERROR("Invalid cpu pattern %s\n", str);
883 goto failed;
884 }
885
886 if (cpt < 0 || cpt >= ncpt) {
887 CERROR("Invalid partition id %d, total partitions %d\n",
888 cpt, ncpt);
889 goto failed;
890 }
891
892 if (cfs_cpt_weight(cptab, cpt) != 0) {
893 CERROR("Partition %d has already been set.\n", cpt);
894 goto failed;
895 }
896
897 str = cfs_trimwhite(str + n);
898 if (str != bracket) {
899 CERROR("Invalid pattern %s\n", str);
900 goto failed;
901 }
902
903 bracket = strchr(str, ']');
904 if (bracket == NULL) {
905 CERROR("missing right bracket for cpt %d, %s\n",
906 cpt, str);
907 goto failed;
908 }
909
910 if (cfs_expr_list_parse(str, (bracket - str) + 1,
911 0, high, &el) != 0) {
912 CERROR("Can't parse number range: %s\n", str);
913 goto failed;
914 }
915
916 list_for_each_entry(range, &el->el_exprs, re_link) {
917 for (i = range->re_lo; i <= range->re_hi; i++) {
918 if ((i - range->re_lo) % range->re_stride != 0)
919 continue;
920
921 rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
922 cfs_cpt_set_cpu(cptab, cpt, i);
923 if (!rc) {
924 cfs_expr_list_free(el);
925 goto failed;
926 }
927 }
928 }
929
930 cfs_expr_list_free(el);
931
932 if (!cfs_cpt_online(cptab, cpt)) {
933 CERROR("No online CPU is found on partition %d\n", cpt);
934 goto failed;
935 }
936
937 str = cfs_trimwhite(bracket + 1);
938 }
939
940 return cptab;
941
942 failed:
943 cfs_cpt_table_free(cptab);
944 return NULL;
945}
946
947#ifdef CONFIG_HOTPLUG_CPU
948static int
949cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
950{
951 unsigned int cpu = (unsigned long)hcpu;
952 bool warn;
953
954 switch (action) {
955 case CPU_DEAD:
956 case CPU_DEAD_FROZEN:
957 case CPU_ONLINE:
958 case CPU_ONLINE_FROZEN:
959 spin_lock(&cpt_data.cpt_lock);
960 cpt_data.cpt_version++;
961 spin_unlock(&cpt_data.cpt_lock);
962 default:
963 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
964 CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
965 cpu, action);
966 break;
967 }
968
969 mutex_lock(&cpt_data.cpt_mutex);
970
971 cfs_cpu_ht_siblings(cpu, cpt_data.cpt_cpumask);
972 warn = cpumask_any_and(cpt_data.cpt_cpumask,
973 cpu_online_mask) >= nr_cpu_ids;
974 mutex_unlock(&cpt_data.cpt_mutex);
975 CDEBUG(warn ? D_WARNING : D_INFO,
976 "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
977 cpu, action);
978 }
979
980 return NOTIFY_OK;
981}
982
983static struct notifier_block cfs_cpu_notifier = {
984 .notifier_call = cfs_cpu_notify,
985 .priority = 0
986};
987
988#endif
989
990void
991cfs_cpu_fini(void)
992{
993 if (cfs_cpt_table != NULL)
994 cfs_cpt_table_free(cfs_cpt_table);
995
996#ifdef CONFIG_HOTPLUG_CPU
997 unregister_hotcpu_notifier(&cfs_cpu_notifier);
998#endif
999 if (cpt_data.cpt_cpumask != NULL)
1000 LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
1001}
1002
1003int
1004cfs_cpu_init(void)
1005{
1006 LASSERT(cfs_cpt_table == NULL);
1007
1008 memset(&cpt_data, 0, sizeof(cpt_data));
1009
1010 LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
1011 if (cpt_data.cpt_cpumask == NULL) {
1012 CERROR("Failed to allocate scratch buffer\n");
1013 return -1;
1014 }
1015
1016 spin_lock_init(&cpt_data.cpt_lock);
1017 mutex_init(&cpt_data.cpt_mutex);
1018
1019#ifdef CONFIG_HOTPLUG_CPU
1020 register_hotcpu_notifier(&cfs_cpu_notifier);
1021#endif
1022
1023 if (*cpu_pattern != 0) {
1024 cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
1025 if (cfs_cpt_table == NULL) {
1026 CERROR("Failed to create cptab from pattern %s\n",
1027 cpu_pattern);
1028 goto failed;
1029 }
1030
1031 } else {
1032 cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
1033 if (cfs_cpt_table == NULL) {
1034 CERROR("Failed to create ptable with npartitions %d\n",
1035 cpu_npartitions);
1036 goto failed;
1037 }
1038 }
1039
1040 spin_lock(&cpt_data.cpt_lock);
1041 if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
1042 spin_unlock(&cpt_data.cpt_lock);
1043 CERROR("CPU hotplug/unplug during setup\n");
1044 goto failed;
1045 }
1046 spin_unlock(&cpt_data.cpt_lock);
1047
1048 LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
1049 num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
1050 return 0;
1051
1052 failed:
1053 cfs_cpu_fini();
1054 return -1;
1055}
1056
1057#endif
1058