1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34#define DEBUG_SUBSYSTEM S_LNET
35
36#include <linux/cpu.h>
37#include <linux/sched.h>
38#include "../../../include/linux/libcfs/libcfs.h"
39
40#ifdef CONFIG_SMP
41
42
43
44
45
46
47
48
/*
 * Requested number of CPU partitions, exported as a module parameter with
 * mode 0444.
 *
 * It is only consulted by cfs_cpu_init() when cpu_pattern is empty; a value
 * <= 0 makes cfs_cpt_table_create() fall back to cfs_cpt_num_estimate().
 */
static int cpu_npartitions;
module_param(cpu_npartitions, int, 0444);
MODULE_PARM_DESC(cpu_npartitions, "# of CPU partitions");
52
53
54
55
56
57
58
59
60
61
62
63
/*
 * Explicit CPU partition layout, exported as a module parameter with mode
 * 0444. When non-empty it takes precedence over cpu_npartitions in
 * cfs_cpu_init().
 *
 * As parsed by cfs_cpt_table_create_pattern(), the string is a sequence of
 * "<partition-id>[<number range list>]" items; the number of '[' characters
 * determines the partition count. An optional leading 'n' or 'N' makes the
 * numbers inside the brackets NUMA node ids instead of CPU ids.
 */
static char *cpu_pattern = "";
module_param(cpu_pattern, charp, 0444);
MODULE_PARM_DESC(cpu_pattern, "CPU partitions pattern");
67
/* File-global bookkeeping shared by all CPU partition tables. */
struct cfs_cpt_data {
	/* taken around every read/update of cpt_version in this file */
	spinlock_t cpt_lock;
	/* bumped by cfs_cpu_notify() on CPU_DEAD/CPU_ONLINE events */
	unsigned long cpt_version;
	/* held while cpt_cpumask is in use as a scratch buffer */
	struct mutex cpt_mutex;
	/* scratch cpumask, allocated in cfs_cpu_init() */
	cpumask_t *cpt_cpumask;
};

/* Single instance; (re)initialised by cfs_cpu_init(). */
static struct cfs_cpt_data cpt_data;
80
81static void cfs_cpu_core_siblings(int cpu, cpumask_t *mask)
82{
83
84 cpumask_copy(mask, topology_core_cpumask(cpu));
85}
86
87
88static void cfs_cpu_ht_siblings(int cpu, cpumask_t *mask)
89{
90 cpumask_copy(mask, topology_thread_cpumask(cpu));
91}
92
93static void cfs_node_to_cpumask(int node, cpumask_t *mask)
94{
95 cpumask_copy(mask, cpumask_of_node(node));
96}
97
98void
99cfs_cpt_table_free(struct cfs_cpt_table *cptab)
100{
101 int i;
102
103 if (cptab->ctb_cpu2cpt != NULL) {
104 LIBCFS_FREE(cptab->ctb_cpu2cpt,
105 num_possible_cpus() *
106 sizeof(cptab->ctb_cpu2cpt[0]));
107 }
108
109 for (i = 0; cptab->ctb_parts != NULL && i < cptab->ctb_nparts; i++) {
110 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
111
112 if (part->cpt_nodemask != NULL) {
113 LIBCFS_FREE(part->cpt_nodemask,
114 sizeof(*part->cpt_nodemask));
115 }
116
117 if (part->cpt_cpumask != NULL)
118 LIBCFS_FREE(part->cpt_cpumask, cpumask_size());
119 }
120
121 if (cptab->ctb_parts != NULL) {
122 LIBCFS_FREE(cptab->ctb_parts,
123 cptab->ctb_nparts * sizeof(cptab->ctb_parts[0]));
124 }
125
126 if (cptab->ctb_nodemask != NULL)
127 LIBCFS_FREE(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
128 if (cptab->ctb_cpumask != NULL)
129 LIBCFS_FREE(cptab->ctb_cpumask, cpumask_size());
130
131 LIBCFS_FREE(cptab, sizeof(*cptab));
132}
133EXPORT_SYMBOL(cfs_cpt_table_free);
134
135struct cfs_cpt_table *
136cfs_cpt_table_alloc(unsigned int ncpt)
137{
138 struct cfs_cpt_table *cptab;
139 int i;
140
141 LIBCFS_ALLOC(cptab, sizeof(*cptab));
142 if (cptab == NULL)
143 return NULL;
144
145 cptab->ctb_nparts = ncpt;
146
147 LIBCFS_ALLOC(cptab->ctb_cpumask, cpumask_size());
148 LIBCFS_ALLOC(cptab->ctb_nodemask, sizeof(*cptab->ctb_nodemask));
149
150 if (cptab->ctb_cpumask == NULL || cptab->ctb_nodemask == NULL)
151 goto failed;
152
153 LIBCFS_ALLOC(cptab->ctb_cpu2cpt,
154 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
155 if (cptab->ctb_cpu2cpt == NULL)
156 goto failed;
157
158 memset(cptab->ctb_cpu2cpt, -1,
159 num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
160
161 LIBCFS_ALLOC(cptab->ctb_parts, ncpt * sizeof(cptab->ctb_parts[0]));
162 if (cptab->ctb_parts == NULL)
163 goto failed;
164
165 for (i = 0; i < ncpt; i++) {
166 struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
167
168 LIBCFS_ALLOC(part->cpt_cpumask, cpumask_size());
169 LIBCFS_ALLOC(part->cpt_nodemask, sizeof(*part->cpt_nodemask));
170 if (part->cpt_cpumask == NULL || part->cpt_nodemask == NULL)
171 goto failed;
172 }
173
174 spin_lock(&cpt_data.cpt_lock);
175
176 cptab->ctb_version = cpt_data.cpt_version;
177 spin_unlock(&cpt_data.cpt_lock);
178
179 return cptab;
180
181 failed:
182 cfs_cpt_table_free(cptab);
183 return NULL;
184}
185EXPORT_SYMBOL(cfs_cpt_table_alloc);
186
187int
188cfs_cpt_table_print(struct cfs_cpt_table *cptab, char *buf, int len)
189{
190 char *tmp = buf;
191 int rc = 0;
192 int i;
193 int j;
194
195 for (i = 0; i < cptab->ctb_nparts; i++) {
196 if (len > 0) {
197 rc = snprintf(tmp, len, "%d\t: ", i);
198 len -= rc;
199 }
200
201 if (len <= 0) {
202 rc = -EFBIG;
203 goto out;
204 }
205
206 tmp += rc;
207 for_each_cpu_mask(j, *cptab->ctb_parts[i].cpt_cpumask) {
208 rc = snprintf(tmp, len, "%d ", j);
209 len -= rc;
210 if (len <= 0) {
211 rc = -EFBIG;
212 goto out;
213 }
214 tmp += rc;
215 }
216
217 *tmp = '\n';
218 tmp++;
219 len--;
220 }
221
222 out:
223 if (rc < 0)
224 return rc;
225
226 return tmp - buf;
227}
228EXPORT_SYMBOL(cfs_cpt_table_print);
229
230int
231cfs_cpt_number(struct cfs_cpt_table *cptab)
232{
233 return cptab->ctb_nparts;
234}
235EXPORT_SYMBOL(cfs_cpt_number);
236
237int
238cfs_cpt_weight(struct cfs_cpt_table *cptab, int cpt)
239{
240 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
241
242 return cpt == CFS_CPT_ANY ?
243 cpus_weight(*cptab->ctb_cpumask) :
244 cpus_weight(*cptab->ctb_parts[cpt].cpt_cpumask);
245}
246EXPORT_SYMBOL(cfs_cpt_weight);
247
248int
249cfs_cpt_online(struct cfs_cpt_table *cptab, int cpt)
250{
251 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
252
253 return cpt == CFS_CPT_ANY ?
254 any_online_cpu(*cptab->ctb_cpumask) != NR_CPUS :
255 any_online_cpu(*cptab->ctb_parts[cpt].cpt_cpumask) != NR_CPUS;
256}
257EXPORT_SYMBOL(cfs_cpt_online);
258
259cpumask_t *
260cfs_cpt_cpumask(struct cfs_cpt_table *cptab, int cpt)
261{
262 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
263
264 return cpt == CFS_CPT_ANY ?
265 cptab->ctb_cpumask : cptab->ctb_parts[cpt].cpt_cpumask;
266}
267EXPORT_SYMBOL(cfs_cpt_cpumask);
268
269nodemask_t *
270cfs_cpt_nodemask(struct cfs_cpt_table *cptab, int cpt)
271{
272 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
273
274 return cpt == CFS_CPT_ANY ?
275 cptab->ctb_nodemask : cptab->ctb_parts[cpt].cpt_nodemask;
276}
277EXPORT_SYMBOL(cfs_cpt_nodemask);
278
279int
280cfs_cpt_set_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
281{
282 int node;
283
284 LASSERT(cpt >= 0 && cpt < cptab->ctb_nparts);
285
286 if (cpu < 0 || cpu >= NR_CPUS || !cpu_online(cpu)) {
287 CDEBUG(D_INFO, "CPU %d is invalid or it's offline\n", cpu);
288 return 0;
289 }
290
291 if (cptab->ctb_cpu2cpt[cpu] != -1) {
292 CDEBUG(D_INFO, "CPU %d is already in partition %d\n",
293 cpu, cptab->ctb_cpu2cpt[cpu]);
294 return 0;
295 }
296
297 cptab->ctb_cpu2cpt[cpu] = cpt;
298
299 LASSERT(!cpu_isset(cpu, *cptab->ctb_cpumask));
300 LASSERT(!cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
301
302 cpu_set(cpu, *cptab->ctb_cpumask);
303 cpu_set(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
304
305 node = cpu_to_node(cpu);
306
307
308 if (!node_isset(node, *cptab->ctb_nodemask))
309 node_set(node, *cptab->ctb_nodemask);
310
311
312 if (!node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask))
313 node_set(node, *cptab->ctb_parts[cpt].cpt_nodemask);
314
315 return 1;
316}
317EXPORT_SYMBOL(cfs_cpt_set_cpu);
318
319void
320cfs_cpt_unset_cpu(struct cfs_cpt_table *cptab, int cpt, int cpu)
321{
322 int node;
323 int i;
324
325 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
326
327 if (cpu < 0 || cpu >= NR_CPUS) {
328 CDEBUG(D_INFO, "Invalid CPU id %d\n", cpu);
329 return;
330 }
331
332 if (cpt == CFS_CPT_ANY) {
333
334 cpt = cptab->ctb_cpu2cpt[cpu];
335 if (cpt < 0) {
336 CDEBUG(D_INFO, "Try to unset cpu %d which is not in CPT-table %p\n",
337 cpt, cptab);
338 return;
339 }
340
341 } else if (cpt != cptab->ctb_cpu2cpt[cpu]) {
342 CDEBUG(D_INFO,
343 "CPU %d is not in cpu-partition %d\n", cpu, cpt);
344 return;
345 }
346
347 LASSERT(cpu_isset(cpu, *cptab->ctb_parts[cpt].cpt_cpumask));
348 LASSERT(cpu_isset(cpu, *cptab->ctb_cpumask));
349
350 cpu_clear(cpu, *cptab->ctb_parts[cpt].cpt_cpumask);
351 cpu_clear(cpu, *cptab->ctb_cpumask);
352 cptab->ctb_cpu2cpt[cpu] = -1;
353
354 node = cpu_to_node(cpu);
355
356 LASSERT(node_isset(node, *cptab->ctb_parts[cpt].cpt_nodemask));
357 LASSERT(node_isset(node, *cptab->ctb_nodemask));
358
359 for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask) {
360
361 if (cpu_to_node(i) == node)
362 break;
363 }
364
365 if (i == NR_CPUS)
366 node_clear(node, *cptab->ctb_parts[cpt].cpt_nodemask);
367
368 for_each_cpu_mask(i, *cptab->ctb_cpumask) {
369
370 if (cpu_to_node(i) == node)
371 break;
372 }
373
374 if (i == NR_CPUS)
375 node_clear(node, *cptab->ctb_nodemask);
376
377 return;
378}
379EXPORT_SYMBOL(cfs_cpt_unset_cpu);
380
381int
382cfs_cpt_set_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
383{
384 int i;
385
386 if (cpus_weight(*mask) == 0 || any_online_cpu(*mask) == NR_CPUS) {
387 CDEBUG(D_INFO, "No online CPU is found in the CPU mask for CPU partition %d\n",
388 cpt);
389 return 0;
390 }
391
392 for_each_cpu_mask(i, *mask) {
393 if (!cfs_cpt_set_cpu(cptab, cpt, i))
394 return 0;
395 }
396
397 return 1;
398}
399EXPORT_SYMBOL(cfs_cpt_set_cpumask);
400
401void
402cfs_cpt_unset_cpumask(struct cfs_cpt_table *cptab, int cpt, cpumask_t *mask)
403{
404 int i;
405
406 for_each_cpu_mask(i, *mask)
407 cfs_cpt_unset_cpu(cptab, cpt, i);
408}
409EXPORT_SYMBOL(cfs_cpt_unset_cpumask);
410
411int
412cfs_cpt_set_node(struct cfs_cpt_table *cptab, int cpt, int node)
413{
414 cpumask_t *mask;
415 int rc;
416
417 if (node < 0 || node >= MAX_NUMNODES) {
418 CDEBUG(D_INFO,
419 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
420 return 0;
421 }
422
423 mutex_lock(&cpt_data.cpt_mutex);
424
425 mask = cpt_data.cpt_cpumask;
426 cfs_node_to_cpumask(node, mask);
427
428 rc = cfs_cpt_set_cpumask(cptab, cpt, mask);
429
430 mutex_unlock(&cpt_data.cpt_mutex);
431
432 return rc;
433}
434EXPORT_SYMBOL(cfs_cpt_set_node);
435
436void
437cfs_cpt_unset_node(struct cfs_cpt_table *cptab, int cpt, int node)
438{
439 cpumask_t *mask;
440
441 if (node < 0 || node >= MAX_NUMNODES) {
442 CDEBUG(D_INFO,
443 "Invalid NUMA id %d for CPU partition %d\n", node, cpt);
444 return;
445 }
446
447 mutex_lock(&cpt_data.cpt_mutex);
448
449 mask = cpt_data.cpt_cpumask;
450 cfs_node_to_cpumask(node, mask);
451
452 cfs_cpt_unset_cpumask(cptab, cpt, mask);
453
454 mutex_unlock(&cpt_data.cpt_mutex);
455}
456EXPORT_SYMBOL(cfs_cpt_unset_node);
457
458int
459cfs_cpt_set_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
460{
461 int i;
462
463 for_each_node_mask(i, *mask) {
464 if (!cfs_cpt_set_node(cptab, cpt, i))
465 return 0;
466 }
467
468 return 1;
469}
470EXPORT_SYMBOL(cfs_cpt_set_nodemask);
471
472void
473cfs_cpt_unset_nodemask(struct cfs_cpt_table *cptab, int cpt, nodemask_t *mask)
474{
475 int i;
476
477 for_each_node_mask(i, *mask)
478 cfs_cpt_unset_node(cptab, cpt, i);
479}
480EXPORT_SYMBOL(cfs_cpt_unset_nodemask);
481
482void
483cfs_cpt_clear(struct cfs_cpt_table *cptab, int cpt)
484{
485 int last;
486 int i;
487
488 if (cpt == CFS_CPT_ANY) {
489 last = cptab->ctb_nparts - 1;
490 cpt = 0;
491 } else {
492 last = cpt;
493 }
494
495 for (; cpt <= last; cpt++) {
496 for_each_cpu_mask(i, *cptab->ctb_parts[cpt].cpt_cpumask)
497 cfs_cpt_unset_cpu(cptab, cpt, i);
498 }
499}
500EXPORT_SYMBOL(cfs_cpt_clear);
501
502int
503cfs_cpt_spread_node(struct cfs_cpt_table *cptab, int cpt)
504{
505 nodemask_t *mask;
506 int weight;
507 int rotor;
508 int node;
509
510
511
512 if (cpt < 0 || cpt >= cptab->ctb_nparts) {
513 mask = cptab->ctb_nodemask;
514 rotor = cptab->ctb_spread_rotor++;
515 } else {
516 mask = cptab->ctb_parts[cpt].cpt_nodemask;
517 rotor = cptab->ctb_parts[cpt].cpt_spread_rotor++;
518 }
519
520 weight = nodes_weight(*mask);
521 LASSERT(weight > 0);
522
523 rotor %= weight;
524
525 for_each_node_mask(node, *mask) {
526 if (rotor-- == 0)
527 return node;
528 }
529
530 LBUG();
531 return 0;
532}
533EXPORT_SYMBOL(cfs_cpt_spread_node);
534
535int
536cfs_cpt_current(struct cfs_cpt_table *cptab, int remap)
537{
538 int cpu = smp_processor_id();
539 int cpt = cptab->ctb_cpu2cpt[cpu];
540
541 if (cpt < 0) {
542 if (!remap)
543 return cpt;
544
545
546
547 cpt = cpu % cptab->ctb_nparts;
548 }
549
550 return cpt;
551}
552EXPORT_SYMBOL(cfs_cpt_current);
553
554int
555cfs_cpt_of_cpu(struct cfs_cpt_table *cptab, int cpu)
556{
557 LASSERT(cpu >= 0 && cpu < NR_CPUS);
558
559 return cptab->ctb_cpu2cpt[cpu];
560}
561EXPORT_SYMBOL(cfs_cpt_of_cpu);
562
563int
564cfs_cpt_bind(struct cfs_cpt_table *cptab, int cpt)
565{
566 cpumask_t *cpumask;
567 nodemask_t *nodemask;
568 int rc;
569 int i;
570
571 LASSERT(cpt == CFS_CPT_ANY || (cpt >= 0 && cpt < cptab->ctb_nparts));
572
573 if (cpt == CFS_CPT_ANY) {
574 cpumask = cptab->ctb_cpumask;
575 nodemask = cptab->ctb_nodemask;
576 } else {
577 cpumask = cptab->ctb_parts[cpt].cpt_cpumask;
578 nodemask = cptab->ctb_parts[cpt].cpt_nodemask;
579 }
580
581 if (any_online_cpu(*cpumask) == NR_CPUS) {
582 CERROR("No online CPU found in CPU partition %d, did someone do CPU hotplug on system? You might need to reload Lustre modules to keep system working well.\n",
583 cpt);
584 return -EINVAL;
585 }
586
587 for_each_online_cpu(i) {
588 if (cpu_isset(i, *cpumask))
589 continue;
590
591 rc = set_cpus_allowed_ptr(current, cpumask);
592 set_mems_allowed(*nodemask);
593 if (rc == 0)
594 schedule();
595
596 return rc;
597 }
598
599
600 return 0;
601}
602EXPORT_SYMBOL(cfs_cpt_bind);
603
604
605
606
607
/*
 * Move up to @number CPUs out of the cpumask @node into partition @cpt of
 * @cptab. Every CPU that is taken is cleared from @node.
 *
 * If @number covers the whole mask, all of its CPUs are taken. Otherwise
 * CPUs are picked group by group: first restricted to the
 * topology_core_cpumask() siblings of a CPU, then within those to the
 * topology_thread_cpumask() siblings, so sibling CPUs end up together.
 *
 * Returns a non-negative value on success (0 on the take-everything path,
 * otherwise the last cfs_cpt_set_cpu() result), -EINVAL if
 * cfs_cpt_set_cpu() rejects a CPU, or -ENOMEM if the scratch masks cannot
 * be allocated.
 */
static int
cfs_cpt_choose_ncpus(struct cfs_cpt_table *cptab, int cpt,
		     cpumask_t *node, int number)
{
	cpumask_t *socket = NULL;
	cpumask_t *core = NULL;
	int rc = 0;
	int cpu;

	LASSERT(number > 0);

	if (number >= cpus_weight(*node)) {
		/* the request covers everything left in @node: take it all */
		while (!cpus_empty(*node)) {
			cpu = first_cpu(*node);

			rc = cfs_cpt_set_cpu(cptab, cpt, cpu);
			if (!rc)
				return -EINVAL;
			cpu_clear(cpu, *node);
		}
		return 0;
	}

	/* scratch masks for the sibling sets computed below */
	LIBCFS_ALLOC(socket, cpumask_size());
	LIBCFS_ALLOC(core, cpumask_size());
	if (socket == NULL || core == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	while (!cpus_empty(*node)) {
		cpu = first_cpu(*node);

		/* core siblings of @cpu that are still available in @node */
		cfs_cpu_core_siblings(cpu, socket);
		cpus_and(*socket, *socket, *node);

		LASSERT(!cpus_empty(*socket));

		while (!cpus_empty(*socket)) {
			int i;

			/* thread siblings of @cpu that are still available */
			cfs_cpu_ht_siblings(cpu, core);
			cpus_and(*core, *core, *node);

			LASSERT(!cpus_empty(*core));

			for_each_cpu_mask(i, *core) {
				cpu_clear(i, *socket);
				cpu_clear(i, *node);

				rc = cfs_cpt_set_cpu(cptab, cpt, i);
				if (!rc) {
					rc = -EINVAL;
					goto out;
				}

				/* stop as soon as the quota is filled */
				if (--number == 0)
					goto out;
			}
			cpu = first_cpu(*socket);
		}
	}

 out:
	if (socket != NULL)
		LIBCFS_FREE(socket, cpumask_size());
	if (core != NULL)
		LIBCFS_FREE(core, cpumask_size());
	return rc;
}
681
/* at or below this many online CPUs a single partition is used */
#define CPT_WEIGHT_MIN 4u

/*
 * Suggest a partition count from the number of online CPUs and NUMA nodes.
 *
 * The CPU-derived estimate is the smallest power of two ncpt (>= 2) with
 * ncpu <= 2 * ncpt * ncpt. The node count is then halved or doubled towards
 * that estimate and used as the result, which is finally decreased until it
 * divides the number of online CPUs.
 */
static unsigned int
cfs_cpt_num_estimate(void)
{
	unsigned nnode = num_online_nodes();
	unsigned ncpu = num_online_cpus();
	unsigned ncpt = 1;

	if (ncpu > CPT_WEIGHT_MIN) {
		ncpt = 2;
		while (ncpu > 2 * ncpt * ncpt)
			ncpt <<= 1;

		if (ncpt <= nnode) {
			/* more nodes than the estimate: halve down to it */
			while (nnode > ncpt)
				nnode >>= 1;
		} else {
			/* fewer nodes than the estimate: double up to it */
			while ((nnode << 1) <= ncpt)
				nnode <<= 1;
		}

		ncpt = nnode;
	}

#if (BITS_PER_LONG == 32)
	/* 32-bit builds are capped at two partitions */
	ncpt = min(2U, ncpt);
#endif
	/* settle on a value that divides the online CPU count */
	while (ncpu % ncpt != 0)
		ncpt--;

	return ncpt;
}
723
/*
 * Build a CPU partition table with @ncpt equally sized partitions.
 *
 * A value of @ncpt <= 0 selects the count suggested by
 * cfs_cpt_num_estimate(). The online CPU count must be a multiple of the
 * partition count. Online nodes are walked in order and each node's CPUs
 * are handed to cfs_cpt_choose_ncpus() until the current partition holds
 * its share, then filling moves on to the next partition.
 *
 * Returns the new table, or NULL on any failure (an error is logged).
 */
static struct cfs_cpt_table *
cfs_cpt_table_create(int ncpt)
{
	struct cfs_cpt_table *cptab = NULL;
	cpumask_t *mask = NULL;
	int cpt = 0;
	int num;
	int rc;
	int i;

	rc = cfs_cpt_num_estimate();
	if (ncpt <= 0)
		ncpt = rc;

	/* only a warning: an oversized request is still honoured */
	if (ncpt > num_online_cpus() || ncpt > 4 * rc) {
		CWARN("CPU partition number %d is larger than suggested value (%d), your system may have performance issue or run out of memory while under pressure\n",
		      ncpt, rc);
	}

	if (num_online_cpus() % ncpt != 0) {
		CERROR("CPU number %d is not multiple of cpu_npartition %d, please try different cpu_npartitions value or set pattern string by cpu_pattern=STRING\n",
		       (int)num_online_cpus(), ncpt);
		goto failed;
	}

	cptab = cfs_cpt_table_alloc(ncpt);
	if (cptab == NULL) {
		CERROR("Failed to allocate CPU map(%d)\n", ncpt);
		goto failed;
	}

	/* number of CPUs each partition must end up with */
	num = num_online_cpus() / ncpt;
	if (num == 0) {
		CERROR("CPU changed while setting CPU partition\n");
		goto failed;
	}

	LIBCFS_ALLOC(mask, cpumask_size());
	if (mask == NULL) {
		CERROR("Failed to allocate scratch cpumask\n");
		goto failed;
	}

	for_each_online_node(i) {
		cfs_node_to_cpumask(i, mask);

		/* cfs_cpt_choose_ncpus() clears the CPUs it takes from mask */
		while (!cpus_empty(*mask)) {
			struct cfs_cpu_partition *part;
			int n;

			/* CPUs left over after every partition is full */
			if (cpt >= ncpt)
				goto failed;

			part = &cptab->ctb_parts[cpt];

			/* how many CPUs this partition still needs */
			n = num - cpus_weight(*part->cpt_cpumask);
			LASSERT(n > 0);

			rc = cfs_cpt_choose_ncpus(cptab, cpt, mask, n);
			if (rc < 0)
				goto failed;

			LASSERT(num >= cpus_weight(*part->cpt_cpumask));
			if (num == cpus_weight(*part->cpt_cpumask))
				cpt++;
		}
	}

	/* every partition must have been filled completely */
	if (cpt != ncpt ||
	    num != cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask)) {
		CERROR("Expect %d(%d) CPU partitions but got %d(%d), CPU hotplug/unplug while setting?\n",
		       cptab->ctb_nparts, num, cpt,
		       cpus_weight(*cptab->ctb_parts[ncpt - 1].cpt_cpumask));
		goto failed;
	}

	LIBCFS_FREE(mask, cpumask_size());

	return cptab;

 failed:
	CERROR("Failed to setup CPU-partition-table with %d CPU-partitions, online HW nodes: %d, HW cpus: %d.\n",
	       ncpt, num_online_nodes(), num_online_cpus());

	if (mask != NULL)
		LIBCFS_FREE(mask, cpumask_size());

	if (cptab != NULL)
		cfs_cpt_table_free(cptab);

	return NULL;
}
816
/*
 * Build a CPU partition table from a user supplied pattern string.
 *
 * The pattern is a sequence of "<cpt>[<number range list>]" items, one per
 * partition; the partition count is the number of '[' in the string. With a
 * leading 'n' or 'N' the numbers in brackets are NUMA node ids, otherwise
 * they are CPU ids. Each partition id must be in [0, ncpt), may appear only
 * once, and must end up with at least one online CPU.
 *
 * Returns the new table, or NULL on a malformed pattern or any other
 * failure (an error is logged).
 */
static struct cfs_cpt_table *
cfs_cpt_table_create_pattern(char *pattern)
{
	struct cfs_cpt_table *cptab;
	char *str = pattern;
	int node = 0;
	int high;
	int ncpt;
	int c;

	/* count the '[' characters: one per partition */
	for (ncpt = 0;; ncpt++) {
		str = strchr(str, '[');
		if (str == NULL)
			break;
		str++;
	}

	/* a leading 'n'/'N' switches from CPU ids to NUMA node ids */
	str = cfs_trimwhite(pattern);
	if (*str == 'n' || *str == 'N') {
		pattern = str + 1;
		node = 1;
	}

	if (ncpt == 0 ||
	    (node && ncpt > num_online_nodes()) ||
	    (!node && ncpt > num_online_cpus())) {
		CERROR("Invalid pattern %s, or too many partitions %d\n",
		       pattern, ncpt);
		return NULL;
	}

	/* largest id accepted inside the brackets */
	high = node ? MAX_NUMNODES - 1 : NR_CPUS - 1;

	cptab = cfs_cpt_table_alloc(ncpt);
	if (cptab == NULL) {
		CERROR("Failed to allocate cpu partition table\n");
		return NULL;
	}

	/* one iteration per "<cpt>[...]" item; c counts the items parsed */
	for (str = cfs_trimwhite(pattern), c = 0;; c++) {
		struct cfs_range_expr *range;
		struct cfs_expr_list *el;
		char *bracket = strchr(str, '[');
		int cpt;
		int rc;
		int i;
		int n;

		if (bracket == NULL) {
			/* no more items: only end-of-string is acceptable */
			if (*str != 0) {
				CERROR("Invalid pattern %s\n", str);
				goto failed;
			} else if (c != ncpt) {
				CERROR("expect %d partitions but found %d\n",
				       ncpt, c);
				goto failed;
			}
			break;
		}

		/* partition id; n receives the number of characters consumed */
		if (sscanf(str, "%d%n", &cpt, &n) < 1) {
			CERROR("Invalid cpu pattern %s\n", str);
			goto failed;
		}

		if (cpt < 0 || cpt >= ncpt) {
			CERROR("Invalid partition id %d, total partitions %d\n",
			       cpt, ncpt);
			goto failed;
		}

		if (cfs_cpt_weight(cptab, cpt) != 0) {
			CERROR("Partition %d has already been set.\n", cpt);
			goto failed;
		}

		/* only whitespace may separate the id from its '[' */
		str = cfs_trimwhite(str + n);
		if (str != bracket) {
			CERROR("Invalid pattern %s\n", str);
			goto failed;
		}

		bracket = strchr(str, ']');
		if (bracket == NULL) {
			CERROR("missing right bracket for cpt %d, %s\n",
			       cpt, str);
			goto failed;
		}

		/* parse "[...]" including both brackets */
		if (cfs_expr_list_parse(str, (bracket - str) + 1,
					0, high, &el) != 0) {
			CERROR("Can't parse number range: %s\n", str);
			goto failed;
		}

		list_for_each_entry(range, &el->el_exprs, re_link) {
			for (i = range->re_lo; i <= range->re_hi; i++) {
				/* skip values that are not on the stride */
				if ((i - range->re_lo) % range->re_stride != 0)
					continue;

				rc = node ? cfs_cpt_set_node(cptab, cpt, i) :
					    cfs_cpt_set_cpu(cptab, cpt, i);
				if (!rc) {
					cfs_expr_list_free(el);
					goto failed;
				}
			}
		}

		cfs_expr_list_free(el);

		if (!cfs_cpt_online(cptab, cpt)) {
			CERROR("No online CPU is found on partition %d\n", cpt);
			goto failed;
		}

		/* continue after the closing bracket */
		str = cfs_trimwhite(bracket + 1);
	}

	return cptab;

 failed:
	cfs_cpt_table_free(cptab);
	return NULL;
}
942
943#ifdef CONFIG_HOTPLUG_CPU
/*
 * CPU hotplug notifier callback.
 *
 * CPU_DEAD/CPU_ONLINE events (and their _FROZEN variants) bump
 * cpt_data.cpt_version under cpt_lock. For CPU_DEAD/CPU_DEAD_FROZEN a
 * message is additionally emitted, at D_WARNING level when none of the
 * topology_thread_cpumask() siblings of the dead CPU is online and at
 * D_INFO level otherwise. Any other action is only logged at D_INFO.
 *
 * Always returns NOTIFY_OK.
 */
static int
cfs_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	bool warn;

	switch (action) {
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		spin_lock(&cpt_data.cpt_lock);
		cpt_data.cpt_version++;
		spin_unlock(&cpt_data.cpt_lock);
		/* deliberate fall through: the logging below is shared */
	default:
		if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) {
			CDEBUG(D_INFO, "CPU changed [cpu %u action %lx]\n",
			       cpu, action);
			break;
		}

		/* cpt_mutex guards the shared scratch cpumask */
		mutex_lock(&cpt_data.cpt_mutex);
		/* warn only if no thread sibling of the dead CPU is online */
		cfs_cpu_ht_siblings(cpu, cpt_data.cpt_cpumask);
		warn = any_online_cpu(*cpt_data.cpt_cpumask) >= nr_cpu_ids;
		mutex_unlock(&cpt_data.cpt_mutex);
		CDEBUG(warn ? D_WARNING : D_INFO,
		       "Lustre: can't support CPU plug-out well now, performance and stability could be impacted [CPU %u action: %lx]\n",
		       cpu, action);
	}

	return NOTIFY_OK;
}
977
/*
 * Hotplug notifier block; registered in cfs_cpu_init() and unregistered in
 * cfs_cpu_fini().
 */
static struct notifier_block cfs_cpu_notifier = {
	.notifier_call = cfs_cpu_notify,
	.priority = 0
};
982
983#endif
984
985void
986cfs_cpu_fini(void)
987{
988 if (cfs_cpt_table != NULL)
989 cfs_cpt_table_free(cfs_cpt_table);
990
991#ifdef CONFIG_HOTPLUG_CPU
992 unregister_hotcpu_notifier(&cfs_cpu_notifier);
993#endif
994 if (cpt_data.cpt_cpumask != NULL)
995 LIBCFS_FREE(cpt_data.cpt_cpumask, cpumask_size());
996}
997
998int
999cfs_cpu_init(void)
1000{
1001 LASSERT(cfs_cpt_table == NULL);
1002
1003 memset(&cpt_data, 0, sizeof(cpt_data));
1004
1005 LIBCFS_ALLOC(cpt_data.cpt_cpumask, cpumask_size());
1006 if (cpt_data.cpt_cpumask == NULL) {
1007 CERROR("Failed to allocate scratch buffer\n");
1008 return -1;
1009 }
1010
1011 spin_lock_init(&cpt_data.cpt_lock);
1012 mutex_init(&cpt_data.cpt_mutex);
1013
1014#ifdef CONFIG_HOTPLUG_CPU
1015 register_hotcpu_notifier(&cfs_cpu_notifier);
1016#endif
1017
1018 if (*cpu_pattern != 0) {
1019 cfs_cpt_table = cfs_cpt_table_create_pattern(cpu_pattern);
1020 if (cfs_cpt_table == NULL) {
1021 CERROR("Failed to create cptab from pattern %s\n",
1022 cpu_pattern);
1023 goto failed;
1024 }
1025
1026 } else {
1027 cfs_cpt_table = cfs_cpt_table_create(cpu_npartitions);
1028 if (cfs_cpt_table == NULL) {
1029 CERROR("Failed to create ptable with npartitions %d\n",
1030 cpu_npartitions);
1031 goto failed;
1032 }
1033 }
1034
1035 spin_lock(&cpt_data.cpt_lock);
1036 if (cfs_cpt_table->ctb_version != cpt_data.cpt_version) {
1037 spin_unlock(&cpt_data.cpt_lock);
1038 CERROR("CPU hotplug/unplug during setup\n");
1039 goto failed;
1040 }
1041 spin_unlock(&cpt_data.cpt_lock);
1042
1043 LCONSOLE(0, "HW CPU cores: %d, npartitions: %d\n",
1044 num_online_cpus(), cfs_cpt_number(cfs_cpt_table));
1045 return 0;
1046
1047 failed:
1048 cfs_cpu_fini();
1049 return -1;
1050}
1051
1052#endif
1053