// SPDX-License-Identifier: GPL-2.0
/*
 * Resource Director Technology (RDT)
 *
 * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Reinette Chatre <reinette.chatre@intel.com>
 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
#include <linux/perf_event.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include <asm/cacheflush.h>
#include <asm/intel-family.h>
#include <asm/resctrl.h>
#include <asm/perf_event.h>

#include "../../events/perf_event.h"
#include "internal.h"

#define CREATE_TRACE_POINTS
#include "pseudo_lock_event.h"

/*
 * The bits needed to disable hardware prefetching varies based on the
 * platform. During initialization we will discover which bits to use.
 */
static u64 prefetch_disable_bits;

/*
 * Major number assigned to and shared by all devices exposing
 * pseudo-locked regions.
 */
static unsigned int pseudo_lock_major;
static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
static struct class *pseudo_lock_class;

/**
 * get_prefetch_disable_bits - prefetch disable bits of supported platforms
 *
 * Capture the list of platforms that have been validated to support
 * pseudo-locking. This includes testing to ensure pseudo-locked regions
 * with low cache miss rates can be created under variety of load conditions
 * as well as that restoration of a previously pseudo-locked region is
 * possible under similar conditions.
 *
 * After a platform has been validated to support pseudo-locking its
 * hardware prefetch disable bits are included here as they are documented
 * in the SDM.
 *
 * When adding a platform here also add support for its cache events to
 * measure_residency_fn().
 *
 * Return:
 * If platform is supported, the bits to disable hardware prefetchers, 0
 * if platform is not supported.
 */
static u64 get_prefetch_disable_bits(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    boot_cpu_data.x86 != 6)
		return 0;

	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		/*
		 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
		 * as:
		 * 0    L2 Hardware Prefetcher Disable (R/W)
		 * 1    L2 Adjacent Cache Line Prefetcher Disable (R/W)
		 * 2    DCU Hardware Prefetcher Disable (R/W)
		 * 3    DCU IP Prefetcher Disable (R/W)
		 * 63:4 Reserved
		 */
		return 0xF;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		/*
		 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
		 * as:
		 * 0     L2 Hardware Prefetcher Disable (R/W)
		 * 1     Reserved
		 * 2     DCU Hardware Prefetcher Disable (R/W)
		 * 63:3  Reserved
		 */
		return 0x5;
	}

	return 0;
}

/**
 * pseudo_lock_minor_get - Obtain available minor number
 * @minor: Pointer to where new minor number will be stored
 *
 * A bitmask is used to track available minor numbers. Here the next free
 * minor number is requested.
 *
 * Return: 0 on success, <0 on failure.
 */
static int pseudo_lock_minor_get(unsigned int *minor)
{
	unsigned long first_bit;

	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);

	if (first_bit == MINORBITS)
		return -ENOSPC;

	__clear_bit(first_bit, &pseudo_lock_minor_avail);
	*minor = first_bit;

	return 0;
}

/**
 * pseudo_lock_minor_release - Return minor number to available
 * @minor: The minor number made available
 */
static void pseudo_lock_minor_release(unsigned int minor)
{
	__set_bit(minor, &pseudo_lock_minor_avail);
}

/**
 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
 * @minor: The minor number of the device representing pseudo-locked region
 *
 * When the character device is accessed we need to determine which
 * pseudo-locked region it belongs to. This is done by matching the minor
 * number of the device to the pseudo-locked region it belongs.
 *
 * Minor numbers are assigned at the time a pseudo-locked region is associated
 * with a cache instance.
 *
 * Return: On success return pointer to resource group owning the pseudo-locked
 *         region, NULL on failure.
 */
static struct rdtgroup *region_find_by_minor(unsigned int minor)
{
	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
			rdtgrp_match = rdtgrp;
			break;
		}
	}
	return rdtgrp_match;
}

/**
 * struct pseudo_lock_pm_req - A power management QoS request list entry
 * @list:	Entry within the @pm_reqs list for a pseudo-locked region
 * @req:	PM QoS request
 */
struct pseudo_lock_pm_req {
	struct list_head list;
	struct dev_pm_qos_request req;
};

static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req, *next;

	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
		dev_pm_qos_remove_request(&pm_req->req);
		list_del(&pm_req->list);
		kfree(pm_req);
	}
}

/**
 * pseudo_lock_cstates_constrain - Restrict cores from entering C6
 * @plr: Pseudo-locked region
 *
 * To prevent the cache from being affected by power management entering
 * C6 has to be avoided. This is accomplished by requesting a latency
 * requirement lower than lowest C6 exit latency of all supported
 * platforms as found in the cpuidle state tables in the intel_idle driver.
 * At this time it is possible to do so with a single latency requirement
 * for all supported platforms.
 *
 * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
 * the ACPI latencies need to be considered while keeping in mind that C2
 * may be set to map to deeper sleep states. In this case the latency
 * requirement needs to be less than the C2 latency.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req;
	int cpu;
	int ret;

	for_each_cpu(cpu, &plr->d->cpu_mask) {
		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
		if (!pm_req) {
			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
			ret = -ENOMEM;
			goto out_err;
		}
		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
					     &pm_req->req,
					     DEV_PM_QOS_RESUME_LATENCY,
					     30);
		if (ret < 0) {
			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
					    cpu);
			kfree(pm_req);
			ret = -1;
			goto out_err;
		}
		list_add(&pm_req->list, &plr->pm_reqs);
	}

	return 0;

out_err:
	pseudo_lock_cstates_relax(plr);
	return ret;
}

/**
 * pseudo_lock_region_clear - Reset pseudo-lock region data
 * @plr: pseudo-lock region
 *
 * All content of the pseudo-locked region is reset - any memory allocated
 * freed.
 *
 * Return: void
 */
static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
	plr->size = 0;
	plr->line_size = 0;
	kfree(plr->kmem);
	plr->kmem = NULL;
	plr->s = NULL;
	if (plr->d)
		plr->d->plr = NULL;
	plr->d = NULL;
	plr->cbm = 0;
	plr->debugfs_dir = NULL;
}

/**
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
 *
 * Called after user provided a schemata to be pseudo-locked. From the
 * schemata the &struct pseudo_lock_region is on entry already initialized
 * with the resource, domain, and capacity bitmask. Here the information
 * required for pseudo-locking is deduced from this data and &struct
 * pseudo_lock_region initialized further. This information includes:
 * - size in bytes of the region to be pseudo-locked
 * - cache line size to know the stride with which data needs to be accessed
 *   to be pseudo-locked,
 * - a cpu associated with the cache instance on which the pseudo-locking
 *   flow can be executed
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
	struct cpu_cacheinfo *ci;
	int ret;
	int i;

	/* Pick the first cpu we find that is associated with the cache. */
	plr->cpu = cpumask_first(&plr->d->cpu_mask);

	if (!cpu_online(plr->cpu)) {
		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
				    plr->cpu);
		ret = -ENODEV;
		goto out_region;
	}

	ci = get_cpu_cacheinfo(plr->cpu);

	plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);

	for (i = 0; i < ci->num_leaves; i++) {
		if (ci->info_list[i].level == plr->s->res->cache_level) {
			plr->line_size = ci->info_list[i].coherency_line_size;
			return 0;
		}
	}

	ret = -1;
	rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
	pseudo_lock_region_clear(plr);
	return ret;
}

/**
 * pseudo_lock_init - Initialize a pseudo-lock region
 * @rdtgrp: resource group to which new pseudo-locked region will belong
 *
 * A pseudo-locked region is associated with a resource group. When this
 * association is created the pseudo-locked region is initialized. The
 * details of the pseudo-locked region are not known at this time so only
 * allocation is done and association established.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_init(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr;

	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
	if (!plr)
		return -ENOMEM;

	init_waitqueue_head(&plr->lock_thread_wq);
	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
}

/**
 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
 * @plr: pseudo-lock region
 *
 * Initialize the details required to set up the pseudo-locked region and
 * allocate the contiguous memory that will be pseudo-locked to the cache.
 *
 * Return: 0 on success, <0 on failure.  Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
{
	int ret;

	ret = pseudo_lock_region_init(plr);
	if (ret < 0)
		return ret;

	/*
	 * We do not yet support contiguous regions larger than
	 * KMALLOC_MAX_SIZE.
	 */
	if (plr->size > KMALLOC_MAX_SIZE) {
		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
		ret = -E2BIG;
		goto out_region;
	}

	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
	if (!plr->kmem) {
		rdt_last_cmd_puts("Unable to allocate memory\n");
		ret = -ENOMEM;
		goto out_region;
	}

	ret = 0;
	goto out;
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * pseudo_lock_free - Free a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-locked region belonged
 *
 * The pseudo-locked region's resources have already been released, or not
 * yet created, at this point. Now it can be freed and disassociated from
 * the resource group.
 *
 * Return: void
 */
static void pseudo_lock_free(struct rdtgroup *rdtgrp)
{
	pseudo_lock_region_clear(rdtgrp->plr);
	kfree(rdtgrp->plr);
	rdtgrp->plr = NULL;
}

/**
 * pseudo_lock_fn - Load kernel memory into cache
 * @_rdtgrp: resource group to which pseudo-lock region belongs
 *
 * This is the core pseudo-locking flow.
 *
 * First we ensure that the kernel memory cannot be found in the cache.
 * Then, while taking care that there will be as little interference as
 * possible, the memory to be loaded is accessed while core is running
 * with class of service set to the bitmask of the pseudo-locked region.
 * After this is complete no future CAT allocations will be allowed to
 * overlap with this bitmask.
 *
 * Local register variables are utilized to ensure that the memory region
 * to be locked is the only memory access made during the critical locking
 * loop.
 *
 * Return: 0. Waiter on waitqueue will be woken on completion.
 */
static int pseudo_lock_fn(void *_rdtgrp)
{
	struct rdtgroup *rdtgrp = _rdtgrp;
	struct pseudo_lock_region *plr = rdtgrp->plr;
	u32 rmid_p, closid_p;
	unsigned long i;
#ifdef CONFIG_KASAN
	/*
	 * The registers used for local register variables are also used
	 * when KASAN is active. When KASAN is active we use regular
	 * variables so that KASAN instrumentation keeps working, at the
	 * cost of additional memory accesses in the loops below.
	 */
	unsigned int line_size;
	unsigned int size;
	void *mem_r;
#else
	register unsigned int line_size asm("esi");
	register unsigned int size asm("edi");
	register void *mem_r asm(_ASM_BX);
#endif /* CONFIG_KASAN */

	/*
	 * Make sure none of the allocated memory is cached. If it is we
	 * will get a cache hit in below loop from outside of pseudo-locked
	 * region.
	 * wbinvd (as opposed to clflush/clflushopt) is required to
	 * increase likelihood that allocated cache portion will be filled
	 * with associated memory.
	 */
	native_wbinvd();

	/*
	 * Always called with interrupts enabled. By disabling interrupts
	 * ensure that we will not be preempted during this critical section.
	 */
	local_irq_disable();

	/*
	 * Call wrmsr and rdmsr as directly as possible to avoid tracing
	 * clobbering local register variables or affecting cache accesses.
	 *
	 * Disable the hardware prefetcher so that when the end of the memory
	 * being pseudo-locked is reached the hardware will not read beyond
	 * the buffer and evict pseudo-locked memory read earlier from the
	 * cache.
	 */
	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	closid_p = this_cpu_read(pqr_state.cur_closid);
	rmid_p = this_cpu_read(pqr_state.cur_rmid);
	mem_r = plr->kmem;
	size = plr->size;
	line_size = plr->line_size;
	/*
	 * Critical section begin: start by writing the closid associated
	 * with the capacity bitmask of the cache region being
	 * pseudo-locked followed by reading of kernel memory to load it
	 * into the cache.
	 */
	__wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
	/*
	 * Cache was flushed earlier. Now access kernel memory to read it
	 * into cache region associated with just activated plr->closid.
	 * Loop over data twice:
	 * - In first loop the cache region is shared with the page walker
	 *   as it populates the paging structure caches (including TLB).
	 * - In the second loop the paging structure caches are used and
	 *   cache region is populated with the memory being referenced.
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	for (i = 0; i < size; i += line_size) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	/*
	 * Critical section end: restore closid with capacity bitmask that
	 * does not overlap with pseudo-locked region.
	 */
	__wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);

	/* Re-enable the hardware prefetcher(s) */
	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();

	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/**
 * rdtgroup_monitor_in_progress - Test if monitoring in progress
 * @rdtgrp: resource group being queried
 *
 * Return: 1 if monitor groups have been created for this resource
 * group, 0 otherwise.
 */
static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
{
	return !list_empty(&rdtgrp->mon.crdtgrp_list);
}

/**
 * rdtgroup_locksetup_user_restrict - Restrict user access to group
 * @rdtgrp: resource group needing access restricted
 *
 * A resource group used for cache pseudo-locking cannot have cpus or tasks
 * assigned to it. This is communicated to the user by restricting access
 * to all the files that can be used to make such changes.
 *
 * Permissions restored with rdtgroup_locksetup_user_restore()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restriction of access an attempt will be made to restore permissions but
 * the state of the mode of these files will be uncertain when a failure
 * occurs.
 */
static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
	if (ret)
		goto err_cpus;

	if (rdt_mon_capable) {
		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
err_cpus:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
err_tasks:
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_user_restore - Restore user access to group
 * @rdtgrp: resource group needing access restored
 *
 * Restore all file access previously removed using
 * rdtgroup_locksetup_user_restrict()
 *
 * Return: 0 on success, <0 on failure.  If a failure occurs during the
 * restoration of access an attempt will be made to restrict permissions
 * again but the state of the mode of these files will be uncertain when
 * a failure occurs.
 */
static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
	if (ret)
		goto err_cpus;

	if (rdt_mon_capable) {
		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
err_cpus:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
err_tasks:
	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
out:
	return ret;
}

/**
 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
 * @rdtgrp: resource group requested to enter locksetup mode
 *
 * A resource group enters locksetup mode to reflect that it would be used
 * to represent a pseudo-locked region and is in the process of being set
 * up to do so. A resource group used for a pseudo-locked region would
 * lose the closid associated with it so we cannot allow it to have any
 * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
 * future. Monitoring of a pseudo-locked region is not allowed either.
 *
 * The above and more restrictions on a pseudo-locked region are checked
 * for and enforced before the resource group enters the locksetup mode.
 *
 * Returns: 0 if the resource group successfully entered locksetup mode, <0
 * on failure. On failure the last_cmd_status buffer is updated with text to
 * communicate details of failure to the user.
 */
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	int ret;

	/*
	 * The default resource group can neither be removed nor lose the
	 * default closid associated with it.
	 */
	if (rdtgrp == &rdtgroup_default) {
		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
		return -EINVAL;
	}

	/*
	 * Cache Pseudo-locking not supported when CDP is enabled.
	 *
	 * Some things to consider if you would like to enable this
	 * support (using L3 CDP as example):
	 * - When CDP is enabled two separate resources are exposed,
	 *   L3DATA and L3CODE, but they are actually on the same cache.
	 *   The implication for pseudo-locking is that if a
	 *   pseudo-locked region is created on a domain of one
	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
	 *   be created on that same domain of the other resource
	 *   (eg. L3DATA). This is because the creation of a
	 *   pseudo-locked region involves a call to wbinvd that will
	 *   affect all cache allocations on particular domain.
	 * - There are additional constraints, for example keeping the
	 *   pseudo-locked region consistent between L3DATA and L3CODE,
	 *   that would need to be resolved before this can be supported.
	 */
	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
		rdt_last_cmd_puts("CDP enabled\n");
		return -EINVAL;
	}

	/*
	 * Not knowing the bits to disable prefetching implies that this
	 * platform does not support Cache Pseudo-Locking.
	 */
	prefetch_disable_bits = get_prefetch_disable_bits();
	if (prefetch_disable_bits == 0) {
		rdt_last_cmd_puts("Pseudo-locking not supported\n");
		return -EINVAL;
	}

	if (rdtgroup_monitor_in_progress(rdtgrp)) {
		rdt_last_cmd_puts("Monitoring in progress\n");
		return -EINVAL;
	}

	if (rdtgroup_tasks_assigned(rdtgrp)) {
		rdt_last_cmd_puts("Tasks assigned to resource group\n");
		return -EINVAL;
	}

	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
		rdt_last_cmd_puts("CPUs assigned to resource group\n");
		return -EINVAL;
	}

	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
		return -EIO;
	}

	ret = pseudo_lock_init(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
		goto out_release;
	}

	/*
	 * If this system is capable of monitoring a rmid would have been
	 * allocated when the control group was created. This is not needed
	 * anymore when this group would be used for pseudo-locking. This
	 * is safe to call on platforms not capable of monitoring.
	 */
	free_rmid(rdtgrp->mon.rmid);

	ret = 0;
	goto out;

out_release:
	rdtgroup_locksetup_user_restore(rdtgrp);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
 * @rdtgrp: resource group
 *
 * The resource group is no longer in locksetup mode and it is now
 * considered a regular resource group again.
 *
 * Return: 0 on success, <0 on failure
 */
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	int ret;

	if (rdt_mon_capable) {
		ret = alloc_rmid();
		if (ret < 0) {
			rdt_last_cmd_puts("Out of RMIDs\n");
			return ret;
		}
		rdtgrp->mon.rmid = ret;
	}

	ret = rdtgroup_locksetup_user_restore(rdtgrp);
	if (ret) {
		free_rmid(rdtgrp->mon.rmid);
		return ret;
	}

	pseudo_lock_free(rdtgrp);
	return 0;
}

/**
 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
 * @d: RDT domain
 * @cbm: CBM to test
 *
 * @d represents a cache instance and @cbm a capacity bitmask that is
 * considered for it. Determine if @cbm overlaps with any existing
 * pseudo-locked region on @d.
 *
 * @cbm is unsigned long, even if only 32 bits are used, to make the
 * bitmap functions work correctly.
 *
 * Return: true if @cbm overlaps with pseudo-locked region on @d, false
 * otherwise.
 */
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
{
	unsigned int cbm_len;
	unsigned long cbm_b;

	if (d->plr) {
		cbm_len = d->plr->s->res->cache.cbm_len;
		cbm_b = d->plr->cbm;
		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
			return true;
	}
	return false;
}
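
/*
 * Worked example for the overlap test above (illustrative values only):
 * with an existing pseudo-locked region using CBM 0x0f0 (bits 4-7), a
 * request for CBM 0x300 (bits 8-9) does not overlap, while a request for
 * CBM 0x180 (bits 7-8) does because bit 7 is shared.
 */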

/**
 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
 * @d: RDT domain under test
 *
 * The setup of a pseudo-locked region affects all cache instances within
 * the hierarchy of the region. It is thus essential to know if any
 * pseudo-locked regions exist within a cache hierarchy to prevent any
 * attempts to create new pseudo-locked regions in the same hierarchy.
 *
 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
 *         if it is not possible to test due to memory allocation issue,
 *         false otherwise.
 */
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
{
	cpumask_var_t cpu_with_psl;
	struct rdt_resource *r;
	struct rdt_domain *d_i;
	bool ret = false;

	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
		return true;

	/*
	 * First determine which cpus have pseudo-locked regions
	 * associated with them.
	 */
	for_each_alloc_enabled_rdt_resource(r) {
		list_for_each_entry(d_i, &r->domains, list) {
			if (d_i->plr)
				cpumask_or(cpu_with_psl, cpu_with_psl,
					   &d_i->cpu_mask);
		}
	}

	/*
	 * Next test if new pseudo-locked region would intersect with
	 * existing region.
	 */
	if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
		ret = true;

	free_cpumask_var(cpu_with_psl);
	return ret;
}

/**
 * measure_cycles_lat_fn - Measure cycle latency to read pseudo-locked memory
 * @_plr: pseudo-lock region to measure
 *
 * There is no deterministic way to test if a memory region is cached. One
 * way is to measure how long it takes to read the memory, the speed of
 * access is a good way to learn how close to the cpu the data was. Even
 * more, if the prefetcher is disabled and the memory is read at a stride
 * of half the cache line, then reads of the same cache line come in
 * pairs and only the first read of a pair can miss, making a cache miss
 * easy to spot as an elevated latency in the traces.
 *
 * Return: 0. Waiter on waitqueue will be woken on completion.
 */
static int measure_cycles_lat_fn(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	unsigned long i;
	u64 start, end;
	void *mem_r;

	local_irq_disable();
	/*
	 * Disable hardware prefetchers.
	 */
	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
	mem_r = READ_ONCE(plr->kmem);
	/*
	 * Dummy execute of the time measurement to load the needed
	 * instructions into the L1 instruction cache.
	 */
	start = rdtsc_ordered();
	for (i = 0; i < plr->size; i += 32) {
		start = rdtsc_ordered();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
		end = rdtsc_ordered();
		trace_pseudo_lock_mem_latency((u32)(end - start));
	}
	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/*
 * Create a perf_event_attr for the hit and miss perf events that will
 * be used during the performance measurement. A perf_event maintains
 * a pointer to its perf_event_attr so a unique attribute structure is
 * created for each perf_event.
 *
 * The actual configuration of the event is set right before use in order
 * to use the X86_CONFIG macro.
 */
static struct perf_event_attr perf_miss_attr = {
	.type		= PERF_TYPE_RAW,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 0,
	.exclude_user	= 1,
};

static struct perf_event_attr perf_hit_attr = {
	.type		= PERF_TYPE_RAW,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 0,
	.exclude_user	= 1,
};

struct residency_counts {
	u64 miss_before, hits_before;
	u64 miss_after, hits_after;
};

static int measure_residency_fn(struct perf_event_attr *miss_attr,
				struct perf_event_attr *hit_attr,
				struct pseudo_lock_region *plr,
				struct residency_counts *counts)
{
	u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
	struct perf_event *miss_event, *hit_event;
	int hit_pmcnum, miss_pmcnum;
	unsigned int line_size;
	unsigned int size;
	unsigned long i;
	void *mem_r;
	u64 tmp;

	miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
						      NULL, NULL, NULL);
	if (IS_ERR(miss_event))
		goto out;

	hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
						     NULL, NULL, NULL);
	if (IS_ERR(hit_event))
		goto out_miss;

	local_irq_disable();
	/*
	 * Check any possible error state of events used by performing
	 * one local read.
	 */
	if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}
	if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
		local_irq_enable();
		goto out_hit;
	}

	/*
	 * Disable hardware prefetchers.
	 */
	wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);

	/*
	 * Cache the counter indexes and region parameters in local
	 * variables so that the measurement loop below makes no memory
	 * accesses other than the reads being measured.
	 */
	miss_pmcnum = x86_perf_rdpmc_index(miss_event);
	hit_pmcnum = x86_perf_rdpmc_index(hit_event);
	line_size = READ_ONCE(plr->line_size);
	mem_r = READ_ONCE(plr->kmem);
	size = READ_ONCE(plr->size);

	/*
	 * Read counter variables twice - first to load the instructions
	 * used in L1 cache, second to capture accurate value that does not
	 * include cache misses incurred because of instruction loads.
	 */
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);
	/*
	 * From SDM: performing back-to-back fast reads are not guaranteed
	 * to be monotonic.
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	rdpmcl(hit_pmcnum, hits_before);
	rdpmcl(miss_pmcnum, miss_before);
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	for (i = 0; i < size; i += line_size) {
		/*
		 * Add a barrier to prevent speculative execution of this
		 * loop reading beyond the end of the buffer.
		 */
		rmb();
		asm volatile("mov (%0,%1,1), %%eax\n\t"
			     :
			     : "r" (mem_r), "r" (i)
			     : "%eax", "memory");
	}
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	rdpmcl(hit_pmcnum, hits_after);
	rdpmcl(miss_pmcnum, miss_after);
	/*
	 * Use LFENCE to ensure all previous instructions are retired
	 * before proceeding.
	 */
	rmb();
	/* Re-enable hardware prefetchers */
	wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
	local_irq_enable();
out_hit:
	perf_event_release_kernel(hit_event);
out_miss:
	perf_event_release_kernel(miss_event);
out:
	/*
	 * All counts will be zero on failure.
	 */
	counts->miss_before = miss_before;
	counts->hits_before = hits_before;
	counts->miss_after = miss_after;
	counts->hits_after = hits_after;
	return 0;
}

static int measure_l2_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	/*
	 * Non-architectural event for the Goldmont Microarchitecture
	 * from Intel x86 Architecture Software Developer Manual (SDM):
	 * MEM_LOAD_UOPS_RETIRED D1H (event number)
	 * Umask values:
	 *     L2_HIT   02H
	 *     L2_MISS  10H
	 */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
						   .umask = 0x10);
		perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
						  .umask = 0x2);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
	/*
	 * If a failure prevented the measurements from succeeding
	 * tracepoints will still be written and all counts will be zero.
	 */
	trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
			     counts.miss_after - counts.miss_before);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

static int measure_l3_residency(void *_plr)
{
	struct pseudo_lock_region *plr = _plr;
	struct residency_counts counts = {0};

	/*
	 * On Broadwell Microarchitecture the MEM_LOAD_UOPS_RETIRED event
	 * has two "no fix" errata associated with it: BDM35 and BDM100. On
	 * this platform the following events are used instead:
	 * LONGEST_LAT_CACHE 2EH (Documented in SDM)
	 *       REFERENCE 4FH
	 *       MISS      41H
	 */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_BROADWELL_X:
		/* On BDW the hit event counts references, not hits */
		perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
						  .umask = 0x4f);
		perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
						   .umask = 0x41);
		break;
	default:
		goto out;
	}

	measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
	/*
	 * If a failure prevented the measurements from succeeding
	 * tracepoints will still be written and all counts will be zero.
	 */

	counts.miss_after -= counts.miss_before;
	if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
		/*
		 * On BDW references and misses are counted, need to adjust.
		 * Sometimes the "hits" counter is a bit more than the
		 * references, for example, x references but x + 1 hits.
		 * To not report invalid hit values in this case we treat
		 * that as misses equal to references.
		 */
		/* First compute the number of cache references measured */
		counts.hits_after -= counts.hits_before;
		/* Next convert references to cache hits */
		counts.hits_after -= min(counts.miss_after, counts.hits_after);
	} else {
		counts.hits_after -= counts.hits_before;
	}

	trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
out:
	plr->thread_done = 1;
	wake_up_interruptible(&plr->lock_thread_wq);
	return 0;
}

/**
 * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
 * @rdtgrp: Resource group to which the pseudo-locked region belongs.
 * @sel: Selector of which measurement to perform on a pseudo-locked region.
 *
 * The measurement of latency to access a pseudo-locked region should be
 * done from a cpu that is associated with that pseudo-locked region.
 * Determine which cpu is associated with this region and start a thread on
 * that cpu to perform the measurement, wait for that thread to complete.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int cpu;
	int ret = -1;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out;
	}

	if (!plr->d) {
		ret = -ENODEV;
		goto out;
	}

	plr->thread_done = 0;
	cpu = cpumask_first(&plr->d->cpu_mask);
	if (!cpu_online(cpu)) {
		ret = -ENODEV;
		goto out;
	}

	plr->cpu = cpu;

	if (sel == 1)
		thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 2)
		thread = kthread_create_on_node(measure_l2_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else if (sel == 3)
		thread = kthread_create_on_node(measure_l3_residency, plr,
						cpu_to_node(cpu),
						"pseudo_lock_measure/%u",
						cpu);
	else
		goto out;

	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		goto out;
	}
	kthread_bind(thread, cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0)
		goto out;

	ret = 0;

out:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}

static ssize_t pseudo_lock_measure_trigger(struct file *file,
					   const char __user *user_buf,
					   size_t count, loff_t *ppos)
{
	struct rdtgroup *rdtgrp = file->private_data;
	size_t buf_size;
	char buf[32];
	int ret;
	int sel;

	buf_size = min(count, (sizeof(buf) - 1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';
	ret = kstrtoint(buf, 10, &sel);
	if (ret == 0) {
		if (sel != 1 && sel != 2 && sel != 3)
			return -EINVAL;
		ret = debugfs_file_get(file->f_path.dentry);
		if (ret)
			return ret;
		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
		if (ret == 0)
			ret = count;
		debugfs_file_put(file->f_path.dentry);
	}

	return ret;
}

static const struct file_operations pseudo_measure_fops = {
	.write = pseudo_lock_measure_trigger,
	.open = simple_open,
	.llseek = default_llseek,
};
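
/*
 * A short usage sketch for the measurement trigger above. The group name
 * "p0" and the debugfs mount point are illustrative assumptions; the
 * selector values map to the measurement threads created in
 * pseudo_lock_measure_cycles():
 *
 *   # echo 1 > /sys/kernel/debug/resctrl/p0/pseudo_lock_measure  (latency)
 *   # echo 2 > /sys/kernel/debug/resctrl/p0/pseudo_lock_measure  (L2 residency)
 *   # echo 3 > /sys/kernel/debug/resctrl/p0/pseudo_lock_measure  (L3 residency)
 *
 * Results are emitted via the pseudo_lock_mem_latency, pseudo_lock_l2 and
 * pseudo_lock_l3 tracepoints.
 */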

/**
 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Called when a resource group in the pseudo-locksetup mode receives a
 * valid schemata that should be pseudo-locked. Since the resource group is
 * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
 * associated with the resource group. This association is persistent until
 * the resource group is removed.
 *
 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
 * on failure. Descriptive error will be written to last_cmd_status buffer.
 */
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int new_minor;
	struct device *dev;
	int ret;

	ret = pseudo_lock_region_alloc(plr);
	if (ret < 0)
		return ret;

	ret = pseudo_lock_cstates_constrain(plr);
	if (ret < 0) {
		ret = -EINVAL;
		goto out_region;
	}

	plr->thread_done = 0;

	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
					cpu_to_node(plr->cpu),
					"pseudo_lock/%u", plr->cpu);
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
		goto out_cstates;
	}

	kthread_bind(thread, plr->cpu);
	wake_up_process(thread);

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0) {
		/*
		 * If the thread does not get on the CPU for whatever
		 * reason and the process which sets up the region is
		 * interrupted then this will leave the thread in runnable
		 * state and once it gets on the CPU it will dereference
		 * the cleared, but not freed, plr struct resulting in an
		 * empty pseudo-locking loop.
		 */
		rdt_last_cmd_puts("Locking thread interrupted\n");
		goto out_cstates;
	}

	ret = pseudo_lock_minor_get(&new_minor);
	if (ret < 0) {
		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
		goto out_cstates;
	}

	/*
	 * Unlock access but do not release the reference. The
	 * pseudo-locked region will still be here on return.
	 *
	 * The mutex has to be released temporarily to avoid a potential
	 * deadlock with the mm->mmap_lock which is obtained in the
	 * device_create() and debugfs_create_dir() callpath below as well
	 * as before the mmap() callback is called.
	 */
	mutex_unlock(&rdtgroup_mutex);

	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
		plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
						      debugfs_resctrl);
		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
			debugfs_create_file("pseudo_lock_measure", 0200,
					    plr->debugfs_dir, rdtgrp,
					    &pseudo_measure_fops);
	}

	dev = device_create(pseudo_lock_class, NULL,
			    MKDEV(pseudo_lock_major, new_minor),
			    rdtgrp, "%s", rdtgrp->kn->name);

	mutex_lock(&rdtgroup_mutex);

	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		rdt_last_cmd_printf("Failed to create character device: %d\n",
				    ret);
		goto out_debugfs;
	}

	/* We released the mutex - check if group was removed while we did so */
	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out_device;
	}

	plr->minor = new_minor;

	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
	closid_free(rdtgrp->closid);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);

	ret = 0;
	goto out;

out_device:
	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
	pseudo_lock_minor_release(new_minor);
out_cstates:
	pseudo_lock_cstates_relax(plr);
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 *
 * The removal of a pseudo-locked region can be initiated when the resource
 * group is removed from user space via a "rmdir" from userspace or the
 * unmount of the resctrl filesystem. On removal the resource group does
 * not go back to pseudo-locksetup mode before it is removed, instead it is
 * removed directly. There is thus asymmetry with the creation where the
 * &struct pseudo_lock_region is removed here while it was not created in
 * rdtgroup_pseudo_lock_create().
 *
 * Return: void
 */
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * Default group cannot be a pseudo-locked region so we can
		 * free closid here.
		 */
		closid_free(rdtgrp->closid);
		goto free;
	}

	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);

free:
	pseudo_lock_free(rdtgrp);
}

static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = region_find_by_minor(iminor(inode));
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	filp->private_data = rdtgrp;
	atomic_inc(&rdtgrp->waitcount);
	/* Perform a non-seekable open - llseek is not supported */
	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}
	filp->private_data = NULL;
	atomic_dec(&rdtgrp->waitcount);
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
{
	/* Not supported */
	return -EINVAL;
}

static const struct vm_operations_struct pseudo_mmap_ops = {
	.mremap = pseudo_lock_dev_mremap,
};

static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long vsize = vma->vm_end - vma->vm_start;
	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
	struct pseudo_lock_region *plr;
	struct rdtgroup *rdtgrp;
	unsigned long physical;
	unsigned long psize;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	plr = rdtgrp->plr;

	if (!plr->d) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	/*
	 * Task is required to run with affinity to the cpus associated
	 * with the pseudo-locked region. If this is not the case the task
	 * may be scheduled elsewhere and invalidate entries in the
	 * pseudo-locked region.
	 */
	if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	physical = __pa(plr->kmem) >> PAGE_SHIFT;
	psize = plr->size - off;

	if (off > plr->size) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	/*
	 * Ensure changes are carried directly to the memory being mapped,
	 * do not allow copy-on-write mapping.
	 */
	if (!(vma->vm_flags & VM_SHARED)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	if (vsize > psize) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	memset(plr->kmem + off, 0, vsize);

	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
			    vsize, vma->vm_page_prot)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EAGAIN;
	}
	vma->vm_ops = &pseudo_mmap_ops;
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static const struct file_operations pseudo_lock_dev_fops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read =		NULL,
	.write =	NULL,
	.open =		pseudo_lock_dev_open,
	.release =	pseudo_lock_dev_release,
	.mmap =		pseudo_lock_dev_mmap,
};
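
/*
 * A minimal userspace sketch (illustrative, not part of this file) of how
 * the character device above is meant to be consumed. The group name
 * "newlock" is an assumption; the mapping size should come from the
 * resource group's "size" file. Note that pseudo_lock_dev_mmap() requires
 * a MAP_SHARED mapping and that the calling task is affine to the CPUs of
 * the pseudo-locked cache domain:
 *
 *	int fd = open("/dev/pseudo_lock/newlock", O_RDWR);
 *	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 */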

static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
{
	struct rdtgroup *rdtgrp;

	rdtgrp = dev_get_drvdata(dev);
	if (mode)
		*mode = 0600;
	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
}

int rdt_pseudo_lock_init(void)
{
	int ret;

	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
	if (ret < 0)
		return ret;

	pseudo_lock_major = ret;

	pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock");
	if (IS_ERR(pseudo_lock_class)) {
		ret = PTR_ERR(pseudo_lock_class);
		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
		return ret;
	}

	pseudo_lock_class->devnode = pseudo_lock_devnode;
	return 0;
}

void rdt_pseudo_lock_release(void)
{
	class_destroy(pseudo_lock_class);
	pseudo_lock_class = NULL;
	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
	pseudo_lock_major = 0;
}