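// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Monitoring code
 *
 * More information about RDT can be found in the Intel(R) x86 Architecture
 * Software Developer Manual, volume 3, section 17.17.
 */
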
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "internal.h"

struct rmid_entry {
	u32			rmid;
	int			busy;
	struct list_head	list;
};

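/**
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *	These RMIDs are guaranteed to have an occupancy less than the
 *	threshold occupancy.
 */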
static LIST_HEAD(rmid_free_lru);

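/**
 * @rmid_limbo_count - Count of currently unused but (potentially)
 *	dirty RMIDs.
 *	This counts RMIDs that no one is currently using but that
 *	may still have an occupancy value above resctrl_cqm_threshold.
 */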
static unsigned int rmid_limbo_count;

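/**
 * @rmid_ptrs - Array of rmid_entry structures indexed by RMID. Each
 *	entry is linked into either the free or the limbo list.
 */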
static struct rmid_entry	*rmid_ptrs;

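/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */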
bool rdt_mon_capable;

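/*
 * Global to indicate which monitoring events are enabled.
 */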
unsigned int rdt_mon_features;

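/*
 * This is the threshold cache occupancy at which we will consider an
 * RMID available for re-allocation.
 */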
unsigned int resctrl_cqm_threshold;

#define CF(cf)	((unsigned long)(1048576 * (cf) + 0.5))

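/*
 * The correction factor table is documented in Documentation/x86/resctrl.rst.
 * If rmid > rmid threshold, MBM total and local values should be multiplied
 * by the correction factor.
 *
 * The original table is modified for better code:
 *
 * 1. The threshold 0 is changed to rmid count - 1 so don't do correction
 *    for the case.
 * 2. MBM total and local correction table indexed by core counter which is
 *    equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
 * 3. The correction factor is normalized to 2^20 (1048576) so it's faster
 *    to calculate corrected value by shifting:
 *    corrected_value = (original_value * correction_factor) >> 20
 */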
static const struct mbm_correction_factor_table {
	u32 rmidthreshold;
	u64 cf;
} mbm_cf_table[] __initconst = {
	{7,	CF(1.000000)},
	{15,	CF(1.000000)},
	{15,	CF(0.969650)},
	{31,	CF(1.000000)},
	{31,	CF(1.066667)},
	{31,	CF(0.969650)},
	{47,	CF(1.142857)},
	{63,	CF(1.000000)},
	{63,	CF(1.185115)},
	{63,	CF(1.066553)},
	{79,	CF(1.454545)},
	{95,	CF(1.000000)},
	{95,	CF(1.230769)},
	{95,	CF(1.142857)},
	{95,	CF(1.066667)},
	{127,	CF(1.000000)},
	{127,	CF(1.254863)},
	{127,	CF(1.185255)},
	{151,	CF(1.000000)},
	{127,	CF(1.066667)},
	{167,	CF(1.000000)},
	{159,	CF(1.454334)},
	{183,	CF(1.000000)},
	{127,	CF(0.969744)},
	{191,	CF(1.280246)},
	{191,	CF(1.230921)},
	{215,	CF(1.000000)},
	{191,	CF(1.143118)},
};

static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
static u64 mbm_cf __read_mostly;

static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
	/* Apply the normalized (2^20) correction factor, if any. */
	if (rmid > mbm_cf_rmidthreshold)
		val = (val * mbm_cf) >> 20;

	return val;
}

static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
	struct rmid_entry *entry;

	entry = &rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/*
	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
	 * with a valid event code for supported resource type and the bits
	 * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
	 * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
	 * are error bits.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	return val;
}

static bool rmid_dirty(struct rmid_entry *entry)
{
	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);

	return val >= resctrl_cqm_threshold;
}

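/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold clear the busy bit and
 * decrement the count. If the busy count gets to zero on an RMID, we
 * free the RMID.
 */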
void __check_limbo(struct rdt_domain *d, bool force_free)
{
	struct rmid_entry *entry;
	struct rdt_resource *r;
	u32 crmid = 1, nrmid;

	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	/*
	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
	 * are marked as busy for occupancy < threshold. If the occupancy
	 * is less than the threshold decrement the busy counter of the
	 * RMID and move it to the free list when the counter reaches 0.
	 */
	for (;;) {
		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
		if (nrmid >= r->num_rmid)
			break;

		entry = __rmid_entry(nrmid);
		if (force_free || !rmid_dirty(entry)) {
			clear_bit(entry->rmid, d->rmid_busy_llc);
			if (!--entry->busy) {
				rmid_limbo_count--;
				list_add_tail(&entry->list, &rmid_free_lru);
			}
		}
		crmid = nrmid + 1;
	}
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}

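/*
 * As of now the RMID allocation is global.
 * However we keep track of which packages the RMIDs
 * are used on to optimize the limbo list management.
 */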
int alloc_rmid(void)
{
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	if (list_empty(&rmid_free_lru))
		return rmid_limbo_count ? -EBUSY : -ENOSPC;

	entry = list_first_entry(&rmid_free_lru,
				 struct rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r;
	struct rdt_domain *d;
	int cpu;
	u64 val;

	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

	entry->busy = 0;
	cpu = get_cpu();
	list_for_each_entry(d, &r->domains, list) {
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
			if (val <= resctrl_cqm_threshold)
				continue;
		}

		/*
		 * For the first limbo RMID in the domain,
		 * set up the limbo worker.
		 */
		if (!has_busy_rmid(r, d))
			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
		set_bit(entry->rmid, d->rmid_busy_llc);
		entry->busy++;
	}
	put_cpu();

	if (entry->busy)
		rmid_limbo_count++;
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

void free_rmid(u32 rmid)
{
	struct rmid_entry *entry;

	if (!rmid)
		return;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = __rmid_entry(rmid);

	if (is_llc_occupancy_enabled())
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

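/*
 * Compute the delta between two reads of an MBM counter that is only
 * 'width' bits wide, shifting both values into the top bits so that a
 * hardware counter wraparound still yields the correct difference.
 * E.g. for width = 24, prev_msr = 0xfffffe and cur_msr = 0x000001 the
 * result is 3.
 */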
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
	u64 shift = 64 - width, chunks;

	chunks = (cur_msr << shift) - (prev_msr << shift);
	return chunks >> shift;
}

static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
	struct mbm_state *m;
	u64 chunks, tval;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return tval;

	switch (rr->evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		rr->val += tval;
		return 0;
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		m = &rr->d->mbm_total[rmid];
		break;
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		m = &rr->d->mbm_local[rmid];
		break;
	default:
		/*
		 * Code would never reach here because an invalid
		 * event id would fail the __rmid_read.
		 */
		return RMID_VAL_ERROR;
	}

	if (rr->first) {
		memset(m, 0, sizeof(struct mbm_state));
		m->prev_bw_msr = m->prev_msr = tval;
		return 0;
	}

	chunks = mbm_overflow_count(m->prev_msr, tval, hw_res->mbm_width);
	m->chunks += chunks;
	m->prev_msr = tval;

	rr->val += get_corrected_mbm_count(rmid, m->chunks);

	return 0;
}

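/*
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps.
 */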
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
	struct mbm_state *m = &rr->d->mbm_local[rmid];
	u64 tval, cur_bw, chunks;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;

	chunks = mbm_overflow_count(m->prev_bw_msr, tval, hw_res->mbm_width);
	cur_bw = (get_corrected_mbm_count(rmid, chunks) * hw_res->mon_scale) >> 20;

	if (m->delta_comp)
		m->delta_bw = abs(cur_bw - m->prev_bw);
	m->delta_comp = false;
	m->prev_bw = cur_bw;
	m->prev_bw_msr = tval;
}

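/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */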
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;
	u64 ret_val;

	rdtgrp = rr->rgrp;

	ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);

	/*
	 * For Ctrl groups read data from child monitor groups and
	 * add them together. Count events which are read successfully.
	 * Discard the rmid_reads reporting errors.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry->mon.rmid, rr) == 0)
				ret_val = 0;
		}
	}

	/* Report an error only if none of the rmid_reads succeeded. */
	if (ret_val)
		rr->val = ret_val;
}

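/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
 * that:
 *
 *   current bandwidth (cur_bw) < user specified bandwidth (user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and MBA throttle
 * MSRs to control the bandwidth for a particular rdtgrp. It builds on
 * the fact that resctrl rdtgroups have both monitoring and control.
 *
 * The checks tag along the MBM overflow timer, so the bandwidth is
 * re-evaluated at the same interval at which the MBM counters are read.
 */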
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	struct rdt_hw_resource *hw_r_mba;
	struct rdt_hw_domain *hw_dom_mba;
	u32 cur_bw, delta_bw, user_bw;
	struct rdt_resource *r_mba;
	struct rdt_domain *dom_mba;
	struct list_head *head;
	struct rdtgroup *entry;

	if (!is_mbm_local_enabled())
		return;

	hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
	r_mba = &hw_r_mba->r_resctrl;
	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = &dom_mbm->mbm_local[rmid];

	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}
	hw_dom_mba = resctrl_to_arch_dom(dom_mba);

	cur_bw = pmbm_data->prev_bw;
	user_bw = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
	delta_bw = pmbm_data->delta_bw;

	/*
	 * The current throttle value comes from the cached hardware
	 * control value rather than a fresh MSR read.
	 */
	cur_msr_val = hw_dom_mba->ctrl_val[closid];

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cur_bw += cmbm_data->prev_bw;
		delta_bw += cmbm_data->delta_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 *
	 * The delta_bw is used when increasing the bandwidth so that we
	 * don't alternately increase and decrease the control values
	 * continuously.
	 *
	 * For example: if cur_bw = 90MBps, user_bw = 100MBps and the
	 * bandwidth step is 20MBps (> user_bw - cur_bw), we would keep
	 * switching between 90 and 110 continuously if we only checked
	 * cur_bw < user_bw.
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw + delta_bw))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		return;
	}

	cur_msr = hw_r_mba->msr_base + closid;
	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
	hw_dom_mba->ctrl_val[closid] = new_msr_val;

	/*
	 * Delta values are updated dynamically package wise for each
	 * rdtgrp every time the throttle MSR changes value.
	 *
	 * This is why the delta values are recorded separately: request a
	 * fresh delta computation (delta_comp) for the group and all its
	 * child monitor groups, so the next sample measures the
	 * fluctuation against the new throttle value.
	 */
	pmbm_data->delta_comp = true;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cmbm_data->delta_comp = true;
	}
}

static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
	struct rmid_read rr;

	rr.first = false;
	rr.r = r;
	rr.d = d;

	/*
	 * This is protected from concurrent reads from user
	 * as both the user and we hold the global mutex.
	 */
	if (is_mbm_total_enabled()) {
		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
		__mon_event_count(rmid, &rr);
	}
	if (is_mbm_local_enabled()) {
		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
		__mon_event_count(rmid, &rr);

		/*
		 * Call the MBA software controller only for the
		 * control groups and when user has enabled
		 * the software controller explicitly.
		 */
		if (is_mba_sc(NULL))
			mbm_bw_count(rmid, &rr);
	}
}

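/*
 * Handler to scan the limbo list and move RMIDs whose occupancy has
 * dropped below the threshold back to the free list.
 */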
void cqm_handle_limbo(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
	int cpu = smp_processor_id();
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
	d = container_of(work, struct rdt_domain, cqm_limbo.work);

	__check_limbo(d, false);

	if (has_busy_rmid(r, d))
		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);

	mutex_unlock(&rdtgroup_mutex);
}

void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	cpu = cpumask_any(&dom->cpu_mask);
	dom->cqm_work_cpu = cpu;

	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}

void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	int cpu = smp_processor_id();
	struct list_head *head;
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	if (!static_branch_likely(&rdt_mon_enable_key))
		goto out_unlock;

	r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
	d = container_of(work, struct rdt_domain, mbm_over.work);

	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(r, d, prgrp->mon.rmid);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(r, d, crgrp->mon.rmid);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	if (!static_branch_likely(&rdt_mon_enable_key))
		return;
	cpu = cpumask_any(&dom->cpu_mask);
	dom->mbm_work_cpu = cpu;
	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}

static int dom_data_init(struct rdt_resource *r)
{
	struct rmid_entry *entry = NULL;
	int i, nr_rmids;

	nr_rmids = r->num_rmid;
	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs)
		return -ENOMEM;

	for (i = 0; i < nr_rmids; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		entry->rmid = i;
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks that are not monitored.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	return 0;
}

static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};

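/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of the RDT_RESOURCE_L3 resource
 * because, as per the SDM, the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */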
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}

int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	unsigned int cl_size = boot_cpu_data.x86_cache_size;
	int ret;

	hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
	hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;

	if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
		hw_res->mbm_width += mbm_offset;
	else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
		pr_warn("Ignoring impossible MBM counter offset\n");

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
	 */
	resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;

	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	resctrl_cqm_threshold /= hw_res->mon_scale;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}

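/*
 * Pick the MBM correction factor for this part based on the number of
 * supported RMIDs; mbm_cf_table above is indexed by
 * (x86_cache_max_rmid + 1) / 8 - 1.
 */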
void __init intel_rdt_mbm_apply_quirk(void)
{
	int cf_index;

	cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
	if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
		pr_info("No MBM correction factor available\n");
		return;
	}

	mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
	mbm_cf = mbm_cf_table[cf_index].cf;
}