/*
 * Resource Director Technology (RDT) - monitoring support.
 *
 * Tracks allocation of Resource Monitoring IDs (RMIDs) and reads the
 * L3 cache occupancy (CQM) and memory bandwidth (MBM) event counters
 * through the IA32_QM_EVTSEL / IA32_QM_CTR MSR pair.
 *
 * More information about RDT monitoring can be found in the Intel (R)
 * x86 Architecture Software Developer Manual, volume 3, section 17.17.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "intel_rdt.h"

#define MSR_IA32_QM_CTR		0x0c8e
#define MSR_IA32_QM_EVTSEL	0x0c8d

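/*
 * struct rmid_entry - bookkeeping for a single RMID.
 * @rmid:	the RMID this entry describes
 * @busy:	number of domains in which this RMID is still "busy", i.e.
 *		its LLC occupancy has not yet dropped below the threshold
 * @list:	list head used to queue the entry on the free list
 */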
struct rmid_entry {
	u32			rmid;
	int			busy;
	struct list_head	list;
};

/*
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *	These RMIDs are guaranteed to have an occupancy less than the
 *	threshold occupancy.
 */
static LIST_HEAD(rmid_free_lru);

/*
 * @rmid_limbo_count - count of currently unused but (potentially)
 *	dirty RMIDs.
 *	This counts RMIDs that no one is currently using but that
 *	may have an occupancy value > intel_cqm_threshold. The user can
 *	change the threshold occupancy value.
 */
static unsigned int rmid_limbo_count;

/*
 * @rmid_ptrs - The array of rmid_entry structs backing the limbo and
 *	free lists.
 */
static struct rmid_entry	*rmid_ptrs;

/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */
bool rdt_mon_capable;

/*
 * Global to indicate which monitoring events are enabled.
 */
unsigned int rdt_mon_features;

/*
 * This is the threshold cache occupancy at which we will consider an
 * RMID available for re-allocation.
 */
unsigned int intel_cqm_threshold;

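/* Look up the rmid_entry for @rmid; the index must match the stored RMID. */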
static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
	struct rmid_entry *entry;

	entry = &rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

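/* Read one event counter for @rmid via the QM_EVTSEL/QM_CTR MSR pair. */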
static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/*
	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
	 * with a valid event code for the supported resource type and the
	 * bits IA32_QM_EVTSEL.RMID (bits 41:32) are configured with a valid
	 * RMID, IA32_QM_CTR.data (bits 61:0) reports the monitored data.
	 * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
	 * are error bits.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	return val;
}

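/* An RMID is dirty while its LLC occupancy is at or above the threshold. */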
static bool rmid_dirty(struct rmid_entry *entry)
{
	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);

	return val >= intel_cqm_threshold;
}

/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold clear the busy bit and
 * decrement the count. If the busy count gets to zero on an RMID, we
 * free the RMID.
 */
void __check_limbo(struct rdt_domain *d, bool force_free)
{
	struct rmid_entry *entry;
	struct rdt_resource *r;
	u32 crmid = 1, nrmid;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	/*
	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
	 * are marked as busy for occupancy < threshold. If the occupancy
	 * is less than the threshold decrement the busy counter of the
	 * RMID and move it to the free list when the counter reaches 0.
	 */
	for (;;) {
		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
		if (nrmid >= r->num_rmid)
			break;

		entry = __rmid_entry(nrmid);
		if (force_free || !rmid_dirty(entry)) {
			clear_bit(entry->rmid, d->rmid_busy_llc);
			if (!--entry->busy) {
				rmid_limbo_count--;
				list_add_tail(&entry->list, &rmid_free_lru);
			}
		}
		crmid = nrmid + 1;
	}
}

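/* Return true if any RMID in this domain is still in limbo. */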
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}

/*
 * As of now the RMID allocation is global.
 * However we keep track of which packages the RMIDs
 * are used on to optimize the limbo list management.
 */
int alloc_rmid(void)
{
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	if (list_empty(&rmid_free_lru))
		return rmid_limbo_count ? -EBUSY : -ENOSPC;

	entry = list_first_entry(&rmid_free_lru,
				 struct rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

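/*
 * A freed RMID may still have cache lines tagged to it. Mark it busy in
 * each domain (occupancy can only be checked for the local domain, so
 * remote domains are assumed dirty) and let the limbo worker move it
 * back to the free list once its occupancy drops below the threshold.
 */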
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r;
	struct rdt_domain *d;
	int cpu;
	u64 val;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	entry->busy = 0;
	cpu = get_cpu();
	list_for_each_entry(d, &r->domains, list) {
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
			if (val <= intel_cqm_threshold)
				continue;
		}

		/*
		 * For the first limbo RMID in the domain,
		 * set up the limbo worker.
		 */
		if (!has_busy_rmid(r, d))
			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
		set_bit(entry->rmid, d->rmid_busy_llc);
		entry->busy++;
	}
	put_cpu();

	if (entry->busy)
		rmid_limbo_count++;
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

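/* Return @rmid to the allocator; RMID 0 is reserved and never freed. */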
void free_rmid(u32 rmid)
{
	struct rmid_entry *entry;

	if (!rmid)
		return;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = __rmid_entry(rmid);

	if (is_llc_occupancy_enabled())
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}

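/*
 * The MBM counters are MBM_CNTR_WIDTH bits wide, so compute the delta
 * between two reads modulo 2^MBM_CNTR_WIDTH to hide counter wraparound.
 */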
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
{
	u64 shift = 64 - MBM_CNTR_WIDTH, chunks;

	chunks = (cur_msr << shift) - (prev_msr << shift);
	return chunks >>= shift;
}

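/*
 * Read one event for @rmid and accumulate it into @rr->val. MBM events
 * are accumulated as a running total of "chunks" in the per-domain
 * mbm_state so that counter overflow between reads is accounted for.
 */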
static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
	struct mbm_state *m;
	u64 chunks, tval;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
		rr->val = tval;
		return -EINVAL;
	}
	switch (rr->evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		rr->val += tval;
		return 0;
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		m = &rr->d->mbm_total[rmid];
		break;
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		m = &rr->d->mbm_local[rmid];
		break;
	default:
		/*
		 * Code would never reach here because an invalid
		 * event id would fail the __rmid_read.
		 */
		return -EINVAL;
	}

	if (rr->first) {
		memset(m, 0, sizeof(struct mbm_state));
		m->prev_bw_msr = m->prev_msr = tval;
		return 0;
	}

	chunks = mbm_overflow_count(m->prev_msr, tval);
	m->chunks += chunks;
	m->prev_msr = tval;

	rr->val += m->chunks;
	return 0;
}

/*
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps.
 */
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
	struct mbm_state *m = &rr->d->mbm_local[rmid];
	u64 tval, cur_bw, chunks;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;

	chunks = mbm_overflow_count(m->prev_bw_msr, tval);
	m->chunks_bw += chunks;
	m->chunks = m->chunks_bw;
	cur_bw = (chunks * r->mon_scale) >> 20;

	if (m->delta_comp)
		m->delta_bw = abs(cur_bw - m->prev_bw);
	m->delta_comp = false;
	m->prev_bw = cur_bw;
	m->prev_bw_msr = tval;
}

/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;

	rdtgrp = rr->rgrp;

	if (__mon_event_count(rdtgrp->mon.rmid, rr))
		return;

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry->mon.rmid, rr))
				return;
		}
	}
}

/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
 * that:
 *
 *   current bandwidth (cur_bw) < user specified bandwidth (user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and MBA throttle
 * MSRs to control the bandwidth for a particular rdtgrp. It builds on
 * the fact that resctrl rdtgroups have both monitoring and control.
 *
 * The frequency of the checks is 1s and we just tag along the MBM
 * overflow timer. Having a 1s interval makes the calculation of
 * bandwidth simpler.
 *
 * Although MBA's goal is to restrict the bandwidth to a maximum, there
 * may be a need to increase the bandwidth to avoid unnecessarily
 * restricting the L2 <-> L3 traffic.
 *
 * Since MBA controls the L2 external bandwidth whereas MBM measures the
 * L3 external bandwidth, the following sequence could lead to such a
 * situation:
 *
 * Consider an rdtgroup which had high L3 <-> memory traffic in its
 * initial phases -> mba_sc kicks in and reduces the bandwidth percentage
 * values -> but after some time the rdtgroup has mostly L2 <-> L3
 * traffic.
 *
 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
 * throttle MSRs already have low percentage values. To avoid
 * unnecessarily restricting such rdtgroups, we also increase the
 * bandwidth.
 */
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	u32 cur_bw, delta_bw, user_bw;
	struct rdt_resource *r_mba;
	struct rdt_domain *dom_mba;
	struct list_head *head;
	struct rdtgroup *entry;

	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = &dom_mbm->mbm_local[rmid];

	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}

	cur_bw = pmbm_data->prev_bw;
	user_bw = dom_mba->mbps_val[closid];
	delta_bw = pmbm_data->delta_bw;
	cur_msr_val = dom_mba->ctrl_val[closid];

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cur_bw += cmbm_data->prev_bw;
		delta_bw += cmbm_data->delta_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 *
	 * The delta_bw is used when increasing the bandwidth so that we
	 * don't alternately increase and decrease the control values
	 * continuously.
	 *
	 * For example: consider cur_bw = 90MBps, user_bw = 100MBps and a
	 * bandwidth step of 20MBps (> user_bw - cur_bw); we would keep
	 * switching between 90 and 110 continuously if we only checked
	 * cur_bw < user_bw.
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw + delta_bw))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		return;
	}

	cur_msr = r_mba->msr_base + closid;
	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
	dom_mba->ctrl_val[closid] = new_msr_val;

	/*
	 * Delta values are updated dynamically, package wise, for each
	 * rdtgrp every time the throttle MSR changes value.
	 *
	 * This is because (1) the increase in bandwidth is not perfectly
	 * linear and only "approximately" linear even when the hardware
	 * says it is linear, and (2) since MBA is a core specific
	 * mechanism, the delta values vary based on the number of cores
	 * used by the rdtgrp.
	 */
	pmbm_data->delta_comp = true;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cmbm_data->delta_comp = true;
	}
}

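/* Read the MBM counters (and bandwidth, if mba_sc is active) for one RMID. */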
static void mbm_update(struct rdt_domain *d, int rmid)
{
	struct rmid_read rr;

	rr.first = false;
	rr.d = d;

	/*
	 * This is protected from concurrent reads from user
	 * as both the user and we hold the global mutex.
	 */
	if (is_mbm_total_enabled()) {
		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
		__mon_event_count(rmid, &rr);
	}
	if (is_mbm_local_enabled()) {
		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;

		/*
		 * Call the MBA software controller only for the
		 * control groups and when the user has enabled
		 * the software controller explicitly.
		 */
		if (!is_mba_sc(NULL))
			__mon_event_count(rmid, &rr);
		else
			mbm_bw_count(rmid, &rr);
	}
}

/*
 * Handler to scan the limbo list and move the RMIDs whose
 * occupancy < threshold_occupancy to the free list.
 */
void cqm_handle_limbo(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
	int cpu = smp_processor_id();
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	r = &rdt_resources_all[RDT_RESOURCE_L3];
	d = get_domain_from_cpu(cpu, r);

	if (!d) {
		pr_warn_once("Failure to get domain for limbo worker\n");
		goto out_unlock;
	}

	__check_limbo(d, false);

	if (has_busy_rmid(r, d))
		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

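/* Schedule the limbo worker on a CPU in the domain after @delay_ms. */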
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	struct rdt_resource *r;
	int cpu;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	cpu = cpumask_any(&dom->cpu_mask);
	dom->cqm_work_cpu = cpu;

	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}

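/*
 * Periodic worker that reads the MBM counters for every rdtgroup in this
 * domain often enough that the MBM_CNTR_WIDTH-bit hardware counters never
 * wrap unnoticed, and runs the MBA software controller when it is enabled.
 */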
void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	int cpu = smp_processor_id();
	struct list_head *head;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	if (!static_branch_likely(&rdt_enable_key))
		goto out_unlock;

	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
	if (!d)
		goto out_unlock;

	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(d, prgrp->mon.rmid);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(d, crgrp->mon.rmid);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

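/* Schedule the MBM overflow worker on a CPU in the domain after @delay_ms. */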
void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	if (!static_branch_likely(&rdt_enable_key))
		return;
	cpu = cpumask_any(&dom->cpu_mask);
	dom->mbm_work_cpu = cpu;
	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}

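/* Allocate rmid_ptrs[] and seed the free list with every RMID except 0. */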
static int dom_data_init(struct rdt_resource *r)
{
	struct rmid_entry *entry = NULL;
	int i, nr_rmids;

	nr_rmids = r->num_rmid;
	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs)
		return -ENOMEM;

	for (i = 0; i < nr_rmids; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		entry->rmid = i;
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks that are not monitored.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	return 0;
}

static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of the RDT_RESOURCE_L3 resource
 * because, as per the SDM, the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}

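/*
 * Probe the L3 monitoring parameters from the CPUID-derived boot_cpu_data,
 * pick a default re-allocation threshold and register the event list.
 */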
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.35MB per RMID.
	 */
	intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid;

	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	intel_cqm_threshold /= r->mon_scale;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}