/*
 * Hypervisor supplied "24x7" performance counter support.
 */

#define pr_fmt(fmt) "hv-24x7: " fmt

#include <linux/perf_event.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
#include <linux/byteorder/generic.h>

#include "hv-24x7.h"
#include "hv-24x7-catalog.h"
#include "hv-common.h"

static bool domain_is_valid(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		/* fall through */
#include "hv-24x7-domains.h"
#undef DOMAIN
		return true;
	default:
		return false;
	}
}

static bool is_physical_domain(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		return c;
#include "hv-24x7-domains.h"
#undef DOMAIN
	default:
		return false;
	}
}

static const char *domain_name(unsigned domain)
{
	if (!domain_is_valid(domain))
		return NULL;

	switch (domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:		return "Physical Chip";
	case HV_PERF_DOMAIN_PHYS_CORE:		return "Physical Core";
	case HV_PERF_DOMAIN_VCPU_HOME_CORE:	return "VCPU Home Core";
	case HV_PERF_DOMAIN_VCPU_HOME_CHIP:	return "VCPU Home Chip";
	case HV_PERF_DOMAIN_VCPU_HOME_NODE:	return "VCPU Home Node";
	case HV_PERF_DOMAIN_VCPU_REMOTE_NODE:	return "VCPU Remote Node";
	}

	WARN_ON_ONCE(domain);
	return NULL;
}

static bool catalog_entry_domain_is_valid(unsigned domain)
{
	return is_physical_domain(domain);
}
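
/*
 * Layout of the perf_event config fields consumed by this PMU, as
 * declared by the EVENT_DEFINE_RANGE_FORMAT() uses below:
 *
 *   config:  bits  0-3  domain (one of HV_PERF_DOMAIN_*)
 *            bits 16-31 core/chip/vcpu index (which one applies
 *                       depends on the domain)
 *            bits 32-63 offset into the domain's counter data
 *   config1: bits  0-15 lpar (logical partition) index
 *
 * All remaining bits are reserved and must be zero (enforced in
 * h_24x7_event_init()).
 *
 * Illustrative usage from userspace (the event string is an example;
 * the real event names are exported under events/ from the catalog):
 *
 *   perf stat -e 'hv_24x7/domain=2,offset=8,core=0,lpar=0x0/' -a sleep 1
 */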
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);

EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);

EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);

EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);

EVENT_DEFINE_RANGE(reserved1, config, 4, 15);
EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);

static struct attribute *format_attrs[] = {
	&format_attr_domain.attr,
	&format_attr_offset.attr,
	&format_attr_core.attr,
	&format_attr_chip.attr,
	&format_attr_vcpu.attr,
	&format_attr_lpar.attr,
	NULL,
};

static struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};

static struct attribute_group event_desc_group = {
	.name = "event_descs",
	/* .attrs is set in init */
};

static struct attribute_group event_long_desc_group = {
	.name = "event_long_descs",
	/* .attrs is set in init */
};

static struct kmem_cache *hv_page_cache;

DEFINE_PER_CPU(int, hv_24x7_txn_flags);
DEFINE_PER_CPU(int, hv_24x7_txn_err);

struct hv_24x7_hw {
	struct perf_event *events[255];
};

DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
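
/*
 * request_buffer and result_buffer are not required to be 4k aligned,
 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
 * the simplest way to ensure that.
 */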
#define H24x7_DATA_BUFFER_SIZE	4096
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);

/*
 * The data portion of an event (ev->remainder) holds the event's name,
 * description, and long description back to back. Each string is
 * preceded by a __be16 byte count that includes the 2-byte length field
 * itself (the name's count lives in the fixed portion as
 * event_name_len); hence the "- 2" adjustments below.
 */
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
	*len = be16_to_cpu(ev->event_name_len) - 2;
	return (char *)ev->remainder;
}

static char *event_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);

	*len = be16_to_cpu(*desc_len) - 2;
	return (char *)ev->remainder + nl;
}

static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
	unsigned desc_len = be16_to_cpu(*desc_len_);
	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);

	*len = be16_to_cpu(*long_desc_len) - 2;
	return (char *)ev->remainder + nl + desc_len;
}

static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
					  void *end)
{
	void *start = ev;

	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
}
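
/*
 * Validate the self-reported string lengths of an event entry against
 * the containing buffer and return the address of the byte just
 * following the event, or NULL if any length is inconsistent.
 *
 * Things not checked here:
 *  - that padding for name, desc, and long desc is '\0' bytes.
 */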
static void *event_end(struct hv_24x7_event_data *ev, void *end)
{
	void *start = ev;
	__be16 *dl_, *ldl_;
	unsigned dl, ldl;
	unsigned nl = be16_to_cpu(ev->event_name_len);

	if (nl < 2) {
		pr_debug("%s: name length too short: %u\n", __func__, nl);
		return NULL;
	}

	if (start + nl > end) {
		pr_debug("%s: start=%p + nl=%u > end=%p\n",
				__func__, start, nl, end);
		return NULL;
	}

	dl_ = (__be16 *)(ev->remainder + nl - 2);
	if (!IS_ALIGNED((uintptr_t)dl_, 2))
		pr_warn("desc len not aligned %p\n", dl_);
	dl = be16_to_cpu(*dl_);
	if (dl < 2) {
		pr_debug("%s: desc len too short: %u\n", __func__, dl);
		return NULL;
	}

	if (start + nl + dl > end) {
		pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p\n",
				__func__, start, nl, dl, start + nl + dl, end);
		return NULL;
	}

	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
		pr_warn("long desc len not aligned %p\n", ldl_);
	ldl = be16_to_cpu(*ldl_);
	if (ldl < 2) {
		pr_debug("%s: long desc len too short (ldl=%u)\n",
				__func__, ldl);
		return NULL;
	}

	if (start + nl + dl + ldl > end) {
		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p\n",
				__func__, start, nl, dl, ldl, end);
		return NULL;
	}

	return start + nl + dl + ldl;
}

static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
					      unsigned long version,
					      unsigned long index)
{
	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
			phys_4096, version, index);

	WARN_ON(!IS_ALIGNED(phys_4096, 4096));

	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
			phys_4096, version, index);
}

static unsigned long h_get_24x7_catalog_page(char page[], u64 version,
					     u32 index)
{
	return h_get_24x7_catalog_page_(virt_to_phys(page), version, index);
}
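
/*
 * Build the "format" string exported to sysfs for a catalog event,
 * based on the event's domain:
 *
 *  - Chip events can only be counted in the physical chip domain, so
 *    the domain is filled in and only the chip index is left as a
 *    required ("?") parameter.
 *  - Core events may be counted either in the physical core domain or
 *    in one of the virtual CPU domains, so the domain itself is left
 *    as "?" for the user to supply.
 *
 * The lpar field is fixed at 0x0 for the physical domains and required
 * ("?") otherwise.
 */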
static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
{
	const char *sindex;
	const char *lpar;
	const char *domain_str;
	char buf[8];

	switch (domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		snprintf(buf, sizeof(buf), "%d", domain);
		domain_str = buf;
		lpar = "0x0";
		sindex = "chip";
		break;
	case HV_PERF_DOMAIN_PHYS_CORE:
		domain_str = "?";
		lpar = "0x0";
		sindex = "core";
		break;
	default:
		domain_str = "?";
		lpar = "?";
		sindex = "vcpu";
	}

	return kasprintf(GFP_KERNEL,
			"domain=%s,offset=0x%x,%s=?,lpar=%s",
			domain_str,
			be16_to_cpu(event->event_counter_offs) +
				be16_to_cpu(event->event_group_record_offs),
			sindex,
			lpar);
}

static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
{
	return kasprintf(gfp, "%.*s", max_len, maybe_str);
}

static ssize_t device_show_string(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *d;

	d = container_of(attr, struct dev_ext_attribute, attr);

	return sprintf(buf, "%s\n", (char *)d->var);
}

static struct attribute *device_str_attr_create_(char *name, char *str)
{
	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);

	if (!attr)
		return NULL;

	sysfs_attr_init(&attr->attr.attr);

	attr->var = str;
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = device_show_string;

	return &attr->attr.attr;
}
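
/*
 * Allocate a sysfs attribute named "name" (or "name__<name_nonce>" for
 * a nonzero nonce) whose value is a bounded copy of "str". Both
 * allocations are handed to the attribute on success and are never
 * freed; the attributes live until shutdown.
 */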
static struct attribute *device_str_attr_create(char *name, int name_max,
						int name_nonce,
						char *str, size_t str_max)
{
	char *n;
	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
	struct attribute *a;

	if (!s)
		return NULL;

	if (!name_nonce)
		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
	else
		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
					name_nonce);
	if (!n)
		goto out_s;

	a = device_str_attr_create_(n, s);
	if (!a)
		goto out_n;

	return a;
out_n:
	kfree(n);
out_s:
	kfree(s);
	return NULL;
}

static struct attribute *event_to_attr(unsigned ix,
				       struct hv_24x7_event_data *event,
				       unsigned domain,
				       int nonce)
{
	int event_name_len;
	char *ev_name, *a_ev_name, *val;
	struct attribute *attr;

	if (!domain_is_valid(domain)) {
		pr_warn("catalog event %u has invalid domain %u\n",
				ix, domain);
		return NULL;
	}

	val = event_fmt(event, domain);
	if (!val)
		return NULL;

	ev_name = event_name(event, &event_name_len);
	if (!nonce)
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
				(int)event_name_len, ev_name);
	else
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
				(int)event_name_len, ev_name, nonce);

	if (!a_ev_name)
		goto out_val;

	attr = device_str_attr_create_(a_ev_name, val);
	if (!attr)
		goto out_name;

	return attr;
out_name:
	kfree(a_ev_name);
out_val:
	kfree(val);
	return NULL;
}

static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
					    int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static struct attribute *
event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_long_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
			       struct hv_24x7_event_data *event, int nonce)
{
	*attrs = event_to_attr(ix, event, event->domain, nonce);
	if (!*attrs)
		return -1;

	return 0;
}

/*
 * The rbtree below tracks how many times each (name, domain) pair has
 * been seen in the catalog; repeats get a "__<nonce>" suffix in sysfs
 * to keep attribute names unique.
 */
struct event_uniq {
	struct rb_node node;
	const char *name;
	int nl;
	unsigned ct;
	unsigned domain;
};

static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	if (s1 > s2)
		return -1;

	return memcmp(d1, d2, s1);
}

static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
		       size_t s2, unsigned d2)
{
	int r = memord(v1, s1, v2, s2);

	if (r)
		return r;
	if (d1 > d2)
		return 1;
	if (d2 > d1)
		return -1;
	return 0;
}

static int event_uniq_add(struct rb_root *root, const char *name, int nl,
			  unsigned domain)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct event_uniq *data;

	/* Figure out where to put the new node */
	while (*new) {
		struct event_uniq *it;
		int result;

		it = container_of(*new, struct event_uniq, node);
		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
					it->domain);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else {
			it->ct++;
			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
						name, it->ct);
			return it->ct;
		}
	}

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	*data = (struct event_uniq) {
		.name = name,
		.nl = nl,
		.ct = 0,
		.domain = domain,
	};

	/* Add new node and rebalance tree. */
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);

	return 0;
}

static void event_uniq_destroy(struct rb_root *root)
{
	/*
	 * the strings we point to are in the giant block of memory filled by
	 * the catalog, and are freed separately.
	 */
	struct event_uniq *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
		kfree(pos);
}
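
/*
 * Ensure an event's self-reported sizes are consistent and don't lead
 * us to read outside the event data we copied from the hypervisor.
 *
 * Returns the event length in bytes on success, or -1 (after logging
 * as appropriate) on any inconsistency, including running past the
 * last listed event.
 */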
static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
					  size_t event_idx,
					  size_t event_data_bytes,
					  size_t event_entry_count,
					  size_t offset, void *end)
{
	ssize_t ev_len;
	void *ev_end, *calc_ev_end;

	if (offset >= event_data_bytes)
		return -1;

	if (event_idx >= event_entry_count) {
		pr_devel("catalog event data has %zu bytes of padding after last event\n",
				event_data_bytes - offset);
		return -1;
	}

	if (!event_fixed_portion_is_within(event, end)) {
		pr_warn("event %zu fixed portion is not within range\n",
				event_idx);
		return -1;
	}

	ev_len = be16_to_cpu(event->length);

	if (ev_len % 16)
		pr_info("event %zu has length %zd not divisible by 16: event=%pK\n",
				event_idx, ev_len, event);

	ev_end = (__u8 *)event + ev_len;
	if (ev_end > end) {
		pr_warn("event %zu has .length=%zd, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
				event_idx, ev_len, ev_end, end, offset);
		return -1;
	}

	calc_ev_end = event_end(event, end);
	if (!calc_ev_end) {
		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
				event_idx, event_data_bytes, event, end,
				offset);
		return -1;
	}

	if (calc_ev_end > ev_end) {
		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
				event_idx, event, ev_end, offset, calc_ev_end);
		return -1;
	}

	return ev_len;
}

#define MAX_4K (SIZE_MAX / 4096)

static int create_events_from_catalog(struct attribute ***events_,
				      struct attribute ***event_descs_,
				      struct attribute ***event_long_descs_)
{
	unsigned long hret;
	size_t catalog_len, catalog_page_len, event_entry_count,
	       event_data_len, event_data_offs,
	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
	       attr_max, event_idx_last, desc_ct, long_desc_ct;
	ssize_t ct, ev_len;
	uint32_t catalog_version_num;
	struct attribute **events, **event_descs, **event_long_descs;
	struct hv_24x7_catalog_page_0 *page_0 =
		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
	void *page = page_0;
	void *event_data, *end;
	struct hv_24x7_event_data *event;
	struct rb_root ev_uniq = RB_ROOT;
	int ret = 0;

	if (!page) {
		ret = -ENOMEM;
		goto e_out;
	}

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);

	if (MAX_4K < catalog_page_len) {
		pr_err("invalid page count: %zu\n", catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	catalog_len = catalog_page_len * 4096;

	event_entry_count = be16_to_cpu(page_0->event_entry_count);
	event_data_offs = be16_to_cpu(page_0->event_data_offs);
	event_data_len = be16_to_cpu(page_0->event_data_len);

	pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
			(size_t)catalog_version_num, catalog_len,
			event_entry_count, event_data_offs, event_data_len);

	if ((MAX_4K < event_data_len)
			|| (MAX_4K < event_data_offs)
			|| (MAX_4K - event_data_offs < event_data_len)) {
		pr_err("invalid event data offs %zu and/or len %zu\n",
				event_data_offs, event_data_len);
		ret = -EIO;
		goto e_free;
	}

	if ((event_data_offs + event_data_len) > catalog_page_len) {
		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
				event_data_offs,
				event_data_offs + event_data_len,
				catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	if (SIZE_MAX - 1 < event_entry_count) {
		pr_err("event_entry_count %zu is invalid\n", event_entry_count);
		ret = -EIO;
		goto e_free;
	}

	event_data_bytes = event_data_len * 4096;

	/*
	 * event data can span several pages, events can cross between these
	 * pages. Use vmalloc to make this easier.
	 */
	event_data = vmalloc(event_data_bytes);
	if (!event_data) {
		pr_err("could not allocate event data\n");
		ret = -ENOMEM;
		goto e_free;
	}

	end = event_data + event_data_bytes;

	/*
	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
	 * divisible by 4096
	 */
	BUILD_BUG_ON(PAGE_SIZE % 4096);

	for (i = 0; i < event_data_len; i++) {
		hret = h_get_24x7_catalog_page_(
				vmalloc_to_phys(event_data + i * 4096),
				catalog_version_num,
				i + event_data_offs);
		if (hret) {
			pr_err("failed to get event data in page %zu\n",
					i + event_data_offs);
			ret = -EIO;
			goto e_event_data;
		}
	}

	/*
	 * scan the catalog to determine the number of attributes we need,
	 * and verify it at the same time.
	 */
	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
	     ;
	     event_idx++, event = (void *)event + ev_len) {
		size_t offset = (void *)event - (void *)event_data;
		char *name;
		int nl;

		ev_len = catalog_event_len_validate(event, event_idx,
						    event_data_bytes,
						    event_entry_count,
						    offset, end);
		if (ev_len < 0)
			break;

		name = event_name(event, &nl);

		if (event->event_group_record_len == 0) {
			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
					event_idx, nl, name);
			junk_events++;
			continue;
		}

		if (!catalog_entry_domain_is_valid(event->domain)) {
			pr_info("event %zu (%.*s) has invalid domain %d\n",
					event_idx, nl, name, event->domain);
			junk_events++;
			continue;
		}

		attr_max++;
	}

	event_idx_last = event_idx;
	if (event_idx_last != event_entry_count)
		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
				event_idx_last, event_entry_count, junk_events);

	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
	if (!events) {
		ret = -ENOMEM;
		goto e_event_data;
	}

	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
				    GFP_KERNEL);
	if (!event_descs) {
		ret = -ENOMEM;
		goto e_event_attrs;
	}

	event_long_descs = kmalloc_array(event_idx + 1,
					 sizeof(*event_long_descs), GFP_KERNEL);
	if (!event_long_descs) {
		ret = -ENOMEM;
		goto e_event_descs;
	}

	/* Iterate over the catalog filling in the attribute vectors */
	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
	     event = event_data, event_idx = 0;
	     event_idx < event_idx_last;
	     event_idx++, ev_len = be16_to_cpu(event->length),
	     event = (void *)event + ev_len) {
		char *name;
		int nl;
		int nonce;

		/*
		 * these are the only "bad" events that are intermixed and that
		 * we can ignore without issue. make sure to skip them here
		 */
		if (event->event_group_record_len == 0)
			continue;
		if (!catalog_entry_domain_is_valid(event->domain))
			continue;

		name = event_name(event, &nl);
		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
		ct = event_data_to_attrs(event_idx, events + event_attr_ct,
					 event, nonce);
		if (ct < 0) {
			pr_warn("event %zu (%.*s) creation failure, skipping\n",
					event_idx, nl, name);
			junk_events++;
		} else {
			event_attr_ct++;
			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
			if (event_descs[desc_ct])
				desc_ct++;
			event_long_descs[long_desc_ct] =
				event_to_long_desc_attr(event, nonce);
			if (event_long_descs[long_desc_ct])
				long_desc_ct++;
		}
	}

	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
			event_idx, event_attr_ct, junk_events, desc_ct);

	events[event_attr_ct] = NULL;
	event_descs[desc_ct] = NULL;
	event_long_descs[long_desc_ct] = NULL;

	event_uniq_destroy(&ev_uniq);
	vfree(event_data);
	kmem_cache_free(hv_page_cache, page);

	*events_ = events;
	*event_descs_ = event_descs;
	*event_long_descs_ = event_long_descs;
	return 0;

e_event_descs:
	kfree(event_descs);
e_event_attrs:
	kfree(events);
e_event_data:
	vfree(event_data);
e_free:
	kmem_cache_free(hv_page_cache, page);
e_out:
	*events_ = NULL;
	*event_descs_ = NULL;
	*event_long_descs_ = NULL;
	return ret;
}

static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
			    struct bin_attribute *bin_attr, char *buf,
			    loff_t offset, size_t count)
{
	unsigned long hret;
	ssize_t ret = 0;
	size_t catalog_len = 0, catalog_page_len = 0;
	loff_t page_offset = 0;
	loff_t offset_in_page;
	size_t copy_len;
	uint64_t catalog_version_num = 0;
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
	struct hv_24x7_catalog_page_0 *page_0 = page;

	if (!page)
		return -ENOMEM;

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);
	catalog_len = catalog_page_len * 4096;

	page_offset = offset / 4096;
	offset_in_page = offset % 4096;

	if (page_offset >= catalog_page_len)
		goto e_free;

	if (page_offset != 0) {
		hret = h_get_24x7_catalog_page(page, catalog_version_num,
					       page_offset);
		if (hret) {
			ret = -EIO;
			goto e_free;
		}
	}

	copy_len = 4096 - offset_in_page;
	if (copy_len > count)
		copy_len = count;

	memcpy(buf, page + offset_in_page, copy_len);
	ret = copy_len;

e_free:
	if (hret)
		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed: rc=%ld\n",
				catalog_version_num, page_offset, hret);
	kmem_cache_free(hv_page_cache, page);

	pr_devel("catalog_read: offset=%lld(%lld) count=%zu catalog_len=%zu(%zu) => %zd\n",
			offset, page_offset, count, catalog_len,
			catalog_page_len, ret);

	return ret;
}

static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
			    char *page)
{
	int d, n, count = 0;
	const char *str;

	for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
		str = domain_name(d);
		if (!str)
			continue;

		n = sprintf(page, "%d: %s\n", d, str);
		if (n < 0)
			break;

		count += n;
		page += n;
	}
	return count;
}

#define PAGE_0_ATTR(_name, _fmt, _expr)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *dev_attr,	\
			    char *buf)				\
{								\
	unsigned long hret;					\
	ssize_t ret = 0;					\
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\
	struct hv_24x7_catalog_page_0 *page_0 = page;		\
	if (!page)						\
		return -ENOMEM;					\
	hret = h_get_24x7_catalog_page(page, 0, 0);		\
	if (hret) {						\
		ret = -EIO;					\
		goto e_free;					\
	}							\
	ret = sprintf(buf, _fmt, _expr);			\
e_free:								\
	kmem_cache_free(hv_page_cache, page);			\
	return ret;						\
}								\
static DEVICE_ATTR_RO(_name)

PAGE_0_ATTR(catalog_version, "%lld\n",
	    (unsigned long long)be64_to_cpu(page_0->version));
PAGE_0_ATTR(catalog_len, "%lld\n",
	    (unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
static DEVICE_ATTR_RO(domains);

static struct bin_attribute *if_bin_attrs[] = {
	&bin_attr_catalog,
	NULL,
};

static struct attribute *if_attrs[] = {
	&dev_attr_catalog_len.attr,
	&dev_attr_catalog_version.attr,
	&dev_attr_domains.attr,
	NULL,
};

static struct attribute_group if_group = {
	.name = "interface",
	.bin_attrs = if_bin_attrs,
	.attrs = if_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&event_desc_group,
	&event_long_desc_group,
	&if_group,
	NULL,
};

static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
			   struct hv_24x7_data_result_buffer *result_buffer,
			   unsigned long ret)
{
	struct hv_24x7_request *req;

	req = &request_buffer->requests[0];
	pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
			req->performance_domain, req->data_offset,
			req->starting_ix, req->starting_lpar_ix, ret, ret,
			result_buffer->detailed_rc,
			result_buffer->failing_request_ix);
}
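
/*
 * Start the process for a new H_GET_24x7_DATA hcall.
 */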
static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			      struct hv_24x7_data_result_buffer *result_buffer)
{
	memset(request_buffer, 0, 4096);
	memset(result_buffer, 0, 4096);

	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
	/* memset above set request_buffer->num_requests to 0 */
}
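
/*
 * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
 * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
 */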
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			     struct hv_24x7_data_result_buffer *result_buffer)
{
	unsigned long ret;

	/*
	 * NOTE: Due to variable number of array elements in request and
	 *	 result buffer(s), sizeof() is not reliable. Use the actual
	 *	 allocated buffer size, H24x7_DATA_BUFFER_SIZE.
	 */
	ret = plpar_hcall_norets(H_GET_24X7_DATA,
			virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
			virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);

	if (ret)
		log_24x7_hcall(request_buffer, result_buffer, ret);

	return ret;
}
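
/*
 * Add the given @event to the next slot in the 24x7 request_buffer.
 *
 * Note that H_GET_24X7_DATA hcall allows reading several counters'
 * values in a single HCALL. We expect the caller to add events to the
 * request buffer one by one, make the HCALL and process the results.
 */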
static int add_event_to_24x7_request(struct perf_event *event,
				     struct hv_24x7_request_buffer *request_buffer)
{
	u16 idx;
	int i;
	struct hv_24x7_request *req;

	if (request_buffer->num_requests > 254) {
		pr_devel("Too many requests for 24x7 HCALL %d\n",
				request_buffer->num_requests);
		return -EINVAL;
	}

	switch (event_get_domain(event)) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		idx = event_get_chip(event);
		break;
	case HV_PERF_DOMAIN_PHYS_CORE:
		idx = event_get_core(event);
		break;
	default:
		idx = event_get_vcpu(event);
	}

	i = request_buffer->num_requests++;
	req = &request_buffer->requests[i];

	req->performance_domain = event_get_domain(event);
	req->data_size = cpu_to_be16(8);
	req->data_offset = cpu_to_be32(event_get_offset(event));
	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
	req->max_num_lpars = cpu_to_be16(1);
	req->starting_ix = cpu_to_be16(idx);
	req->max_ix = cpu_to_be16(1);

	return 0;
}

static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
{
	unsigned long ret;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	ret = add_event_to_24x7_request(event, request_buffer);
	if (ret)
		goto out;

	/* make_24x7_request() already logs failures, don't log again */
	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret)
		goto out;

	/* process result from hcall */
	*count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);

out:
	put_cpu_var(hv_24x7_reqb);
	put_cpu_var(hv_24x7_resb);
	return ret;
}

static int h_24x7_event_init(struct perf_event *event)
{
	struct hv_perf_caps caps;
	unsigned domain;
	unsigned long hret;
	u64 ct;

	/* Not our event */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* Unused areas must be 0 */
	if (event_get_reserved1(event) ||
	    event_get_reserved2(event) ||
	    event_get_reserved3(event)) {
		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
				event->attr.config,
				event_get_reserved1(event),
				event->attr.config1,
				event_get_reserved2(event),
				event->attr.config2,
				event_get_reserved3(event));
		return -EINVAL;
	}

	/* unsupported modes and filters */
	if (event->attr.exclude_user ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv ||
	    event->attr.exclude_idle ||
	    event->attr.exclude_host ||
	    event->attr.exclude_guest)
		return -EINVAL;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* offset must be 8 byte aligned */
	if (event_get_offset(event) % 8) {
		pr_devel("bad alignment\n");
		return -EINVAL;
	}

	/* Domains above 6 are invalid */
	domain = event_get_domain(event);
	if (domain > 6) {
		pr_devel("invalid domain %d\n", domain);
		return -EINVAL;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_devel("could not get capabilities: rc=%ld\n", hret);
		return -EIO;
	}

	/* Physical domains & other lpars require extra capabilities */
	if (!caps.collect_privileged && (is_physical_domain(domain) ||
		(event_get_lpar(event) != event_get_lpar_max()))) {
		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
				is_physical_domain(domain),
				event_get_lpar(event));
		return -EACCES;
	}

	/* see if the event complains */
	if (single_24x7_request(event, &ct)) {
		pr_devel("test hcall failed\n");
		return -EIO;
	}
	(void)local64_xchg(&event->hw.prev_count, ct);

	return 0;
}

static u64 h_24x7_get_value(struct perf_event *event)
{
	unsigned long ret;
	u64 ct;

	ret = single_24x7_request(event, &ct);
	if (ret)
		/* We failed */
		return 0;

	return ct;
}

static void update_event_count(struct perf_event *event, u64 now)
{
	s64 prev;

	prev = local64_xchg(&event->hw.prev_count, now);
	local64_add(now - prev, &event->count);
}

static void h_24x7_event_read(struct perf_event *event)
{
	u64 now;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_hw *h24x7hw;
	int txn_flags;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);

	/*
	 * If in a READ transaction, add this counter to the list of
	 * counters to read during the next HCALL (i.e commit_txn()).
	 * If not in a READ transaction, go ahead and make the HCALL
	 * to read this counter by itself.
	 */
	if (txn_flags & PERF_PMU_TXN_READ) {
		int i;
		int ret;

		if (__this_cpu_read(hv_24x7_txn_err))
			return;

		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);

		ret = add_event_to_24x7_request(event, request_buffer);
		if (ret) {
			__this_cpu_write(hv_24x7_txn_err, ret);
		} else {
			/*
			 * Associate the event with the HCALL request index,
			 * so ->commit_txn() can quickly find/update count.
			 */
			i = request_buffer->num_requests - 1;

			h24x7hw = &get_cpu_var(hv_24x7_hw);
			h24x7hw->events[i] = event;
			put_cpu_var(hv_24x7_hw);

			/*
			 * Clear the event count so we can compute the _change_
			 * in the 24x7 raw counter value at the end of the txn.
			 *
			 * Note that we could alternatively read the 24x7 value
			 * now and save its value in event->hw.prev_count. But
			 * that would require issuing a hcall, which would then
			 * defeat the purpose of using the txn interface.
			 */
			local64_set(&event->count, 0);
		}

		put_cpu_var(hv_24x7_reqb);
	} else {
		now = h_24x7_get_value(event);
		update_event_count(event, now);
	}
}

static void h_24x7_event_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		local64_set(&event->hw.prev_count, h_24x7_get_value(event));
}

static void h_24x7_event_stop(struct perf_event *event, int flags)
{
	h_24x7_event_read(event);
}

static int h_24x7_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		h_24x7_event_start(event, flags);

	return 0;
}
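
/*
 * 24x7 counters only support READ transactions. They are
 * always counting and don't need/support ADD transactions.
 * Cache the flags, but otherwise ignore transactions that
 * are not PERF_PMU_TXN_READ.
 */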
static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	/* We should not be called if we are already in a txn */
	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));

	__this_cpu_write(hv_24x7_txn_flags, flags);
	if (flags & ~PERF_PMU_TXN_READ)
		return;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
}
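
/*
 * Clean up transaction state.
 *
 * NOTE: Ignore state of request and result buffers for now.
 *	 We will initialize them during the next read/txn.
 */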
static void reset_txn(void)
{
	__this_cpu_write(hv_24x7_txn_flags, 0);
	__this_cpu_write(hv_24x7_txn_err, 0);
}
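
/*
 * Process data collected during the read transaction (i.e. the requests
 * queued up by ->read()): issue the single H_GET_24X7_DATA hcall and
 * update the count of every event in the transaction from the results.
 *
 * Ignore transactions other than PERF_PMU_TXN_READ; 24x7 counters are
 * always counting and don't need ADD transactions.
 */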
static int h_24x7_event_commit_txn(struct pmu *pmu)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;
	struct hv_24x7_result *resb;
	struct perf_event *event;
	u64 count;
	int i, ret, txn_flags;
	struct hv_24x7_hw *h24x7hw;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
	WARN_ON_ONCE(!txn_flags);

	ret = 0;
	if (txn_flags & ~PERF_PMU_TXN_READ)
		goto out;

	ret = __this_cpu_read(hv_24x7_txn_err);
	if (ret)
		goto out;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	/* make_24x7_request() already logs failures, don't log again */
	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret)
		goto put_reqb;

	h24x7hw = &get_cpu_var(hv_24x7_hw);

	/* Update event counts from hcall */
	for (i = 0; i < request_buffer->num_requests; i++) {
		resb = &result_buffer->results[i];
		count = be64_to_cpu(resb->elements[0].element_data[0]);
		event = h24x7hw->events[i];
		h24x7hw->events[i] = NULL;
		update_event_count(event, count);
	}

	put_cpu_var(hv_24x7_hw);

put_reqb:
	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
out:
	reset_txn();
	return ret;
}
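
/*
 * 24x7 counters only support READ transactions, so regardless of the
 * type of transaction being cancelled, all we need to do is clear the
 * per-cpu transaction state.
 */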
static void h_24x7_event_cancel_txn(struct pmu *pmu)
{
	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
	reset_txn();
}

static struct pmu h_24x7_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_24x7",
	.attr_groups = attr_groups,
	.event_init = h_24x7_event_init,
	.add = h_24x7_event_add,
	.del = h_24x7_event_stop,
	.start = h_24x7_event_start,
	.stop = h_24x7_event_stop,
	.read = h_24x7_event_read,
	.start_txn = h_24x7_event_start_txn,
	.commit_txn = h_24x7_event_commit_txn,
	.cancel_txn = h_24x7_event_cancel_txn,
};

static int hv_24x7_init(void)
{
	int r;
	unsigned long hret;
	struct hv_perf_caps caps;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		pr_debug("not a virtualized system, not enabling\n");
		return -ENODEV;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
				hret);
		return -ENODEV;
	}

	hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
	if (!hv_page_cache)
		return -ENOMEM;

	/* sampling not supported */
	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	r = create_events_from_catalog(&event_group.attrs,
				       &event_desc_group.attrs,
				       &event_long_desc_group.attrs);
	if (r)
		return r;

	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
	if (r)
		return r;

	return 0;
}

device_initcall(hv_24x7_init);