/*
 * Hypervisor supplied "24x7" performance counter support
 *
 * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
 * Copyright 2014 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "hv-24x7: " fmt

#include <linux/perf_event.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
#include <linux/byteorder/generic.h>

#include "hv-24x7.h"
#include "hv-24x7-catalog.h"
#include "hv-common.h"

/* Version of the 24x7 hypervisor API that we should use */
static int interface_version;

/* Whether we have to aggregate result data for some domains */
static bool aggregate_result_elements;

static bool domain_is_valid(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		/* fall through */
#include "hv-24x7-domains.h"
#undef DOMAIN
		return true;
	default:
		return false;
	}
}

static bool is_physical_domain(unsigned domain)
{
	switch (domain) {
#define DOMAIN(n, v, x, c)		\
	case HV_PERF_DOMAIN_##n:	\
		return c;
#include "hv-24x7-domains.h"
#undef DOMAIN
	default:
		return false;
	}
}

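/*
 * With interface version 2, counts for some domains come back as several
 * result elements (one per SMT thread group) that the kernel has to sum
 * itself; this predicate selects those domains.
 */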
static bool domain_needs_aggregation(unsigned int domain)
{
	return aggregate_result_elements &&
		(domain == HV_PERF_DOMAIN_PHYS_CORE ||
		 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
		  domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
}

static const char *domain_name(unsigned domain)
{
	if (!domain_is_valid(domain))
		return NULL;

	switch (domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:		return "Physical Chip";
	case HV_PERF_DOMAIN_PHYS_CORE:		return "Physical Core";
	case HV_PERF_DOMAIN_VCPU_HOME_CORE:	return "VCPU Home Core";
	case HV_PERF_DOMAIN_VCPU_HOME_CHIP:	return "VCPU Home Chip";
	case HV_PERF_DOMAIN_VCPU_HOME_NODE:	return "VCPU Home Node";
	case HV_PERF_DOMAIN_VCPU_REMOTE_NODE:	return "VCPU Remote Node";
	}

	WARN_ON_ONCE(domain);
	return NULL;
}

static bool catalog_entry_domain_is_valid(unsigned domain)
{
	/* POWER8 doesn't support virtual domains */
	if (interface_version == 1)
		return is_physical_domain(domain);
	else
		return domain_is_valid(domain);
}
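/*
 * The sysfs "format" attributes below describe how an event is encoded in
 * the perf_event_attr config words:
 *
 *   config:  domain (bits 0-3), chip/core/vcpu index (bits 16-31), counter
 *            offset into the catalog's event data area (bits 32-63)
 *   config1: lpar (bits 0-15)
 *
 * As an illustrative (unverified) example, a raw event could be requested
 * with something like:
 *
 *   perf stat -e 'hv_24x7/domain=2,offset=8,core=0/' ...
 */
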
/* domain: one of HV_PERF_DOMAIN_* */
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16 index of the chip/core/vcpu being counted */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" in the request */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);

EVENT_DEFINE_RANGE(reserved1, config, 4, 15);
EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);

static struct attribute *format_attrs[] = {
	&format_attr_domain.attr,
	&format_attr_offset.attr,
	&format_attr_core.attr,
	&format_attr_chip.attr,
	&format_attr_vcpu.attr,
	&format_attr_lpar.attr,
	NULL,
};

static struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};

static struct attribute_group event_desc_group = {
	.name = "event_descs",
	/* .attrs is set in init */
};

static struct attribute_group event_long_desc_group = {
	.name = "event_long_descs",
	/* .attrs is set in init */
};

static struct kmem_cache *hv_page_cache;

DEFINE_PER_CPU(int, hv_24x7_txn_flags);
DEFINE_PER_CPU(int, hv_24x7_txn_err);

struct hv_24x7_hw {
	struct perf_event *events[255];
};

DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);

/*
 * request_buffer and result_buffer are not required to be 4k aligned,
 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
 * the simplest way to ensure that.
 */
#define H24x7_DATA_BUFFER_SIZE	4096
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);

static unsigned int max_num_requests(int interface_version)
{
	return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
		/ H24x7_REQUEST_SIZE(interface_version);
}

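/*
 * A catalog event entry's variable-length tail ("remainder") packs three
 * strings back to back: name, description and long description. Each is
 * preceded by a __be16 length that includes the length field itself,
 * hence the "- 2" adjustments below.
 */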
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
	*len = be16_to_cpu(ev->event_name_len) - 2;
	return (char *)ev->remainder;
}

static char *event_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);

	*len = be16_to_cpu(*desc_len) - 2;
	return (char *)ev->remainder + nl;
}

static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
{
	unsigned nl = be16_to_cpu(ev->event_name_len);
	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
	unsigned desc_len = be16_to_cpu(*desc_len_);
	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);

	*len = be16_to_cpu(*long_desc_len) - 2;
	return (char *)ev->remainder + nl + desc_len;
}

static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
					  void *end)
{
	void *start = ev;

	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
}
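/*
 * Validate the lengths embedded in the event and return a pointer just
 * past the event, or NULL if any of the name/desc/long-desc lengths are
 * inconsistent or would run past @end.
 */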
static void *event_end(struct hv_24x7_event_data *ev, void *end)
{
	void *start = ev;
	__be16 *dl_, *ldl_;
	unsigned dl, ldl;
	unsigned nl = be16_to_cpu(ev->event_name_len);

	if (nl < 2) {
		pr_debug("%s: name length too short: %d", __func__, nl);
		return NULL;
	}

	if (start + nl > end) {
		pr_debug("%s: start=%p + nl=%u > end=%p",
			 __func__, start, nl, end);
		return NULL;
	}

	dl_ = (__be16 *)(ev->remainder + nl - 2);
	if (!IS_ALIGNED((uintptr_t)dl_, 2))
		pr_warn("desc len not aligned %p", dl_);
	dl = be16_to_cpu(*dl_);
	if (dl < 2) {
		pr_debug("%s: desc len too short: %d", __func__, dl);
		return NULL;
	}

	if (start + nl + dl > end) {
		pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
			 __func__, start, nl, dl, start + nl + dl, end);
		return NULL;
	}

	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
		pr_warn("long desc len not aligned %p", ldl_);
	ldl = be16_to_cpu(*ldl_);
	if (ldl < 2) {
		pr_debug("%s: long desc len too short (ldl=%u)",
			 __func__, ldl);
		return NULL;
	}

	if (start + nl + dl + ldl > end) {
		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
			 __func__, start, nl, dl, ldl, end);
		return NULL;
	}

	return start + nl + dl + ldl;
}

static long h_get_24x7_catalog_page_(unsigned long phys_4096,
				     unsigned long version, unsigned long index)
{
	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
		 phys_4096, version, index);

	WARN_ON(!IS_ALIGNED(phys_4096, 4096));

	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
				  phys_4096, version, index);
}

static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
{
	return h_get_24x7_catalog_page_(virt_to_phys(page),
					version, index);
}
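/*
 * Build the "domain=...,offset=...,<index>=?,lpar=..." string exported for
 * an event via sysfs. Chip events have a fixed, known domain; core events
 * may be counted in the physical core or in one of the virtual CPU
 * domains, so their domain (and index) is left as '?' for the perf tool
 * to require from the user.
 */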
static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
{
	const char *sindex;
	const char *lpar;
	const char *domain_str;
	char buf[8];

	switch (domain) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		snprintf(buf, sizeof(buf), "%d", domain);
		domain_str = buf;
		lpar = "0x0";
		sindex = "chip";
		break;
	case HV_PERF_DOMAIN_PHYS_CORE:
		domain_str = "?";
		lpar = "0x0";
		sindex = "core";
		break;
	default:
		domain_str = "?";
		lpar = "?";
		sindex = "vcpu";
	}

	return kasprintf(GFP_KERNEL,
			 "domain=%s,offset=0x%x,%s=?,lpar=%s",
			 domain_str,
			 be16_to_cpu(event->event_counter_offs) +
				be16_to_cpu(event->event_group_record_offs),
			 sindex,
			 lpar);
}

/* The catalog's strings are not guaranteed to be NUL-terminated */
static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
{
	return kasprintf(gfp, "%.*s", max_len, maybe_str);
}

static ssize_t device_show_string(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *d;

	d = container_of(attr, struct dev_ext_attribute, attr);

	return sprintf(buf, "%s\n", (char *)d->var);
}

static struct attribute *device_str_attr_create_(char *name, char *str)
{
	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);

	if (!attr)
		return NULL;

	sysfs_attr_init(&attr->attr.attr);

	attr->var = str;
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = device_show_string;

	return &attr->attr.attr;
}
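/*
 * Allocate and initialize strings representing event attributes.
 * @name_nonce disambiguates catalog entries that share a name (a nonzero
 * nonce appends a "__<nonce>" suffix). The strings allocated here are
 * never freed; they live until shutdown.
 */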
static struct attribute *device_str_attr_create(char *name, int name_max,
						int name_nonce,
						char *str, size_t str_max)
{
	char *n;
	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
	struct attribute *a;

	if (!s)
		return NULL;

	if (!name_nonce)
		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
	else
		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
			      name_nonce);
	if (!n)
		goto out_s;

	a = device_str_attr_create_(n, s);
	if (!a)
		goto out_n;

	return a;
out_n:
	kfree(n);
out_s:
	kfree(s);
	return NULL;
}

static struct attribute *event_to_attr(unsigned ix,
				       struct hv_24x7_event_data *event,
				       unsigned domain,
				       int nonce)
{
	int event_name_len;
	char *ev_name, *a_ev_name, *val;
	struct attribute *attr;

	if (!domain_is_valid(domain)) {
		pr_warn("catalog event %u has invalid domain %u\n",
			ix, domain);
		return NULL;
	}

	val = event_fmt(event, domain);
	if (!val)
		return NULL;

	ev_name = event_name(event, &event_name_len);
	if (!nonce)
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
				      (int)event_name_len, ev_name);
	else
		a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
				      (int)event_name_len, ev_name, nonce);

	if (!a_ev_name)
		goto out_val;

	attr = device_str_attr_create_(a_ev_name, val);
	if (!attr)
		goto out_name;

	return attr;
out_name:
	kfree(a_ev_name);
out_val:
	kfree(val);
	return NULL;
}

static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
					    int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static struct attribute *
event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
{
	int nl, dl;
	char *name = event_name(event, &nl);
	char *desc = event_long_desc(event, &dl);

	/* If there isn't a description, don't create the sysfs file */
	if (!dl)
		return NULL;

	return device_str_attr_create(name, nl, nonce, desc, dl);
}

static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
			       struct hv_24x7_event_data *event, int nonce)
{
	*attrs = event_to_attr(ix, event, event->domain, nonce);
	if (!*attrs)
		return -1;

	return 0;
}

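/*
 * Duplicate event name detection: an rbtree ordered by (name, domain).
 * event_uniq_add() returns 0 for the first occurrence of a name/domain
 * pair and an increasing count (used as the attribute nonce) thereafter.
 */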
struct event_uniq {
	struct rb_node node;
	const char *name;
	int nl;
	unsigned ct;
	unsigned domain;
};

static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	if (s1 > s2)
		return -1;

	return memcmp(d1, d2, s1);
}

static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
		       size_t s2, unsigned d2)
{
	int r = memord(v1, s1, v2, s2);

	if (r)
		return r;
	if (d1 > d2)
		return 1;
	if (d2 > d1)
		return -1;
	return 0;
}

static int event_uniq_add(struct rb_root *root, const char *name, int nl,
			  unsigned domain)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct event_uniq *data;

	/* Figure out where to put new node */
	while (*new) {
		struct event_uniq *it;
		int result;

		it = container_of(*new, struct event_uniq, node);
		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
				     it->domain);

		parent = *new;
		if (result < 0)
			new = &((*new)->rb_left);
		else if (result > 0)
			new = &((*new)->rb_right);
		else {
			it->ct++;
			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
				name, it->ct);
			return it->ct;
		}
	}

	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	*data = (struct event_uniq) {
		.name = name,
		.nl = nl,
		.ct = 0,
		.domain = domain,
	};

	/* Add new node and rebalance tree. */
	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);

	/* data->ct == 0: first use of this name/domain pair */
	return 0;
}

static void event_uniq_destroy(struct rb_root *root)
{
	/*
	 * the strings we point to are in the giant block of memory filled by
	 * the catalog, and are freed separately.
	 */
	struct event_uniq *pos, *n;

	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
		kfree(pos);
}
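/*
 * Ensure an event's fixed portion and the lengths embedded in it are
 * self-consistent and stay inside the event data buffer.
 *
 * Returns the event length on success, -1 otherwise (e.g. on hitting the
 * padding after the last event).
 */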
static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
					  size_t event_idx,
					  size_t event_data_bytes,
					  size_t event_entry_count,
					  size_t offset, void *end)
{
	ssize_t ev_len;
	void *ev_end, *calc_ev_end;

	if (offset >= event_data_bytes)
		return -1;

	if (event_idx >= event_entry_count) {
		pr_devel("catalog event data has %zu bytes of padding after last event\n",
			 event_data_bytes - offset);
		return -1;
	}

	if (!event_fixed_portion_is_within(event, end)) {
		pr_warn("event %zu fixed portion is not within range\n",
			event_idx);
		return -1;
	}

	ev_len = be16_to_cpu(event->length);

	if (ev_len % 16)
		pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
			event_idx, ev_len, event);

	ev_end = (__u8 *)event + ev_len;
	if (ev_end > end) {
		pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
			event_idx, ev_len, ev_end, end,
			offset);
		return -1;
	}

	calc_ev_end = event_end(event, end);
	if (!calc_ev_end) {
		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
			event_idx, event_data_bytes, event, end,
			offset);
		return -1;
	}

	if (calc_ev_end > ev_end) {
		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
			event_idx, event, ev_end, offset, calc_ev_end);
		return -1;
	}

	return ev_len;
}

#define MAX_4K (SIZE_MAX / 4096)

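/*
 * Fetch catalog page 0 to size things, pull in the event data area, then
 * walk it twice: once to count usable events, and once to build the sysfs
 * attribute arrays for the event, event_desc and event_long_desc groups.
 */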
static int create_events_from_catalog(struct attribute ***events_,
				      struct attribute ***event_descs_,
				      struct attribute ***event_long_descs_)
{
	long hret;
	size_t catalog_len, catalog_page_len, event_entry_count,
	       event_data_len, event_data_offs,
	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
	       attr_max, event_idx_last, desc_ct, long_desc_ct;
	ssize_t ct, ev_len;
	uint64_t catalog_version_num;
	struct attribute **events, **event_descs, **event_long_descs;
	struct hv_24x7_catalog_page_0 *page_0 =
		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
	void *page = page_0;
	void *event_data, *end;
	struct hv_24x7_event_data *event;
	struct rb_root ev_uniq = RB_ROOT;
	int ret = 0;

	if (!page) {
		ret = -ENOMEM;
		goto e_out;
	}

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);

	if (MAX_4K < catalog_page_len) {
		pr_err("invalid page count: %zu\n", catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	catalog_len = catalog_page_len * 4096;

	event_entry_count = be16_to_cpu(page_0->event_entry_count);
	event_data_offs = be16_to_cpu(page_0->event_data_offs);
	event_data_len = be16_to_cpu(page_0->event_data_len);

	pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
		 catalog_version_num, catalog_len,
		 event_entry_count, event_data_offs, event_data_len);

	if ((MAX_4K < event_data_len)
			|| (MAX_4K < event_data_offs)
			|| (MAX_4K - event_data_offs < event_data_len)) {
		pr_err("invalid event data offs %zu and/or len %zu\n",
		       event_data_offs, event_data_len);
		ret = -EIO;
		goto e_free;
	}

	if ((event_data_offs + event_data_len) > catalog_page_len) {
		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
		       event_data_offs,
		       event_data_offs + event_data_len,
		       catalog_page_len);
		ret = -EIO;
		goto e_free;
	}

	if (SIZE_MAX - 1 < event_entry_count) {
		pr_err("event_entry_count %zu is invalid\n", event_entry_count);
		ret = -EIO;
		goto e_free;
	}

	event_data_bytes = event_data_len * 4096;

	/*
	 * event data can span several pages, events can cross between these
	 * pages. Use vmalloc to make this easier.
	 */
	event_data = vmalloc(event_data_bytes);
	if (!event_data) {
		pr_err("could not allocate event data\n");
		ret = -ENOMEM;
		goto e_free;
	}

	end = event_data + event_data_bytes;

	/*
	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
	 * divisible by 4096
	 */
	BUILD_BUG_ON(PAGE_SIZE % 4096);

	for (i = 0; i < event_data_len; i++) {
		hret = h_get_24x7_catalog_page_(
				vmalloc_to_phys(event_data + i * 4096),
				catalog_version_num,
				i + event_data_offs);
		if (hret) {
			pr_err("Failed to get event data in page %zu: rc=%ld\n",
			       i + event_data_offs, hret);
			ret = -EIO;
			goto e_event_data;
		}
	}

	/*
	 * scan the catalog to determine the number of attributes we need, and
	 * verify it at the same time.
	 */
	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
	     ;
	     event_idx++, event = (void *)event + ev_len) {
		size_t offset = (void *)event - (void *)event_data;
		char *name;
		int nl;

		ev_len = catalog_event_len_validate(event, event_idx,
						    event_data_bytes,
						    event_entry_count,
						    offset, end);
		if (ev_len < 0)
			break;

		name = event_name(event, &nl);

		if (event->event_group_record_len == 0) {
			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
				 event_idx, nl, name);
			junk_events++;
			continue;
		}

		if (!catalog_entry_domain_is_valid(event->domain)) {
			pr_info("event %zu (%.*s) has invalid domain %d\n",
				event_idx, nl, name, event->domain);
			junk_events++;
			continue;
		}

		attr_max++;
	}

	event_idx_last = event_idx;
	if (event_idx_last != event_entry_count)
		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
			event_idx_last, event_entry_count, junk_events);

	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
	if (!events) {
		ret = -ENOMEM;
		goto e_event_data;
	}

	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
				    GFP_KERNEL);
	if (!event_descs) {
		ret = -ENOMEM;
		goto e_event_attrs;
	}

	event_long_descs = kmalloc_array(event_idx + 1,
					 sizeof(*event_long_descs), GFP_KERNEL);
	if (!event_long_descs) {
		ret = -ENOMEM;
		goto e_event_descs;
	}

	/* Iterate over the catalog filling in the attribute vector */
	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
	     event = event_data, event_idx = 0;
	     event_idx < event_idx_last;
	     event_idx++, ev_len = be16_to_cpu(event->length),
	     event = (void *)event + ev_len) {
		char *name;
		int nl;
		int nonce;

		/*
		 * these are the only "bad" events that are intermixed and that
		 * we can ignore without issue. make sure to skip them here
		 */
		if (event->event_group_record_len == 0)
			continue;
		if (!catalog_entry_domain_is_valid(event->domain))
			continue;

		name = event_name(event, &nl);
		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
		ct = event_data_to_attrs(event_idx, events + event_attr_ct,
					 event, nonce);
		if (ct < 0) {
			pr_warn("event %zu (%.*s) creation failure, skipping\n",
				event_idx, nl, name);
			junk_events++;
		} else {
			event_attr_ct++;
			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
			if (event_descs[desc_ct])
				desc_ct++;
			event_long_descs[long_desc_ct] =
					event_to_long_desc_attr(event, nonce);
			if (event_long_descs[long_desc_ct])
				long_desc_ct++;
		}
	}

	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
		event_idx, event_attr_ct, junk_events, desc_ct);

	events[event_attr_ct] = NULL;
	event_descs[desc_ct] = NULL;
	event_long_descs[long_desc_ct] = NULL;

	event_uniq_destroy(&ev_uniq);
	vfree(event_data);
	kmem_cache_free(hv_page_cache, page);

	*events_ = events;
	*event_descs_ = event_descs;
	*event_long_descs_ = event_long_descs;
	return 0;

e_event_descs:
	kfree(event_descs);
e_event_attrs:
	kfree(events);
e_event_data:
	vfree(event_data);
e_free:
	kmem_cache_free(hv_page_cache, page);
e_out:
	*events_ = NULL;
	*event_descs_ = NULL;
	*event_long_descs_ = NULL;
	return ret;
}

static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
			    struct bin_attribute *bin_attr, char *buf,
			    loff_t offset, size_t count)
{
	long hret;
	ssize_t ret = 0;
	size_t catalog_len = 0, catalog_page_len = 0;
	loff_t page_offset = 0;
	loff_t offset_in_page;
	size_t copy_len;
	uint64_t catalog_version_num = 0;
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
	struct hv_24x7_catalog_page_0 *page_0 = page;

	if (!page)
		return -ENOMEM;

	hret = h_get_24x7_catalog_page(page, 0, 0);
	if (hret) {
		ret = -EIO;
		goto e_free;
	}

	catalog_version_num = be64_to_cpu(page_0->version);
	catalog_page_len = be32_to_cpu(page_0->length);
	catalog_len = catalog_page_len * 4096;

	page_offset = offset / 4096;
	offset_in_page = offset % 4096;

	if (page_offset >= catalog_page_len)
		goto e_free;

	if (page_offset != 0) {
		hret = h_get_24x7_catalog_page(page, catalog_version_num,
					       page_offset);
		if (hret) {
			ret = -EIO;
			goto e_free;
		}
	}

	copy_len = 4096 - offset_in_page;
	if (copy_len > count)
		copy_len = count;

	memcpy(buf, page + offset_in_page, copy_len);
	ret = copy_len;

e_free:
	if (hret)
		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed: rc=%ld\n",
		       catalog_version_num, page_offset, hret);
	kmem_cache_free(hv_page_cache, page);

	pr_devel("catalog_read: offset=%lld(%lld) count=%zu catalog_len=%zu(%zu) => %zd\n",
		 offset, page_offset, count, catalog_len, catalog_page_len,
		 ret);

	return ret;
}
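
/*
 * The raw catalog is exported as a binary sysfs attribute. As an
 * illustrative example (path assumed from the standard event_source
 * layout, not verified here), it could be dumped with:
 *
 *   dd if=/sys/bus/event_source/devices/hv_24x7/interface/catalog \
 *      of=24x7-catalog bs=4096
 */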

static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
			    char *page)
{
	int d, n, count = 0;
	const char *str;

	for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
		str = domain_name(d);
		if (!str)
			continue;

		n = sprintf(page, "%d: %s\n", d, str);
		if (n < 0)
			break;

		count += n;
		page += n;
	}
	return count;
}

#define PAGE_0_ATTR(_name, _fmt, _expr)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *dev_attr,	\
			    char *buf)				\
{								\
	long hret;						\
	ssize_t ret = 0;					\
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\
	struct hv_24x7_catalog_page_0 *page_0 = page;		\
	if (!page)						\
		return -ENOMEM;					\
	hret = h_get_24x7_catalog_page(page, 0, 0);		\
	if (hret) {						\
		ret = -EIO;					\
		goto e_free;					\
	}							\
	ret = sprintf(buf, _fmt, _expr);			\
e_free:								\
	kmem_cache_free(hv_page_cache, page);			\
	return ret;						\
}								\
static DEVICE_ATTR_RO(_name)

PAGE_0_ATTR(catalog_version, "%lld\n",
	    (unsigned long long)be64_to_cpu(page_0->version));
PAGE_0_ATTR(catalog_len, "%lld\n",
	    (unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
static DEVICE_ATTR_RO(domains);

static struct bin_attribute *if_bin_attrs[] = {
	&bin_attr_catalog,
	NULL,
};

static struct attribute *if_attrs[] = {
	&dev_attr_catalog_len.attr,
	&dev_attr_catalog_version.attr,
	&dev_attr_domains.attr,
	NULL,
};

static struct attribute_group if_group = {
	.name = "interface",
	.bin_attrs = if_bin_attrs,
	.attrs = if_attrs,
};

static const struct attribute_group *attr_groups[] = {
	&format_group,
	&event_group,
	&event_desc_group,
	&event_long_desc_group,
	&if_group,
	NULL,
};

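/*
 * Start the process for a new H_GET_24x7_DATA hcall.
 */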
static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			      struct hv_24x7_data_result_buffer *result_buffer)
{
	memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
	memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);

	request_buffer->interface_version = interface_version;
	/* memset above set request_buffer->num_requests to 0 */
}
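/*
 * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
 * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
 */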
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
			     struct hv_24x7_data_result_buffer *result_buffer)
{
	long ret;

	/*
	 * NOTE: Due to variable number of array elements in request and
	 *	 result buffer(s), sizeof() is not reliable. Use the actual
	 *	 allocated buffer size, H24x7_DATA_BUFFER_SIZE.
	 */
	ret = plpar_hcall_norets(H_GET_24X7_DATA,
			virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
			virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);

	if (ret) {
		struct hv_24x7_request *req;

		req = request_buffer->requests;
		pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
				      req->performance_domain, req->data_offset,
				      req->starting_ix, req->starting_lpar_ix,
				      ret, ret, result_buffer->detailed_rc,
				      result_buffer->failing_request_ix);
		return -EIO;
	}

	return 0;
}
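/*
 * Add the given @event to the next slot in the 24x7 request_buffer.
 *
 * The H_GET_24X7_DATA hcall can read several counters' values in a single
 * call: callers add events one by one, make the hcall, then process the
 * results.
 */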
static int add_event_to_24x7_request(struct perf_event *event,
				     struct hv_24x7_request_buffer *request_buffer)
{
	u16 idx;
	int i;
	size_t req_size;
	struct hv_24x7_request *req;

	if (request_buffer->num_requests >=
	    max_num_requests(request_buffer->interface_version)) {
		pr_devel("Too many requests for 24x7 HCALL %d\n",
			 request_buffer->num_requests);
		return -EINVAL;
	}

	switch (event_get_domain(event)) {
	case HV_PERF_DOMAIN_PHYS_CHIP:
		idx = event_get_chip(event);
		break;
	case HV_PERF_DOMAIN_PHYS_CORE:
		idx = event_get_core(event);
		break;
	default:
		idx = event_get_vcpu(event);
	}

	req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);

	i = request_buffer->num_requests++;
	req = (void *) request_buffer->requests + i * req_size;

	req->performance_domain = event_get_domain(event);
	req->data_size = cpu_to_be16(8);
	req->data_offset = cpu_to_be32(event_get_offset(event));
	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
	req->max_num_lpars = cpu_to_be16(1);
	req->starting_ix = cpu_to_be16(idx);
	req->max_ix = cpu_to_be16(1);

	if (request_buffer->interface_version > 1) {
		if (domain_needs_aggregation(req->performance_domain))
			req->max_num_thread_groups = -1;
		else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
			req->starting_thread_group_ix = idx % 2;
			req->max_num_thread_groups = 1;
		}
	}

	return 0;
}
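/**
 * get_count_from_result - get event count from the given result
 *
 * If the event corresponding to this result needs aggregation of the
 * result element values, sum them up and return the total.
 *
 * @event:	Event associated with @res.
 * @resb:	Result buffer containing @res.
 * @res:	Result to work on.
 * @countp:	Output variable containing the event count.
 * @next:	Optional output variable pointing to the next result in @resb.
 */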
static int get_count_from_result(struct perf_event *event,
				 struct hv_24x7_data_result_buffer *resb,
				 struct hv_24x7_result *res, u64 *countp,
				 struct hv_24x7_result **next)
{
	u16 num_elements = be16_to_cpu(res->num_elements_returned);
	u16 data_size = be16_to_cpu(res->result_element_data_size);
	unsigned int data_offset;
	void *element_data;
	int i;
	u64 count;

	/*
	 * We can bail out early if the result is empty.
	 */
	if (!num_elements) {
		pr_debug("Result of request %hhu is empty, nothing to do\n",
			 res->result_ix);

		if (next)
			*next = (struct hv_24x7_result *) res->elements;

		return -ENODATA;
	}

	/*
	 * Since we always specify 1 as the maximum for the smallest resource
	 * we're requesting, there should be only one element per result.
	 * Except when an event needs aggregation, in which case there is one
	 * result element per thread group to sum up.
	 */
	if (num_elements != 1 &&
	    !domain_needs_aggregation(event_get_domain(event))) {
		pr_err("Error: result of request %hhu has %hu elements\n",
		       res->result_ix, num_elements);

		return -EIO;
	}

	if (data_size != sizeof(u64)) {
		pr_debug("Error: result of request %hhu has data of %hu bytes\n",
			 res->result_ix, data_size);

		return -ENOTSUPP;
	}

	if (resb->interface_version == 1)
		data_offset = offsetof(struct hv_24x7_result_element_v1,
				       element_data);
	else
		data_offset = offsetof(struct hv_24x7_result_element_v2,
				       element_data);

	/* Sum the counter values of all the result elements */
	for (i = count = 0, element_data = res->elements + data_offset;
	     i < num_elements;
	     i++, element_data += data_size + data_offset)
		count += be64_to_cpu(*((u64 *) element_data));

	*countp = count;

	/* The next result is after the last result element. */
	if (next)
		*next = element_data - data_offset;

	return 0;
}

static int single_24x7_request(struct perf_event *event, u64 *count)
{
	int ret;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	ret = add_event_to_24x7_request(event, request_buffer);
	if (ret)
		goto out;

	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret)
		goto out;

	/* process result from hcall */
	ret = get_count_from_result(event, result_buffer,
				    result_buffer->results, count, NULL);

out:
	put_cpu_var(hv_24x7_reqb);
	put_cpu_var(hv_24x7_resb);
	return ret;
}

static int h_24x7_event_init(struct perf_event *event)
{
	struct hv_perf_caps caps;
	unsigned domain;
	unsigned long hret;
	u64 ct;

	/* Not our event */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* Unused areas must be 0 */
	if (event_get_reserved1(event) ||
	    event_get_reserved2(event) ||
	    event_get_reserved3(event)) {
		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
			 event->attr.config,
			 event_get_reserved1(event),
			 event->attr.config1,
			 event_get_reserved2(event),
			 event->attr.config2,
			 event_get_reserved3(event));
		return -EINVAL;
	}

	/* unsupported modes and filters */
	if (event->attr.exclude_user   ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv     ||
	    event->attr.exclude_idle   ||
	    event->attr.exclude_host   ||
	    event->attr.exclude_guest)
		return -EINVAL;

	/* no branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	/* offset must be 8 byte aligned */
	if (event_get_offset(event) % 8) {
		pr_devel("bad alignment\n");
		return -EINVAL;
	}

	domain = event_get_domain(event);
	if (domain >= HV_PERF_DOMAIN_MAX) {
		pr_devel("invalid domain %d\n", domain);
		return -EINVAL;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_devel("could not get capabilities: rc=%ld\n", hret);
		return -EIO;
	}

	/* Physical domains & other lpars require extra capabilities */
	if (!caps.collect_privileged && (is_physical_domain(domain) ||
		(event_get_lpar(event) != event_get_lpar_max()))) {
		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
			 is_physical_domain(domain),
			 event_get_lpar(event));
		return -EACCES;
	}

	/* see if the event complains */
	if (single_24x7_request(event, &ct)) {
		pr_devel("test hcall failed\n");
		return -EIO;
	}
	(void)local64_xchg(&event->hw.prev_count, ct);

	return 0;
}

static u64 h_24x7_get_value(struct perf_event *event)
{
	u64 ct;

	if (single_24x7_request(event, &ct))
		/* We checked this in event init, but it could have changed */
		return 0;

	return ct;
}

static void update_event_count(struct perf_event *event, u64 now)
{
	s64 prev;

	prev = local64_xchg(&event->hw.prev_count, now);
	local64_add(now - prev, &event->count);
}

static void h_24x7_event_read(struct perf_event *event)
{
	u64 now;
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_hw *h24x7hw;
	int txn_flags;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);

	/*
	 * If in a READ transaction, add this counter to the list of
	 * counters to read during the next HCALL (i.e commit_txn()).
	 * If not in a READ transaction, go ahead and make the HCALL
	 * to read this counter by itself.
	 */
	if (txn_flags & PERF_PMU_TXN_READ) {
		int i;
		int ret;

		if (__this_cpu_read(hv_24x7_txn_err))
			return;

		request_buffer = (void *)get_cpu_var(hv_24x7_reqb);

		ret = add_event_to_24x7_request(event, request_buffer);
		if (ret) {
			__this_cpu_write(hv_24x7_txn_err, ret);
		} else {
			/*
			 * Associate the event with the HCALL request index,
			 * so ->commit_txn() can quickly find/update count.
			 */
			i = request_buffer->num_requests - 1;

			h24x7hw = &get_cpu_var(hv_24x7_hw);
			h24x7hw->events[i] = event;
			put_cpu_var(h24x7hw);
			/*
			 * Clear the event count so we can compute the _change_
			 * in the 24x7 raw counter value at the end of the txn.
			 *
			 * Note that we could alternatively read the 24x7 value
			 * now and save its value in event->hw.prev_count. But
			 * that would require issuing a hcall, which would then
			 * defeat the purpose of using the txn interface.
			 */
			local64_set(&event->count, 0);
		}

		put_cpu_var(hv_24x7_reqb);
	} else {
		now = h_24x7_get_value(event);
		update_event_count(event, now);
	}
}

static void h_24x7_event_start(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_RELOAD)
		local64_set(&event->hw.prev_count, h_24x7_get_value(event));
}

static void h_24x7_event_stop(struct perf_event *event, int flags)
{
	h_24x7_event_read(event);
}

static int h_24x7_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		h_24x7_event_start(event, flags);

	return 0;
}
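/*
 * 24x7 counters are always counting, so only READ transactions make
 * sense for them; ADD transactions are not needed. Cache the flags, but
 * otherwise ignore transactions that are not PERF_PMU_TXN_READ.
 */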
static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;

	/* We should not be called if we are already in a txn */
	WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));

	__this_cpu_write(hv_24x7_txn_flags, flags);
	if (flags & ~PERF_PMU_TXN_READ)
		return;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	init_24x7_request(request_buffer, result_buffer);

	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
}
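/*
 * Clean up transaction state.
 *
 * NOTE: Ignore state of request and result buffers for now.
 *	 We will initialize them during the next read/txn.
 */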
static void reset_txn(void)
{
	__this_cpu_write(hv_24x7_txn_flags, 0);
	__this_cpu_write(hv_24x7_txn_err, 0);
}
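/*
 * For READ transactions, submit all pending 24x7 requests (i.e requests
 * queued by h_24x7_event_read()) to the hypervisor in a single hcall and
 * update the event counts. Other transaction types only need the per-cpu
 * transaction state cleared.
 */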
static int h_24x7_event_commit_txn(struct pmu *pmu)
{
	struct hv_24x7_request_buffer *request_buffer;
	struct hv_24x7_data_result_buffer *result_buffer;
	struct hv_24x7_result *res, *next_res;
	u64 count;
	int i, ret, txn_flags;
	struct hv_24x7_hw *h24x7hw;

	txn_flags = __this_cpu_read(hv_24x7_txn_flags);
	WARN_ON_ONCE(!txn_flags);

	ret = 0;
	if (txn_flags & ~PERF_PMU_TXN_READ)
		goto out;

	ret = __this_cpu_read(hv_24x7_txn_err);
	if (ret)
		goto out;

	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
	result_buffer = (void *)get_cpu_var(hv_24x7_resb);

	ret = make_24x7_request(request_buffer, result_buffer);
	if (ret)
		goto put_reqb;

	h24x7hw = &get_cpu_var(hv_24x7_hw);

	/* Go through results in the result buffer to update event counts. */
	for (i = 0, res = result_buffer->results;
	     i < result_buffer->num_results; i++, res = next_res) {
		struct perf_event *event = h24x7hw->events[res->result_ix];

		ret = get_count_from_result(event, result_buffer, res, &count,
					    &next_res);
		if (ret)
			break;

		update_event_count(event, count);
	}

	put_cpu_var(hv_24x7_hw);

put_reqb:
	put_cpu_var(hv_24x7_resb);
	put_cpu_var(hv_24x7_reqb);
out:
	reset_txn();
	return ret;
}
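/*
 * Regardless of the transaction type, all we need to do on cancel is
 * reset the per-cpu transaction state.
 */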
static void h_24x7_event_cancel_txn(struct pmu *pmu)
{
	WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
	reset_txn();
}

static struct pmu h_24x7_pmu = {
	.task_ctx_nr = perf_invalid_context,

	.name = "hv_24x7",
	.attr_groups = attr_groups,
	.event_init  = h_24x7_event_init,
	.add         = h_24x7_event_add,
	.del         = h_24x7_event_stop,
	.start       = h_24x7_event_start,
	.stop        = h_24x7_event_stop,
	.read        = h_24x7_event_read,
	.start_txn   = h_24x7_event_start_txn,
	.commit_txn  = h_24x7_event_commit_txn,
	.cancel_txn  = h_24x7_event_cancel_txn,
};

static int hv_24x7_init(void)
{
	int r;
	unsigned long hret;
	struct hv_perf_caps caps;

	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
		pr_debug("not a virtualized system, not enabling\n");
		return -ENODEV;
	} else if (!cur_cpu_spec->oprofile_cpu_type)
		return -ENODEV;

	/* POWER8 only supports v1, while POWER9 and later support v2. */
	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
		interface_version = 1;
	else {
		interface_version = 2;

		/* SMT8 in POWER9 needs to aggregate result elements. */
		if (threads_per_core == 8)
			aggregate_result_elements = true;
	}

	hret = hv_perf_caps_get(&caps);
	if (hret) {
		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
			 hret);
		return -ENODEV;
	}

	hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
	if (!hv_page_cache)
		return -ENOMEM;

	/* sampling not supported */
	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

	r = create_events_from_catalog(&event_group.attrs,
				       &event_desc_group.attrs,
				       &event_long_desc_group.attrs);
	if (r)
		return r;

	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
	if (r)
		return r;

	return 0;
}

device_initcall(hv_24x7_init);