1
2
3
4
5
6
7
/* Component name that pr_fmt() below prepends to every pr_*() message of this file. */
#define KMSG_COMPONENT "cpum_sf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/kernel_stat.h>
13#include <linux/perf_event.h>
14#include <linux/percpu.h>
15#include <linux/pid.h>
16#include <linux/notifier.h>
17#include <linux/export.h>
18#include <linux/slab.h>
19#include <linux/mm.h>
20#include <linux/moduleparam.h>
21#include <asm/cpu_mf.h>
22#include <asm/irq.h>
23#include <asm/debug.h>
24#include <asm/timex.h>
25
26
27
28
29
/* NOTE(review): presumably the minimum number of SDB tables; not used in the visible code - confirm. */
#define CPUM_SF_MIN_SDBT 1

/*
 * Number of 8-byte entries that fit into one page-sized table once a single
 * 8-byte slot has been set aside.
 */
#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)

/*
 * Byte offset, within a table page, of the slot that follows the last
 * regular entry.  require_table_link() compares the in-page offset of a
 * table pointer against this value.
 */
#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
43static inline int require_table_link(const void *sdbt)
44{
45 return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
46}
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * Sampling buffer sizing limits, counted in sample-data-blocks (SDBs).
 * MIN/MAX are overwritten by sfb_set_limits(); the DIAG factor is
 * recomputed there from the queried dsdes/bsdes values and scales the
 * maximum for diagnostic-sampling mode (see sfb_max_limit()).
 */
static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
69
/* Bookkeeping for one sampling buffer: a chain of tables that reference SDBs. */
struct sf_buffer {
	unsigned long *sdbt;		/* Origin (first table) of the chain; NULL if unallocated */
	/* Counters, updated by alloc/realloc_sampling_buffer() */
	unsigned long num_sdb;		/* Number of allocated sample-data-blocks */
	unsigned long num_sdbt;		/* Number of allocated table pages */
	unsigned long *tail;		/* Slot holding the link entry back to the origin */
};
77
/* State of an AUX-area backed sampling buffer (used by the aux_*() helpers). */
struct aux_buffer {
	struct sf_buffer sfb;		/* Table/SDB counters of this buffer */
	unsigned long head;		/* Page index derived from handle->head */
	unsigned long alert_mark;	/* Index of the SDB carrying the alert request */
	unsigned long empty_mark;	/* Index of the last SDB that was reset (emptied) */
	unsigned long *sdb_index;	/* SDB addresses, indexed via AUX_SDB_INDEX() */
	unsigned long *sdbt_index;	/* Table page addresses, one per num_sdbt */
};
86
/* Per-CPU state of the sampling facility. */
struct cpu_hw_sf {
	/* Sampling information block, filled by qsi() in setup_pmc_cpu() */
	struct hws_qsi_info_block qsi;
	/* Sampling controls that cpumsf_pmu_enable() loads via lsctl() */
	struct hws_lsctl_request_block lsctl;
	struct sf_buffer sfb;		/* Sampling buffer of this CPU */
	unsigned int flags;		/* PMU_F_* status flags */
	struct perf_event *event;	/* Event currently using the facility, if any */
	struct perf_output_handle handle; /* AUX output handle (see hw_collect_aux()) */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
98
99
/* Debug-feature handle passed to every debug_sprintf_event() call in this file. */
static debug_info_t *sfdbg;
101
102
103
104
105static int sf_disable(void)
106{
107 struct hws_lsctl_request_block sreq;
108
109 memset(&sreq, 0, sizeof(sreq));
110 return lsctl(&sreq);
111}
112
113
114
115
116static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
117{
118 return !!cpuhw->sfb.sdbt;
119}
120
121
122
123
124static void free_sampling_buffer(struct sf_buffer *sfb)
125{
126 unsigned long *sdbt, *curr;
127
128 if (!sfb->sdbt)
129 return;
130
131 sdbt = sfb->sdbt;
132 curr = sdbt;
133
134
135 while (1) {
136 if (!*curr || !sdbt)
137 break;
138
139
140 if (is_link_entry(curr)) {
141 curr = get_next_sdbt(curr);
142 if (sdbt)
143 free_page((unsigned long) sdbt);
144
145
146 if (curr == sfb->sdbt)
147 break;
148 else
149 sdbt = curr;
150 } else {
151
152 if (*curr) {
153 free_page(*curr);
154 curr++;
155 }
156 }
157 }
158
159 debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
160 (unsigned long)sfb->sdbt);
161 memset(sfb, 0, sizeof(*sfb));
162}
163
164static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
165{
166 unsigned long sdb, *trailer;
167
168
169 sdb = get_zeroed_page(gfp_flags);
170 if (!sdb)
171 return -ENOMEM;
172 trailer = trailer_entry_ptr(sdb);
173 *trailer = SDB_TE_ALERT_REQ_MASK;
174
175
176 *sdbt = sdb;
177
178 return 0;
179}
180
181
182
183
184
185
186
187
188
189
190
191
192static int realloc_sampling_buffer(struct sf_buffer *sfb,
193 unsigned long num_sdb, gfp_t gfp_flags)
194{
195 int i, rc;
196 unsigned long *new, *tail, *tail_prev = NULL;
197
198 if (!sfb->sdbt || !sfb->tail)
199 return -EINVAL;
200
201 if (!is_link_entry(sfb->tail))
202 return -EINVAL;
203
204
205
206
207
208
209 tail = sfb->tail;
210
211
212
213
214 if (sfb->sdbt != get_next_sdbt(tail)) {
215 debug_sprintf_event(sfdbg, 3, "%s: "
216 "sampling buffer is not linked: origin %#lx"
217 " tail %#lx\n", __func__,
218 (unsigned long)sfb->sdbt,
219 (unsigned long)tail);
220 return -EINVAL;
221 }
222
223
224 rc = 0;
225 for (i = 0; i < num_sdb; i++) {
226
227 if (require_table_link(tail)) {
228 new = (unsigned long *) get_zeroed_page(gfp_flags);
229 if (!new) {
230 rc = -ENOMEM;
231 break;
232 }
233 sfb->num_sdbt++;
234
235 *tail = (unsigned long)(void *) new + 1;
236 tail_prev = tail;
237 tail = new;
238 }
239
240
241
242
243
244
245 rc = alloc_sample_data_block(tail, gfp_flags);
246 if (rc) {
247
248
249
250
251
252 if (tail_prev) {
253 sfb->num_sdbt--;
254 free_page((unsigned long) new);
255 tail = tail_prev;
256 }
257 break;
258 }
259 sfb->num_sdb++;
260 tail++;
261 tail_prev = new = NULL;
262 }
263
264
265 *tail = (unsigned long) sfb->sdbt + 1;
266 sfb->tail = tail;
267
268 debug_sprintf_event(sfdbg, 4, "%s: new buffer"
269 " settings: sdbt %lu sdb %lu\n", __func__,
270 sfb->num_sdbt, sfb->num_sdb);
271 return rc;
272}
273
274
275
276
277
278
279
280
281
282
283
284
285static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
286{
287 int rc;
288
289 if (sfb->sdbt)
290 return -EINVAL;
291
292
293 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
294 if (!sfb->sdbt)
295 return -ENOMEM;
296 sfb->num_sdb = 0;
297 sfb->num_sdbt = 1;
298
299
300
301
302 sfb->tail = sfb->sdbt;
303 *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
304
305
306 rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
307 if (rc) {
308 free_sampling_buffer(sfb);
309 debug_sprintf_event(sfdbg, 4, "%s: "
310 "realloc_sampling_buffer failed with rc %i\n",
311 __func__, rc);
312 } else
313 debug_sprintf_event(sfdbg, 4,
314 "%s: tear %#lx dear %#lx\n", __func__,
315 (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
316 return rc;
317}
318
319static void sfb_set_limits(unsigned long min, unsigned long max)
320{
321 struct hws_qsi_info_block si;
322
323 CPUM_SF_MIN_SDB = min;
324 CPUM_SF_MAX_SDB = max;
325
326 memset(&si, 0, sizeof(si));
327 if (!qsi(&si))
328 CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
329}
330
331static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
332{
333 return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
334 : CPUM_SF_MAX_SDB;
335}
336
337static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
338 struct hw_perf_event *hwc)
339{
340 if (!sfb->sdbt)
341 return SFB_ALLOC_REG(hwc);
342 if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
343 return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
344 return 0;
345}
346
/* Return 1 if sfb_pending_allocs() reports outstanding SDBs, else 0. */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) != 0;
}
352
353static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
354{
355
356 num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
357 if (num)
358 SFB_ALLOC_REG(hwc) += num;
359}
360
361static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
362{
363 SFB_ALLOC_REG(hwc) = 0;
364 sfb_account_allocs(num, hwc);
365}
366
367static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
368{
369 if (cpuhw->sfb.sdbt)
370 free_sampling_buffer(&cpuhw->sfb);
371}
372
373static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
374{
375 unsigned long n_sdb, freq;
376 size_t sample_size;
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407 sample_size = sizeof(struct hws_basic_entry);
408 freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
409 n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
410
411
412
413
414
415
416
417
418
419 sfb_init_allocs(n_sdb, hwc);
420 if (sf_buffer_available(cpuhw))
421 return 0;
422
423 debug_sprintf_event(sfdbg, 3,
424 "%s: rate %lu f %lu sdb %lu/%lu"
425 " sample_size %lu cpuhw %p\n", __func__,
426 SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
427 sample_size, cpuhw);
428
429 return alloc_sampling_buffer(&cpuhw->sfb,
430 sfb_pending_allocs(&cpuhw->sfb, hwc));
431}
432
433static unsigned long min_percent(unsigned int percent, unsigned long base,
434 unsigned long min)
435{
436 return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
437}
438
439static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
440{
441
442
443
444
445
446 if (ratio <= 5)
447 return 0;
448 if (ratio <= 25)
449 return min_percent(1, base, 1);
450 if (ratio <= 50)
451 return min_percent(1, base, 1);
452 if (ratio <= 75)
453 return min_percent(2, base, 2);
454 if (ratio <= 100)
455 return min_percent(3, base, 3);
456 if (ratio <= 250)
457 return min_percent(4, base, 4);
458
459 return min_percent(5, base, 8);
460}
461
462static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
463 struct hw_perf_event *hwc)
464{
465 unsigned long ratio, num;
466
467 if (!OVERFLOW_REG(hwc))
468 return;
469
470
471
472
473
474
475
476
477 ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
478 sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
479
480
481 num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
482 if (num)
483 sfb_account_allocs(num, hwc);
484
485 debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
486 __func__, OVERFLOW_REG(hwc), ratio, num);
487 OVERFLOW_REG(hwc) = 0;
488}
489
490
491
492
493
494
495
496
497
498
499
500
501static void extend_sampling_buffer(struct sf_buffer *sfb,
502 struct hw_perf_event *hwc)
503{
504 unsigned long num, num_old;
505 int rc;
506
507 num = sfb_pending_allocs(sfb, hwc);
508 if (!num)
509 return;
510 num_old = sfb->num_sdb;
511
512
513
514
515 sf_disable();
516
517
518
519
520
521
522 rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
523 if (rc)
524 debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
525 __func__, rc);
526
527 if (sfb_has_pending_allocs(sfb, hwc))
528 debug_sprintf_event(sfdbg, 5, "%s: "
529 "req %lu alloc %lu remaining %lu\n",
530 __func__, num, sfb->num_sdb - num_old,
531 sfb_pending_allocs(sfb, hwc));
532}
533
534
/* Count of events holding the facility (see __hw_perf_event_init()/hw_perf_event_destroy()) */
static atomic_t num_events;
/* Serializes reserving and releasing the facility when the count is at zero */
static DEFINE_MUTEX(pmc_reserve_mutex);

/* Request codes passed to setup_pmc_cpu(); PMC_FAILURE is OR-ed in on error */
#define PMC_INIT 0
#define PMC_RELEASE 1
#define PMC_FAILURE 2
542static void setup_pmc_cpu(void *flags)
543{
544 int err;
545 struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
546
547 err = 0;
548 switch (*((int *) flags)) {
549 case PMC_INIT:
550 memset(cpusf, 0, sizeof(*cpusf));
551 err = qsi(&cpusf->qsi);
552 if (err)
553 break;
554 cpusf->flags |= PMU_F_RESERVED;
555 err = sf_disable();
556 if (err)
557 pr_err("Switching off the sampling facility failed "
558 "with rc %i\n", err);
559 debug_sprintf_event(sfdbg, 5,
560 "%s: initialized: cpuhw %p\n", __func__,
561 cpusf);
562 break;
563 case PMC_RELEASE:
564 cpusf->flags &= ~PMU_F_RESERVED;
565 err = sf_disable();
566 if (err) {
567 pr_err("Switching off the sampling facility failed "
568 "with rc %i\n", err);
569 } else
570 deallocate_buffers(cpusf);
571 debug_sprintf_event(sfdbg, 5,
572 "%s: released: cpuhw %p\n", __func__,
573 cpusf);
574 break;
575 }
576 if (err)
577 *((int *) flags) |= PMC_FAILURE;
578}
579
580static void release_pmc_hardware(void)
581{
582 int flags = PMC_RELEASE;
583
584 irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
585 on_each_cpu(setup_pmc_cpu, &flags, 1);
586}
587
588static int reserve_pmc_hardware(void)
589{
590 int flags = PMC_INIT;
591
592 on_each_cpu(setup_pmc_cpu, &flags, 1);
593 if (flags & PMC_FAILURE) {
594 release_pmc_hardware();
595 return -ENODEV;
596 }
597 irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
598
599 return 0;
600}
601
602static void hw_perf_event_destroy(struct perf_event *event)
603{
604
605 if (!atomic_add_unless(&num_events, -1, 1)) {
606 mutex_lock(&pmc_reserve_mutex);
607 if (atomic_dec_return(&num_events) == 0)
608 release_pmc_hardware();
609 mutex_unlock(&pmc_reserve_mutex);
610 }
611}
612
613static void hw_init_period(struct hw_perf_event *hwc, u64 period)
614{
615 hwc->sample_period = period;
616 hwc->last_period = hwc->sample_period;
617 local64_set(&hwc->period_left, hwc->sample_period);
618}
619
620static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
621 unsigned long rate)
622{
623 return clamp_t(unsigned long, rate,
624 si->min_sampl_rate, si->max_sampl_rate);
625}
626
627static u32 cpumsf_pid_type(struct perf_event *event,
628 u32 pid, enum pid_type type)
629{
630 struct task_struct *tsk;
631
632
633 if (!pid)
634 goto out;
635
636 tsk = find_task_by_pid_ns(pid, &init_pid_ns);
637 pid = -1;
638 if (tsk) {
639
640
641
642
643 if (event->parent)
644 event = event->parent;
645 pid = __task_pid_nr_ns(tsk, type, event->ns);
646
647
648
649
650 if (!pid && !pid_alive(tsk))
651 pid = -1;
652 }
653out:
654 return pid;
655}
656
657static void cpumsf_output_event_pid(struct perf_event *event,
658 struct perf_sample_data *data,
659 struct pt_regs *regs)
660{
661 u32 pid;
662 struct perf_event_header header;
663 struct perf_output_handle handle;
664
665
666
667
668
669 pid = data->tid_entry.pid;
670
671
672 rcu_read_lock();
673
674 perf_prepare_sample(&header, data, event, regs);
675 if (perf_output_begin(&handle, data, event, header.size))
676 goto out;
677
678
679 data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
680 data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
681
682 perf_output_sample(&handle, &header, data, event);
683 perf_output_end(&handle);
684out:
685 rcu_read_unlock();
686}
687
688static unsigned long getrate(bool freq, unsigned long sample,
689 struct hws_qsi_info_block *si)
690{
691 unsigned long rate;
692
693 if (freq) {
694 rate = freq_to_sample_rate(si, sample);
695 rate = hw_limit_rate(si, rate);
696 } else {
697
698
699
700
701 rate = hw_limit_rate(si, sample);
702
703
704
705
706
707
708
709 if (sample_rate_to_freq(si, rate) >
710 sysctl_perf_event_sample_rate) {
711 debug_sprintf_event(sfdbg, 1, "%s: "
712 "Sampling rate exceeds maximum "
713 "perf sample rate\n", __func__);
714 rate = 0;
715 }
716 }
717 return rate;
718}
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737static int __hw_perf_event_init_rate(struct perf_event *event,
738 struct hws_qsi_info_block *si)
739{
740 struct perf_event_attr *attr = &event->attr;
741 struct hw_perf_event *hwc = &event->hw;
742 unsigned long rate;
743
744 if (attr->freq) {
745 if (!attr->sample_freq)
746 return -EINVAL;
747 rate = getrate(attr->freq, attr->sample_freq, si);
748 attr->freq = 0;
749 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
750 } else {
751 rate = getrate(attr->freq, attr->sample_period, si);
752 if (!rate)
753 return -EINVAL;
754 }
755 attr->sample_period = rate;
756 SAMPL_RATE(hwc) = rate;
757 hw_init_period(hwc, SAMPL_RATE(hwc));
758 debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
759 __func__, event->cpu, event->attr.sample_period,
760 event->attr.freq, SAMPLE_FREQ_MODE(hwc));
761 return 0;
762}
763
764static int __hw_perf_event_init(struct perf_event *event)
765{
766 struct cpu_hw_sf *cpuhw;
767 struct hws_qsi_info_block si;
768 struct perf_event_attr *attr = &event->attr;
769 struct hw_perf_event *hwc = &event->hw;
770 int cpu, err;
771
772
773 err = 0;
774 if (!atomic_inc_not_zero(&num_events)) {
775 mutex_lock(&pmc_reserve_mutex);
776 if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
777 err = -EBUSY;
778 else
779 atomic_inc(&num_events);
780 mutex_unlock(&pmc_reserve_mutex);
781 }
782 event->destroy = hw_perf_event_destroy;
783
784 if (err)
785 goto out;
786
787
788
789
790
791
792
793
794
795
796 memset(&si, 0, sizeof(si));
797 cpuhw = NULL;
798 if (event->cpu == -1)
799 qsi(&si);
800 else {
801
802
803
804 cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
805 si = cpuhw->qsi;
806 }
807
808
809
810
811
812 if (!si.as) {
813 err = -ENOENT;
814 goto out;
815 }
816
817 if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
818 pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
819 err = -EBUSY;
820 goto out;
821 }
822
823
824 SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
825
826
827
828
829 if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
830 if (!si.ad) {
831 err = -EPERM;
832 goto out;
833 }
834 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
835 }
836
837
838 if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
839 SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
840
841 err = __hw_perf_event_init_rate(event, &si);
842 if (err)
843 goto out;
844
845
846 hwc->extra_reg.reg = REG_OVERFLOW;
847 OVERFLOW_REG(hwc) = 0;
848
849
850 if (attr->config == PERF_EVENT_CPUM_SF_DIAG)
851 return 0;
852
853
854
855
856
857
858 if (cpuhw)
859
860 err = allocate_buffers(cpuhw, hwc);
861 else {
862
863
864
865 for_each_online_cpu(cpu) {
866 cpuhw = &per_cpu(cpu_hw_sf, cpu);
867 err = allocate_buffers(cpuhw, hwc);
868 if (err)
869 break;
870 }
871 }
872
873
874
875
876
877 if (event->attr.sample_type & PERF_SAMPLE_TID)
878 if (is_default_overflow_handler(event))
879 event->overflow_handler = cpumsf_output_event_pid;
880out:
881 return err;
882}
883
884static bool is_callchain_event(struct perf_event *event)
885{
886 u64 sample_type = event->attr.sample_type;
887
888 return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
889 PERF_SAMPLE_STACK_USER);
890}
891
892static int cpumsf_pmu_event_init(struct perf_event *event)
893{
894 int err;
895
896
897
898 if (has_branch_stack(event) || is_callchain_event(event))
899 return -EOPNOTSUPP;
900
901 switch (event->attr.type) {
902 case PERF_TYPE_RAW:
903 if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
904 (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
905 return -ENOENT;
906 break;
907 case PERF_TYPE_HARDWARE:
908
909
910
911
912
913 if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
914 return -ENOENT;
915 if (!is_sampling_event(event))
916 return -ENOENT;
917 break;
918 default:
919 return -ENOENT;
920 }
921
922
923 if (event->cpu >= 0 && !cpu_online(event->cpu))
924 return -ENODEV;
925
926
927
928
929 if (event->attr.exclude_hv)
930 event->attr.exclude_hv = 0;
931 if (event->attr.exclude_idle)
932 event->attr.exclude_idle = 0;
933
934 err = __hw_perf_event_init(event);
935 if (unlikely(err))
936 if (event->destroy)
937 event->destroy(event);
938 return err;
939}
940
941static void cpumsf_pmu_enable(struct pmu *pmu)
942{
943 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
944 struct hw_perf_event *hwc;
945 int err;
946
947 if (cpuhw->flags & PMU_F_ENABLED)
948 return;
949
950 if (cpuhw->flags & PMU_F_ERR_MASK)
951 return;
952
953
954
955
956
957
958
959
960
961
962
963
964 if (cpuhw->event) {
965 hwc = &cpuhw->event->hw;
966 if (!(SAMPL_DIAG_MODE(hwc))) {
967
968
969
970
971 sfb_account_overflows(cpuhw, hwc);
972 extend_sampling_buffer(&cpuhw->sfb, hwc);
973 }
974
975 cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
976 }
977
978
979 cpuhw->flags |= PMU_F_ENABLED;
980 barrier();
981
982 err = lsctl(&cpuhw->lsctl);
983 if (err) {
984 cpuhw->flags &= ~PMU_F_ENABLED;
985 pr_err("Loading sampling controls failed: op %i err %i\n",
986 1, err);
987 return;
988 }
989
990
991 lpp(&S390_lowcore.lpp);
992
993 debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
994 "interval %#lx tear %#lx dear %#lx\n", __func__,
995 cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
996 cpuhw->lsctl.cd, cpuhw->lsctl.interval,
997 cpuhw->lsctl.tear, cpuhw->lsctl.dear);
998}
999
1000static void cpumsf_pmu_disable(struct pmu *pmu)
1001{
1002 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1003 struct hws_lsctl_request_block inactive;
1004 struct hws_qsi_info_block si;
1005 int err;
1006
1007 if (!(cpuhw->flags & PMU_F_ENABLED))
1008 return;
1009
1010 if (cpuhw->flags & PMU_F_ERR_MASK)
1011 return;
1012
1013
1014 inactive = cpuhw->lsctl;
1015 inactive.cs = 0;
1016 inactive.cd = 0;
1017
1018 err = lsctl(&inactive);
1019 if (err) {
1020 pr_err("Loading sampling controls failed: op %i err %i\n",
1021 2, err);
1022 return;
1023 }
1024
1025
1026 err = qsi(&si);
1027 if (!err) {
1028
1029
1030
1031
1032
1033 if (si.es) {
1034 cpuhw->lsctl.tear = si.tear;
1035 cpuhw->lsctl.dear = si.dear;
1036 }
1037 } else
1038 debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
1039 __func__, err);
1040
1041 cpuhw->flags &= ~PMU_F_ENABLED;
1042}
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
1054 struct perf_sf_sde_regs *sde_regs)
1055{
1056 if (event->attr.exclude_user && user_mode(regs))
1057 return 1;
1058 if (event->attr.exclude_kernel && !user_mode(regs))
1059 return 1;
1060 if (event->attr.exclude_guest && sde_regs->in_guest)
1061 return 1;
1062 if (event->attr.exclude_host && !sde_regs->in_guest)
1063 return 1;
1064 return 0;
1065}
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078static int perf_push_sample(struct perf_event *event,
1079 struct hws_basic_entry *basic)
1080{
1081 int overflow;
1082 struct pt_regs regs;
1083 struct perf_sf_sde_regs *sde_regs;
1084 struct perf_sample_data data;
1085
1086
1087 perf_sample_data_init(&data, 0, event->hw.last_period);
1088
1089
1090
1091
1092
1093
1094 memset(®s, 0, sizeof(regs));
1095 regs.int_code = 0x1407;
1096 regs.int_parm = CPU_MF_INT_SF_PRA;
1097 sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long;
1098
1099 psw_bits(regs.psw).ia = basic->ia;
1100 psw_bits(regs.psw).dat = basic->T;
1101 psw_bits(regs.psw).wait = basic->W;
1102 psw_bits(regs.psw).pstate = basic->P;
1103 psw_bits(regs.psw).as = basic->AS;
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116 switch (basic->CL) {
1117 case 1:
1118 sde_regs->in_guest = 0;
1119 break;
1120 case 2:
1121 sde_regs->in_guest = 1;
1122 break;
1123 default:
1124 if (basic->gpp || basic->prim_asn != 0xffff)
1125 sde_regs->in_guest = 1;
1126 break;
1127 }
1128
1129
1130
1131
1132
1133 data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
1134
1135 overflow = 0;
1136 if (perf_exclude_event(event, ®s, sde_regs))
1137 goto out;
1138 if (perf_event_overflow(event, &data, ®s)) {
1139 overflow = 1;
1140 event->pmu->stop(event, 0);
1141 }
1142 perf_event_update_userpage(event);
1143out:
1144 return overflow;
1145}
1146
1147static void perf_event_count_update(struct perf_event *event, u64 count)
1148{
1149 local64_add(count, &event->count);
1150}
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
1173 unsigned long long *overflow)
1174{
1175 struct hws_trailer_entry *te;
1176 struct hws_basic_entry *sample;
1177
1178 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
1179 sample = (struct hws_basic_entry *) *sdbt;
1180 while ((unsigned long *) sample < (unsigned long *) te) {
1181
1182 if (!sample->def || sample->LS)
1183 break;
1184
1185
1186 perf_event_count_update(event, SAMPL_RATE(&event->hw));
1187
1188
1189 if (sample->def == 0x0001) {
1190
1191
1192
1193
1194 if (!*overflow) {
1195
1196 if (sample->I == 0 && sample->W == 0) {
1197
1198 *overflow = perf_push_sample(event,
1199 sample);
1200 }
1201 } else
1202
1203 *overflow += 1;
1204 } else {
1205 debug_sprintf_event(sfdbg, 4,
1206 "%s: Found unknown"
1207 " sampling data entry: te->f %i"
1208 " basic.def %#4x (%p)\n", __func__,
1209 te->f, sample->def, sample);
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220 if (!te->f)
1221 break;
1222 }
1223
1224
1225 sample->def = 0;
1226 sample++;
1227 }
1228}
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244static void hw_perf_event_update(struct perf_event *event, int flush_all)
1245{
1246 struct hw_perf_event *hwc = &event->hw;
1247 struct hws_trailer_entry *te;
1248 unsigned long *sdbt;
1249 unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
1250 int done;
1251
1252
1253
1254
1255
1256 if (SAMPL_DIAG_MODE(&event->hw))
1257 return;
1258
1259 if (flush_all && SDB_FULL_BLOCKS(hwc))
1260 flush_all = 0;
1261
1262 sdbt = (unsigned long *) TEAR_REG(hwc);
1263 done = event_overflow = sampl_overflow = num_sdb = 0;
1264 while (!done) {
1265
1266 te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
1267
1268
1269 if (!te->f) {
1270 done = 1;
1271 if (!flush_all)
1272 break;
1273 }
1274
1275
1276 if (te->overflow)
1277
1278
1279
1280
1281 sampl_overflow += te->overflow;
1282
1283
1284 debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
1285 "overflow %llu timestamp %#llx\n",
1286 __func__, (unsigned long)sdbt, te->overflow,
1287 (te->f) ? trailer_timestamp(te) : 0ULL);
1288
1289
1290
1291
1292
1293 hw_collect_samples(event, sdbt, &event_overflow);
1294 num_sdb++;
1295
1296
1297 do {
1298 te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
1299 te_flags |= SDB_TE_ALERT_REQ_MASK;
1300 } while (!cmpxchg_double(&te->flags, &te->overflow,
1301 te->flags, te->overflow,
1302 te_flags, 0ULL));
1303
1304
1305 sdbt++;
1306 if (is_link_entry(sdbt))
1307 sdbt = get_next_sdbt(sdbt);
1308
1309
1310 TEAR_REG(hwc) = (unsigned long) sdbt;
1311
1312
1313
1314
1315 if (flush_all && done)
1316 break;
1317 }
1318
1319
1320 if (sampl_overflow)
1321 OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
1322 sampl_overflow, 1 + num_sdb);
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332 if (event_overflow) {
1333 SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
1334 debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
1335 __func__,
1336 DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
1337 }
1338
1339 if (sampl_overflow || event_overflow)
1340 debug_sprintf_event(sfdbg, 4, "%s: "
1341 "overflows: sample %llu event %llu"
1342 " total %llu num_sdb %llu\n",
1343 __func__, sampl_overflow, event_overflow,
1344 OVERFLOW_REG(hwc), num_sdb);
1345}
1346
/*
 * Helpers for the AUX buffer indices.  All macro arguments are fully
 * parenthesized so that expression arguments expand correctly.
 *
 * AUX_SDB_INDEX:     map a running index onto a position within the
 *                    aux->sfb.num_sdb SDBs of the buffer
 * AUX_SDB_NUM:       number of SDBs in the inclusive range start..end,
 *                    or 0 if end lies before start
 * AUX_SDB_NUM_ALERT: number of SDBs from head up to the alert mark
 * AUX_SDB_NUM_EMPTY: number of SDBs from head up to the empty mark
 */
#define AUX_SDB_INDEX(aux, i) ((i) % (aux)->sfb.num_sdb)
#define AUX_SDB_NUM(aux, start, end) \
	((end) >= (start) ? (end) - (start) + 1 : 0)
#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->alert_mark)
#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, (aux)->head, (aux)->empty_mark)
1351
1352
1353
1354
1355static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux,
1356 unsigned long index)
1357{
1358 unsigned long sdb;
1359
1360 index = AUX_SDB_INDEX(aux, index);
1361 sdb = aux->sdb_index[index];
1362 return (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
1363}
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374static void aux_output_end(struct perf_output_handle *handle)
1375{
1376 unsigned long i, range_scan, idx;
1377 struct aux_buffer *aux;
1378 struct hws_trailer_entry *te;
1379
1380 aux = perf_get_aux(handle);
1381 if (!aux)
1382 return;
1383
1384 range_scan = AUX_SDB_NUM_ALERT(aux);
1385 for (i = 0, idx = aux->head; i < range_scan; i++, idx++) {
1386 te = aux_sdb_trailer(aux, idx);
1387 if (!(te->flags & SDB_TE_BUFFER_FULL_MASK))
1388 break;
1389 }
1390
1391 perf_aux_output_end(handle, i << PAGE_SHIFT);
1392
1393
1394 te = aux_sdb_trailer(aux, aux->alert_mark);
1395 te->flags &= ~SDB_TE_ALERT_REQ_MASK;
1396
1397 debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
1398 __func__, i, range_scan, aux->head);
1399}
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410static int aux_output_begin(struct perf_output_handle *handle,
1411 struct aux_buffer *aux,
1412 struct cpu_hw_sf *cpuhw)
1413{
1414 unsigned long range;
1415 unsigned long i, range_scan, idx;
1416 unsigned long head, base, offset;
1417 struct hws_trailer_entry *te;
1418
1419 if (WARN_ON_ONCE(handle->head & ~PAGE_MASK))
1420 return -EINVAL;
1421
1422 aux->head = handle->head >> PAGE_SHIFT;
1423 range = (handle->size + 1) >> PAGE_SHIFT;
1424 if (range <= 1)
1425 return -ENOMEM;
1426
1427
1428
1429
1430
1431 debug_sprintf_event(sfdbg, 6,
1432 "%s: range %ld head %ld alert %ld empty %ld\n",
1433 __func__, range, aux->head, aux->alert_mark,
1434 aux->empty_mark);
1435 if (range > AUX_SDB_NUM_EMPTY(aux)) {
1436 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
1437 idx = aux->empty_mark + 1;
1438 for (i = 0; i < range_scan; i++, idx++) {
1439 te = aux_sdb_trailer(aux, idx);
1440 te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
1441 SDB_TE_ALERT_REQ_MASK);
1442 te->overflow = 0;
1443 }
1444
1445 aux->empty_mark = aux->head + range - 1;
1446 }
1447
1448
1449 aux->alert_mark = aux->head + range/2 - 1;
1450 te = aux_sdb_trailer(aux, aux->alert_mark);
1451 te->flags = te->flags | SDB_TE_ALERT_REQ_MASK;
1452
1453
1454 head = AUX_SDB_INDEX(aux, aux->head);
1455 base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE];
1456 offset = head % CPUM_SF_SDB_PER_TABLE;
1457 cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
1458 cpuhw->lsctl.dear = aux->sdb_index[head];
1459
1460 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
1461 "index %ld tear %#lx dear %#lx\n", __func__,
1462 aux->head, aux->alert_mark, aux->empty_mark,
1463 head / CPUM_SF_SDB_PER_TABLE,
1464 cpuhw->lsctl.tear, cpuhw->lsctl.dear);
1465
1466 return 0;
1467}
1468
1469
1470
1471
1472
1473
1474
1475static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
1476 unsigned long long *overflow)
1477{
1478 unsigned long long orig_overflow, orig_flags, new_flags;
1479 struct hws_trailer_entry *te;
1480
1481 te = aux_sdb_trailer(aux, alert_index);
1482 do {
1483 orig_flags = te->flags;
1484 *overflow = orig_overflow = te->overflow;
1485 if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
1486
1487
1488
1489
1490
1491 return false;
1492 }
1493 new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK;
1494 } while (!cmpxchg_double(&te->flags, &te->overflow,
1495 orig_flags, orig_overflow,
1496 new_flags, 0ULL));
1497 return true;
1498}
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
1523 unsigned long long *overflow)
1524{
1525 unsigned long long orig_overflow, orig_flags, new_flags;
1526 unsigned long i, range_scan, idx, idx_old;
1527 struct hws_trailer_entry *te;
1528
1529 debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
1530 "empty %ld\n", __func__, range, aux->head,
1531 aux->alert_mark, aux->empty_mark);
1532 if (range <= AUX_SDB_NUM_EMPTY(aux))
1533
1534
1535
1536
1537
1538 return aux_set_alert(aux, aux->alert_mark, overflow);
1539
1540 if (aux->alert_mark <= aux->empty_mark)
1541
1542
1543
1544
1545 if (!aux_set_alert(aux, aux->alert_mark, overflow))
1546 return false;
1547
1548
1549
1550
1551
1552
1553 range_scan = range - AUX_SDB_NUM_EMPTY(aux);
1554 idx_old = idx = aux->empty_mark + 1;
1555 for (i = 0; i < range_scan; i++, idx++) {
1556 te = aux_sdb_trailer(aux, idx);
1557 do {
1558 orig_flags = te->flags;
1559 orig_overflow = te->overflow;
1560 new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK;
1561 if (idx == aux->alert_mark)
1562 new_flags |= SDB_TE_ALERT_REQ_MASK;
1563 else
1564 new_flags &= ~SDB_TE_ALERT_REQ_MASK;
1565 } while (!cmpxchg_double(&te->flags, &te->overflow,
1566 orig_flags, orig_overflow,
1567 new_flags, 0ULL));
1568 *overflow += orig_overflow;
1569 }
1570
1571
1572 aux->empty_mark = aux->head + range - 1;
1573
1574 debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
1575 "empty %ld\n", __func__, range_scan, idx_old,
1576 idx - 1, aux->empty_mark);
1577 return true;
1578}
1579
1580
1581
1582
1583static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
1584{
1585 struct aux_buffer *aux;
1586 int done = 0;
1587 unsigned long range = 0, size;
1588 unsigned long long overflow = 0;
1589 struct perf_output_handle *handle = &cpuhw->handle;
1590 unsigned long num_sdb;
1591
1592 aux = perf_get_aux(handle);
1593 if (WARN_ON_ONCE(!aux))
1594 return;
1595
1596
1597 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
1598 debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
1599 size >> PAGE_SHIFT);
1600 perf_aux_output_end(handle, size);
1601
1602 num_sdb = aux->sfb.num_sdb;
1603 while (!done) {
1604
1605 aux = perf_aux_output_begin(handle, cpuhw->event);
1606 if (handle->size == 0) {
1607 pr_err("The AUX buffer with %lu pages for the "
1608 "diagnostic-sampling mode is full\n",
1609 num_sdb);
1610 debug_sprintf_event(sfdbg, 1,
1611 "%s: AUX buffer used up\n",
1612 __func__);
1613 break;
1614 }
1615 if (WARN_ON_ONCE(!aux))
1616 return;
1617
1618
1619 aux->head = handle->head >> PAGE_SHIFT;
1620 range = (handle->size + 1) >> PAGE_SHIFT;
1621 if (range == 1)
1622 aux->alert_mark = aux->head;
1623 else
1624 aux->alert_mark = aux->head + range/2 - 1;
1625
1626 if (aux_reset_buffer(aux, range, &overflow)) {
1627 if (!overflow) {
1628 done = 1;
1629 break;
1630 }
1631 size = range << PAGE_SHIFT;
1632 perf_aux_output_end(&cpuhw->handle, size);
1633 pr_err("Sample data caused the AUX buffer with %lu "
1634 "pages to overflow\n", aux->sfb.num_sdb);
1635 debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
1636 "overflow %lld\n", __func__,
1637 aux->head, range, overflow);
1638 } else {
1639 size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
1640 perf_aux_output_end(&cpuhw->handle, size);
1641 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
1642 "already full, try another\n",
1643 __func__,
1644 aux->head, aux->alert_mark);
1645 }
1646 }
1647
1648 if (done)
1649 debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
1650 "empty %ld\n", __func__, aux->head,
1651 aux->alert_mark, aux->empty_mark);
1652}
1653
1654
1655
1656
1657static void aux_buffer_free(void *data)
1658{
1659 struct aux_buffer *aux = data;
1660 unsigned long i, num_sdbt;
1661
1662 if (!aux)
1663 return;
1664
1665
1666 num_sdbt = aux->sfb.num_sdbt;
1667 for (i = 0; i < num_sdbt; i++)
1668 free_page(aux->sdbt_index[i]);
1669
1670 kfree(aux->sdbt_index);
1671 kfree(aux->sdb_index);
1672 kfree(aux);
1673
1674 debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
1675}
1676
1677static void aux_sdb_init(unsigned long sdb)
1678{
1679 struct hws_trailer_entry *te;
1680
1681 te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb);
1682
1683
1684 te->clock_base = 1;
1685 te->progusage2 = tod_clock_base.tod;
1686}
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702static void *aux_buffer_setup(struct perf_event *event, void **pages,
1703 int nr_pages, bool snapshot)
1704{
1705 struct sf_buffer *sfb;
1706 struct aux_buffer *aux;
1707 unsigned long *new, *tail;
1708 int i, n_sdbt;
1709
1710 if (!nr_pages || !pages)
1711 return NULL;
1712
1713 if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
1714 pr_err("AUX buffer size (%i pages) is larger than the "
1715 "maximum sampling buffer limit\n",
1716 nr_pages);
1717 return NULL;
1718 } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) {
1719 pr_err("AUX buffer size (%i pages) is less than the "
1720 "minimum sampling buffer limit\n",
1721 nr_pages);
1722 return NULL;
1723 }
1724
1725
1726 aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
1727 if (!aux)
1728 goto no_aux;
1729 sfb = &aux->sfb;
1730
1731
1732 n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
1733 aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
1734 if (!aux->sdbt_index)
1735 goto no_sdbt_index;
1736
1737
1738 aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
1739 if (!aux->sdb_index)
1740 goto no_sdb_index;
1741
1742
1743 sfb->num_sdbt = 0;
1744 sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
1745 if (!sfb->sdbt)
1746 goto no_sdbt;
1747 aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt;
1748 tail = sfb->tail = sfb->sdbt;
1749
1750
1751
1752
1753
1754 for (i = 0; i < nr_pages; i++, tail++) {
1755 if (require_table_link(tail)) {
1756 new = (unsigned long *) get_zeroed_page(GFP_KERNEL);
1757 if (!new)
1758 goto no_sdbt;
1759 aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new;
1760
1761 *tail = (unsigned long)(void *) new + 1;
1762 tail = new;
1763 }
1764
1765 *tail = (unsigned long)pages[i];
1766 aux->sdb_index[i] = (unsigned long)pages[i];
1767 aux_sdb_init((unsigned long)pages[i]);
1768 }
1769 sfb->num_sdb = nr_pages;
1770
1771
1772 *tail = (unsigned long) sfb->sdbt + 1;
1773 sfb->tail = tail;
1774
1775
1776
1777
1778
1779
1780 aux->empty_mark = sfb->num_sdb - 1;
1781
1782 debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
1783 sfb->num_sdbt, sfb->num_sdb);
1784
1785 return aux;
1786
1787no_sdbt:
1788
1789 for (i = 0; i < sfb->num_sdbt; i++)
1790 free_page(aux->sdbt_index[i]);
1791 kfree(aux->sdb_index);
1792no_sdb_index:
1793 kfree(aux->sdbt_index);
1794no_sdbt_index:
1795 kfree(aux);
1796no_aux:
1797 return NULL;
1798}
1799
/*
 * cpumsf_pmu_read() - .read callback of the cpumf_sampling PMU
 * @event:	perf event to read (unused)
 *
 * Intentionally empty: this callback performs no work.
 */
1800static void cpumsf_pmu_read(struct perf_event *event)
1801{
1802
1803}
1804
1805
1806
1807
1808
1809static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
1810{
1811 struct hws_qsi_info_block si;
1812 unsigned long rate;
1813 bool do_freq;
1814
1815 memset(&si, 0, sizeof(si));
1816 if (event->cpu == -1) {
1817 if (qsi(&si))
1818 return -ENODEV;
1819 } else {
1820
1821
1822
1823 struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
1824
1825 si = cpuhw->qsi;
1826 }
1827
1828 do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
1829 rate = getrate(do_freq, value, &si);
1830 if (!rate)
1831 return -EINVAL;
1832
1833 event->attr.sample_period = rate;
1834 SAMPL_RATE(&event->hw) = rate;
1835 hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
1836 debug_sprintf_event(sfdbg, 4, "%s:"
1837 " cpu %d value %#llx period %#llx freq %d\n",
1838 __func__, event->cpu, value,
1839 event->attr.sample_period, do_freq);
1840 return 0;
1841}
1842
1843
1844
1845
1846static void cpumsf_pmu_start(struct perf_event *event, int flags)
1847{
1848 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1849
1850 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1851 return;
1852
1853 if (flags & PERF_EF_RELOAD)
1854 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1855
1856 perf_pmu_disable(event->pmu);
1857 event->hw.state = 0;
1858 cpuhw->lsctl.cs = 1;
1859 if (SAMPL_DIAG_MODE(&event->hw))
1860 cpuhw->lsctl.cd = 1;
1861 perf_pmu_enable(event->pmu);
1862}
1863
1864
1865
1866
1867static void cpumsf_pmu_stop(struct perf_event *event, int flags)
1868{
1869 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1870
1871 if (event->hw.state & PERF_HES_STOPPED)
1872 return;
1873
1874 perf_pmu_disable(event->pmu);
1875 cpuhw->lsctl.cs = 0;
1876 cpuhw->lsctl.cd = 0;
1877 event->hw.state |= PERF_HES_STOPPED;
1878
1879 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
1880 hw_perf_event_update(event, 1);
1881 event->hw.state |= PERF_HES_UPTODATE;
1882 }
1883 perf_pmu_enable(event->pmu);
1884}
1885
1886static int cpumsf_pmu_add(struct perf_event *event, int flags)
1887{
1888 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1889 struct aux_buffer *aux;
1890 int err;
1891
1892 if (cpuhw->flags & PMU_F_IN_USE)
1893 return -EAGAIN;
1894
1895 if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt)
1896 return -EINVAL;
1897
1898 err = 0;
1899 perf_pmu_disable(event->pmu);
1900
1901 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1902
1903
1904
1905
1906
1907
1908 cpuhw->lsctl.s = 0;
1909 cpuhw->lsctl.h = 1;
1910 cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
1911 if (!SAMPL_DIAG_MODE(&event->hw)) {
1912 cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
1913 cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
1914 TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
1915 }
1916
1917
1918
1919 if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
1920 err = -EAGAIN;
1921 goto out;
1922 }
1923 if (SAMPL_DIAG_MODE(&event->hw)) {
1924 aux = perf_aux_output_begin(&cpuhw->handle, event);
1925 if (!aux) {
1926 err = -EINVAL;
1927 goto out;
1928 }
1929 err = aux_output_begin(&cpuhw->handle, aux, cpuhw);
1930 if (err)
1931 goto out;
1932 cpuhw->lsctl.ed = 1;
1933 }
1934 cpuhw->lsctl.es = 1;
1935
1936
1937 cpuhw->event = event;
1938 cpuhw->flags |= PMU_F_IN_USE;
1939
1940 if (flags & PERF_EF_START)
1941 cpumsf_pmu_start(event, PERF_EF_RELOAD);
1942out:
1943 perf_event_update_userpage(event);
1944 perf_pmu_enable(event->pmu);
1945 return err;
1946}
1947
1948static void cpumsf_pmu_del(struct perf_event *event, int flags)
1949{
1950 struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
1951
1952 perf_pmu_disable(event->pmu);
1953 cpumsf_pmu_stop(event, PERF_EF_UPDATE);
1954
1955 cpuhw->lsctl.es = 0;
1956 cpuhw->lsctl.ed = 0;
1957 cpuhw->flags &= ~PMU_F_IN_USE;
1958 cpuhw->event = NULL;
1959
1960 if (SAMPL_DIAG_MODE(&event->hw))
1961 aux_output_end(&cpuhw->handle);
1962 perf_event_update_userpage(event);
1963 perf_pmu_enable(event->pmu);
1964}
1965
/*
 * Event attribute definitions for the two sampling events,
 * PERF_EVENT_CPUM_SF and PERF_EVENT_CPUM_SF_DIAG.  They are referenced
 * through CPUMF_EVENT_PTR(SF, ...) when cpumsf_pmu_events_attr[] is filled.
 * NOTE(review): CPUMF_EVENT_ATTR is defined outside this file section;
 * presumably it emits one attribute object per invocation - confirm.
 */
1966CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
1967CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
/* Slot numbers within cpumsf_pmu_events_attr[] */
enum {
	SF_CYCLES_BASIC_ATTR_IDX      = 0,	/* basic-sampling event */
	SF_CYCLES_BASIC_DIAG_ATTR_IDX = 1,	/* diagnostic-sampling event */
	SF_CYCLES_ATTR_MAX            = 2	/* number of event slots */
};
1990
1991static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
1992 [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
1993};
1994
1995PMU_FORMAT_ATTR(event, "config:0-63");
1996
1997static struct attribute *cpumsf_pmu_format_attr[] = {
1998 &format_attr_event.attr,
1999 NULL,
2000};
2001
2002static struct attribute_group cpumsf_pmu_events_group = {
2003 .name = "events",
2004 .attrs = cpumsf_pmu_events_attr,
2005};
2006
2007static struct attribute_group cpumsf_pmu_format_group = {
2008 .name = "format",
2009 .attrs = cpumsf_pmu_format_attr,
2010};
2011
2012static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
2013 &cpumsf_pmu_events_group,
2014 &cpumsf_pmu_format_group,
2015 NULL,
2016};
2017
2018static struct pmu cpumf_sampling = {
2019 .pmu_enable = cpumsf_pmu_enable,
2020 .pmu_disable = cpumsf_pmu_disable,
2021
2022 .event_init = cpumsf_pmu_event_init,
2023 .add = cpumsf_pmu_add,
2024 .del = cpumsf_pmu_del,
2025
2026 .start = cpumsf_pmu_start,
2027 .stop = cpumsf_pmu_stop,
2028 .read = cpumsf_pmu_read,
2029
2030 .attr_groups = cpumsf_pmu_attr_groups,
2031
2032 .setup_aux = aux_buffer_setup,
2033 .free_aux = aux_buffer_free,
2034
2035 .check_period = cpumsf_pmu_check_period,
2036};
2037
2038static void cpumf_measurement_alert(struct ext_code ext_code,
2039 unsigned int alert, unsigned long unused)
2040{
2041 struct cpu_hw_sf *cpuhw;
2042
2043 if (!(alert & CPU_MF_INT_SF_MASK))
2044 return;
2045 inc_irq_stat(IRQEXT_CMS);
2046 cpuhw = this_cpu_ptr(&cpu_hw_sf);
2047
2048
2049
2050 if (!(cpuhw->flags & PMU_F_RESERVED))
2051 return;
2052
2053
2054
2055
2056
2057 if (alert & CPU_MF_INT_SF_PRA) {
2058 if (cpuhw->flags & PMU_F_IN_USE)
2059 if (SAMPL_DIAG_MODE(&cpuhw->event->hw))
2060 hw_collect_aux(cpuhw);
2061 else
2062 hw_perf_event_update(cpuhw->event, 0);
2063 else
2064 WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
2065 }
2066
2067
2068 if (alert != CPU_MF_INT_SF_PRA)
2069 debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
2070 alert);
2071
2072
2073 if (alert & CPU_MF_INT_SF_SACA)
2074 qsi(&cpuhw->qsi);
2075
2076
2077 if (alert & CPU_MF_INT_SF_LSDA) {
2078 pr_err("Sample data was lost\n");
2079 cpuhw->flags |= PMU_F_ERR_LSDA;
2080 sf_disable();
2081 }
2082
2083
2084 if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
2085 pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
2086 alert);
2087 cpuhw->flags |= PMU_F_ERR_IBE;
2088 sf_disable();
2089 }
2090}
2091
2092static int cpusf_pmu_setup(unsigned int cpu, int flags)
2093{
2094
2095
2096
2097 if (!atomic_read(&num_events))
2098 return 0;
2099
2100 local_irq_disable();
2101 setup_pmc_cpu(&flags);
2102 local_irq_enable();
2103 return 0;
2104}
2105
2106static int s390_pmu_sf_online_cpu(unsigned int cpu)
2107{
2108 return cpusf_pmu_setup(cpu, PMC_INIT);
2109}
2110
2111static int s390_pmu_sf_offline_cpu(unsigned int cpu)
2112{
2113 return cpusf_pmu_setup(cpu, PMC_RELEASE);
2114}
2115
2116static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
2117{
2118 if (!cpum_sf_avail())
2119 return -ENODEV;
2120 return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
2121}
2122
2123static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
2124{
2125 int rc;
2126 unsigned long min, max;
2127
2128 if (!cpum_sf_avail())
2129 return -ENODEV;
2130 if (!val || !strlen(val))
2131 return -EINVAL;
2132
2133
2134 min = CPUM_SF_MIN_SDB;
2135 max = CPUM_SF_MAX_SDB;
2136 if (strchr(val, ','))
2137 rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
2138 else
2139 rc = kstrtoul(val, 10, &max);
2140
2141 if (min < 2 || min >= max || max > get_num_physpages())
2142 rc = -EINVAL;
2143 if (rc)
2144 return rc;
2145
2146 sfb_set_limits(min, max);
2147 pr_info("The sampling buffer limits have changed to: "
2148 "min %lu max %lu (diag %lu)\n",
2149 CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
2150 return 0;
2151}
2152
2153#define param_check_sfb_size(name, p) __param_check(name, p, void)
2154static const struct kernel_param_ops param_ops_sfb_size = {
2155 .set = param_set_sfb_size,
2156 .get = param_get_sfb_size,
2157};
2158
2159#define RS_INIT_FAILURE_QSI 0x0001
2160#define RS_INIT_FAILURE_BSDES 0x0002
2161#define RS_INIT_FAILURE_ALRT 0x0003
2162#define RS_INIT_FAILURE_PERF 0x0004
2163static void __init pr_cpumsf_err(unsigned int reason)
2164{
2165 pr_err("Sampling facility support for perf is not available: "
2166 "reason %#x\n", reason);
2167}
2168
2169static int __init init_cpum_sampling_pmu(void)
2170{
2171 struct hws_qsi_info_block si;
2172 int err;
2173
2174 if (!cpum_sf_avail())
2175 return -ENODEV;
2176
2177 memset(&si, 0, sizeof(si));
2178 if (qsi(&si)) {
2179 pr_cpumsf_err(RS_INIT_FAILURE_QSI);
2180 return -ENODEV;
2181 }
2182
2183 if (!si.as && !si.ad)
2184 return -ENODEV;
2185
2186 if (si.bsdes != sizeof(struct hws_basic_entry)) {
2187 pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
2188 return -EINVAL;
2189 }
2190
2191 if (si.ad) {
2192 sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
2193
2194
2195
2196 cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
2197 CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
2198 }
2199
2200 sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
2201 if (!sfdbg) {
2202 pr_err("Registering for s390dbf failed\n");
2203 return -ENOMEM;
2204 }
2205 debug_register_view(sfdbg, &debug_sprintf_view);
2206
2207 err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
2208 cpumf_measurement_alert);
2209 if (err) {
2210 pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
2211 debug_unregister(sfdbg);
2212 goto out;
2213 }
2214
2215 err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
2216 if (err) {
2217 pr_cpumsf_err(RS_INIT_FAILURE_PERF);
2218 unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
2219 cpumf_measurement_alert);
2220 debug_unregister(sfdbg);
2221 goto out;
2222 }
2223
2224 cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online",
2225 s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
2226out:
2227 return err;
2228}
2229
/*
 * Run init_cpum_sampling_pmu() as an arch-level initcall and expose the
 * sampling buffer limits as core parameter "cpum_sfb_size" (mode 0644).
 * NOTE(review): the "sfb_size" type argument presumably selects
 * param_ops_sfb_size and param_check_sfb_size defined above - confirm
 * against the core_param() definition.
 */
2230arch_initcall(init_cpum_sampling_pmu);
2231core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
2232