#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_lrc_reg.h"

#include "i915_drv.h"
#include "i915_oa_hsw.h"
#include "i915_oa_bdw.h"
#include "i915_oa_chv.h"
#include "i915_oa_sklgt2.h"
#include "i915_oa_sklgt3.h"
#include "i915_oa_sklgt4.h"
#include "i915_oa_bxt.h"
#include "i915_oa_kblgt2.h"
#include "i915_oa_kblgt3.h"
#include "i915_oa_glk.h"
#include "i915_oa_cflgt2.h"
#include "i915_oa_cflgt3.h"
#include "i915_oa_cnl.h"
#include "i915_oa_icl.h"
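
/*
 * HW requires the OA buffer to be a power of two in size, between 128K
 * and 16M (see the BUILD_BUG_ON()s in alloc_oa_buffer()); we currently
 * always use the maximum size.
 */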
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
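
/*
 * The OA tail pointer race: the hardware tail pointer register can advance
 * before the report data it points at has landed in memory, so reads that
 * chase the raw tail may see garbage. oa_buffer_check_unlocked() therefore
 * tracks two tails: an "aging" tail snapshotted from the hardware, and an
 * "aged" tail that reads may consume up to. An aging tail is only promoted
 * to aged once OA_TAIL_MARGIN_NSEC has elapsed, giving in-flight report
 * writes time to land in memory.
 */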
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/* The driver polls for new OA reports at this frequency via a hrtimer. */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
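
/*
 * Controlled via the dev.i915.perf_stream_paranoid sysctl: when true (the
 * default), opening a stream that can observe other contexts' GPU activity
 * requires CAP_SYS_ADMIN.
 */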
static u32 i915_perf_stream_paranoid = true;

/*
 * The OA unit programs its sampling period as a power-of-two exponent;
 * this is the largest exponent userspace may request (checked in
 * read_properties_unlocked()).
 */
#define OA_EXPONENT_MAX 31

#define INVALID_CTX_ID 0xffffffff
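
/*
 * On Gen8+ automatically triggered OA reports include a 'reason' field in
 * dword 0 saying what triggered the report (timer, context switch, clock
 * ratio change, ...); gen8_append_oa_reports() treats a report with no
 * reason bits set as spurious.
 */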
#define OAREPORT_REASON_MASK		0x3f
#define OAREPORT_REASON_SHIFT		19
#define OAREPORT_REASON_TIMER		(1<<0)
#define OAREPORT_REASON_CTX_SWITCH	(1<<3)
#define OAREPORT_REASON_CLK_RATIO	(1<<5)

/*
 * Hard upper bound on the OA unit sampling frequency, derived from the
 * command stream timestamp frequency at init time.
 */
static int oa_sample_rate_hard_limit;

/*
 * Maximum OA sampling frequency permitted for non-root users, adjustable
 * via the dev.i915.oa_max_sample_rate sysctl. Defaults to 100000 (100 kHz).
 */
static u32 i915_oa_max_sample_rate = 100000;
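
/*
 * OA report format tables: each entry maps an I915_OA_FORMAT_* value to
 * { the format value programmed into OACONTROL, report size in bytes }.
 */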
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_40_A13_B8_C8 (3) omitted: 192 bytes doesn't factor into the buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

#define SAMPLE_OA_REPORT	(1<<0)
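
/*
 * struct perf_open_properties - properties given to a perf stream at open
 * @sample_flags: DRM_I915_PERF_PROP_SAMPLE_* properties are tracked as flags
 * @single_context: whether @ctx_handle is valid and the stream should be
 *	filtered to a single context
 * @ctx_handle: a GEM context handle, used to filter reports
 * @metrics_set: an ID for an OA unit metric set advertised via sysfs
 * @oa_format: an OA unit HW report format
 * @oa_periodic: whether to enable periodic OA unit sampling
 * @oa_period_exponent: the OA unit sampling period is derived from this
 *
 * As read_properties_unlocked() validates the properties given to open a
 * stream of data it collates them into this structure.
 */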
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

static void free_oa_config(struct drm_i915_private *dev_priv,
			   struct i915_oa_config *oa_config)
{
	/*
	 * The register lists may hold ERR_PTR() values left over from a
	 * failed alloc_oa_regs(); those must not be passed to kfree(),
	 * while valid (or NULL) pointers must be freed. Note the original
	 * !PTR_ERR() test only matched NULL and so leaked valid lists.
	 */
	if (!IS_ERR(oa_config->flex_regs))
		kfree(oa_config->flex_regs);
	if (!IS_ERR(oa_config->b_counter_regs))
		kfree(oa_config->b_counter_regs);
	if (!IS_ERR(oa_config->mux_regs))
		kfree(oa_config->mux_regs);
	kfree(oa_config);
}

static void put_oa_config(struct drm_i915_private *dev_priv,
			  struct i915_oa_config *oa_config)
{
	if (!atomic_dec_and_test(&oa_config->ref_count))
		return;

	free_oa_config(dev_priv, oa_config);
}

static int get_oa_config(struct drm_i915_private *dev_priv,
			 int metrics_set,
			 struct i915_oa_config **out_config)
{
	int ret;

	/* Metrics set 1 is reserved for the built-in test configuration. */
	if (metrics_set == 1) {
		*out_config = &dev_priv->perf.oa.test_config;
		atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
		return 0;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		return ret;

	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
	if (!*out_config)
		ret = -EINVAL;
	else
		atomic_inc(&(*out_config)->ref_count);

	mutex_unlock(&dev_priv->perf.metrics_lock);

	return ret;
}

static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
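
/*
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @dev_priv: i915 device instance
 *
 * Called either via fops (for blocking reads in user context) or via the
 * poll check hrtimer (atomic context) to check the OA buffer tail pointer
 * and determine whether there is data available for userspace to read.
 *
 * Besides returning true when there is data available to read(), this also
 * ages the "aging" tail and promotes it to the "aged" tail once it has been
 * stable for OA_TAIL_MARGIN_NSEC, so reads never race with reports still
 * being written by the hardware.
 *
 * Returns: true if the OA buffer contains at least one whole report.
 */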
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/*
	 * We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state.
	 */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;

	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);

	/*
	 * The hardware tail pointer doesn't necessarily advance in
	 * report_size steps, so round it down to a report boundary.
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/*
	 * Promote the aging tail to aged once it has been stable for longer
	 * than the margin needed for in-flight report writes to land.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging. */
		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/*
	 * Start aging a new tail if the hardware has advanced at least one
	 * whole report beyond the current aged tail.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/*
		 * Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware
		 * condition could put the tail out of range.
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}
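
/*
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout) the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as DRM_I915_PERF_RECORD_OA_REPORT_LOST) into
 * the userspace read() buffer, updating @offset on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */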
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/*
 * append_oa_sample - Copies a single OA report into a userspace read()
 * buffer, preceded by a sample record header sized according to the
 * stream's sample flags.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}
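
/*
 * gen8_append_oa_reports - Copies all buffered OA reports, up to the aged
 * tail, into a userspace read() buffer.
 *
 * The head pointer is only written back to hardware once all the reports
 * userspace had room for have been consumed, so any remaining reports are
 * picked up by the next read().
 *
 * Returns: 0 on success, negative error code on failure.
 */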
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer.
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset of the buffer in
	 * their values; the offsets below are relative to the buffer start.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate and align the tail pointers we read
	 * from the hardware and we are in full control of the head pointer,
	 * which should only be incremented by multiples of the report size.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what triggered
		 * this specific report (mostly timer triggered or e.g. due to
		 * a context switch).
		 *
		 * This field is never expected to be zero so we can check
		 * that the report isn't invalid before copying it to
		 * userspace.
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below.
		 */
		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * For the purposes of filtering a single-context stream we
		 * still forward reports that straddle a switch into or out of
		 * the context of interest (recognised via the reason field or
		 * via last_ctx_id), so userspace can account for counter
		 * deltas at the switch boundaries.
		 */
		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		     dev_priv->perf.oa.specific_ctx_id) ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (dev_priv->perf.oa.exclusive_stream->ctx &&
			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on the
		 * assumption that the OA buffer is initially zeroed, so reset
		 * the field after copying to keep the check meaningful once
		 * old reports start being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base, so put it back here.
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
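
/*
 * gen8_oa_read - copy status records then buffered OA reports
 *
 * Checks OA unit status registers and, if necessary, appends corresponding
 * status records for userspace (e.g. for buffer-lost conditions) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied;
 * note some data may have been copied even if an error is returned.
 */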
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain automatically
	 * triggered reports in this condition and so we have to assume that
	 * old reports are now being trampled over.
	 *
	 * Considering that we always configure a large 16MB buffer, an
	 * overflow anyway likely indicates that something has gone quite
	 * badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us.
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}
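
/*
 * gen7_append_oa_reports - as gen8_append_oa_reports, for Gen7 hardware.
 *
 * Gen7 reports carry no 'reason' field, so a report whose first dword is
 * zero (given that the buffer is zeroed at init and after each copy) is
 * treated as spurious and skipped.
 */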
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer.
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset of the buffer in
	 * their values.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate and align the tail pointers we read
	 * from the hardware and fully control the head pointer.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The report-ID field for periodic samples includes flags for
		 * the exponent etc. and is never expected to be zero, so we
		 * can check that the report isn't invalid before copying it
		 * to userspace.
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/*
		 * The above report-id field sanity check relies on the OA
		 * buffer being zeroed initially, so reset the field after
		 * copying to keep the check meaningful once old reports are
		 * overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, so put
		 * it back before writing the register.
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/*
 * gen7_oa_read - append status records then buffered OA reports, Gen7 style.
 *
 * On Gen7 the OASTATUS1 error bits can't be safely cleared while the OA
 * unit is enabled, so bits already reported to userspace are latched in
 * gen7_latched_oastatus1 and masked out of subsequent reads; the latch is
 * reset when the OA buffer is reinitialised.
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/* Ignore status bits that have already been reported to userspace. */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error, for the same
	 * reasons as on Gen8: some Gens don't reliably suppress reports in
	 * this state and old reports may be being trampled over.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/*
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics; sleeps until the poll check hrtimer finds a non-empty
 * OA buffer and wakes us.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled. */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					oa_buffer_check_unlocked(dev_priv));
}

/*
 * i915_oa_poll_wait - call poll_wait() with the wait queue for an OA stream
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}

/*
 * i915_oa_read - just calls through to the OA-specific read vfunc
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
					    struct i915_gem_context *ctx)
{
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	int err;

	err = i915_mutex_lock_interruptible(&i915->drm);
	if (err)
		return ERR_PTR(err);

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine->class != RENDER_CLASS)
			continue;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 */
		err = intel_context_pin(ce);
		if (err == 0) {
			i915->perf.oa.pinned_ctx = ce;
			break;
		}
	}
	i915_gem_context_unlock_engines(ctx);

	mutex_unlock(&i915->drm.struct_mutex);
	if (err)
		return ERR_PTR(err);

	return i915->perf.oa.pinned_ctx;
}

/*
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code.
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->dev_priv;
	struct intel_context *ce;

	ce = oa_pin_context(i915, stream->ctx);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
		i915->perf.oa.specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (USES_GUC_SUBMISSION(i915)) {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually written into the hardware. The LRCA is
			 * what is put into the context id field of the
			 * context descriptor by GuC. Because it's aligned to
			 * a page, the lower 12 bits are always 0.
			 */
			i915->perf.oa.specific_ctx_id =
				lower_32_bits(ce->lrc_desc) >> 12;

			/*
			 * GuC uses the top bit to signal proxy submission, so
			 * ignore that bit.
			 */
			i915->perf.oa.specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		} else {
			i915->perf.oa.specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			i915->perf.oa.specific_ctx_id =
				upper_32_bits(ce->lrc_desc);
			i915->perf.oa.specific_ctx_id &=
				i915->perf.oa.specific_ctx_id_mask;
		}
		break;

	case 11: {
		i915->perf.oa.specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
		i915->perf.oa.specific_ctx_id &=
			i915->perf.oa.specific_ctx_id_mask;
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(i915));
	}

	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
			 i915->perf.oa.specific_ctx_id,
			 i915->perf.oa.specific_ctx_id_mask);

	return 0;
}

/*
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context HW ID would remain
 * valid for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct intel_context *ce;

	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
	dev_priv->perf.oa.specific_ctx_id_mask = 0;

	ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
	if (ce) {
		mutex_lock(&dev_priv->drm.struct_mutex);
		intel_context_unpin(ce);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}
}

static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	mutex_unlock(&i915->drm.struct_mutex);

	i915->perf.oa.oa_buffer.vaddr = NULL;
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	if (dev_priv->perf.oa.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.oa.spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * Pre-DevBDW: OABUFFER must be set with counters off, before
	 * OASTATUS1, but after OASTATUS2.
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* Mark that we need updated tail pointers to read from. */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * On Haswell we have to track which OASTATUS1 flags we've already
	 * seen since they can't be cleared while periodic sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/*
	 * NB: although the OA buffer will initially be allocated zeroed via
	 * shmfs (so this memset is redundant when first allocating), we may
	 * re-init the OA buffer, either when re-enabling a stream or in
	 * error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the sanity
	 * check in gen7_append_oa_reports() that looks at the report-id field
	 * to make sure it's non-zero, which relies on the assumption that new
	 * reports are being written to zeroed memory.
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from. */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated zeroed via
	 * shmfs (so this memset is redundant when first allocating), we may
	 * re-init the OA buffer when re-enabling a stream or in error/reset
	 * paths. Clearing it keeps the spurious-report sanity check in the
	 * append paths meaningful, since that check relies on new reports
	 * landing in zeroed memory.
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   u32 n_regs)
{
	u32 i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}

static void delay_after_mux(void)
{
	/*
	 * It apparently takes a fairly long time for a new MUX configuration
	 * to be applied after these register writes. This delay duration was
	 * derived empirically based on the render_basic config, but hopefully
	 * it covers the maximum configuration latency. As a fallback, the
	 * spurious-report checks in the append paths also tend to discard
	 * reports generated before the configuration has landed.
	 */
	usleep_range(15000, 20000);
}

static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;

	/*
	 * The OA unit is clocked off the render clock, so render clock gating
	 * must be disabled while OA is enabled or events from non-render
	 * domains would not be counted.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
	delay_after_mux();

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}

static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

/*
 * NB: It must always remain pointer safe to run this even if the OA unit
 * has been disabled.
 *
 * It's fine to put out-of-date values into these per-context registers
 * in the case that the OA unit has been disabled.
 */
static void
gen8_update_reg_state_unlocked(struct intel_context *ce,
			       u32 *reg_state,
			       const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *i915 = ce->gem_context->i915;
	u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous. */
	i915_reg_t flex_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
		(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME);

	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
		u32 state_offset = ctx_flexeu0 + i * 2;
		u32 mmio = i915_mmio_reg_offset(flex_regs[i]);

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not yet.
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		CTX_REG(reg_state, state_offset, flex_regs[i], value);
	}

	CTX_REG(reg_state,
		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
		intel_sseu_make_rpcs(i915, &ce->sseu));
}
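
/*
 * Manages updating the per-context aspects of the OA stream configuration
 * across all contexts.
 *
 * The awkward consideration here is that OACTXCONTROL controls the exponent
 * for periodic sampling which is primarily used for system-wide profiling
 * where we'd like a consistent sampling period even in the face of context
 * switches. Updating the register state context (as opposed to, say, using
 * a workaround batch buffer) ensures the hardware won't automatically
 * reload an out-of-date timer exponent even transiently.
 *
 * Note: it's only the RCS/Render context that has any OA state.
 */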
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
				       const struct i915_oa_config *oa_config)
{
	unsigned int map_type = i915_coherent_map_type(dev_priv);
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/*
	 * The OA register config is set up through the context image. This
	 * image might be written to by the GPU on context switch (in
	 * particular on lite-restore), which means we can't safely update a
	 * context's image if that context is scheduled/submitted to run on
	 * the GPU.
	 *
	 * We could emit the OA register config through a batch buffer, but
	 * that might leave a small window where the OA unit is configured at
	 * an invalid sampling period.
	 *
	 * So far the best way to work around this issue seems to be draining
	 * the GPU of any submitted work.
	 */
	ret = i915_gem_wait_for_idle(dev_priv,
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Update all contexts now that we've stalled the submission. */
	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
		struct i915_gem_engines_iter it;
		struct intel_context *ce;

		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx),
				    it) {
			u32 *regs;

			if (ce->engine->class != RENDER_CLASS)
				continue;

			/* OA settings will be set upon first use. */
			if (!ce->state)
				continue;

			regs = i915_gem_object_pin_map(ce->state->obj,
						       map_type);
			if (IS_ERR(regs)) {
				i915_gem_context_unlock_engines(ctx);
				return PTR_ERR(regs);
			}

			ce->state->obj->mm.dirty = true;
			regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);

			gen8_update_reg_state_unlocked(ce, regs, oa_config);

			i915_gem_object_unpin_map(ce->state->obj);
		}
		i915_gem_context_unlock_engines(ctx);
	}

	/*
	 * Apply the configuration by doing one context restore of the edited
	 * context image.
	 */
	rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_add(rq);

	return 0;
}

static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;
	int ret;

	/*
	 * We disable slice/unslice clock ratio change reports on SKL since
	 * they are too noisy. The HW generates a lot of redundant reports
	 * where the ratio hasn't really changed, causing a lot of redundant
	 * work for processes and increasing the chances we'll hit buffer
	 * overruns.
	 *
	 * Although we don't currently use the 'disable overrun' OABUFFER
	 * feature, it's worth noting that clock ratio reports would have to
	 * be disabled before using it since the HW doesn't correctly block
	 * these reports.
	 *
	 * Currently none of the high-level metrics we have depend on knowing
	 * this ratio to normalize. Note: the register is not power-context
	 * saved and restored, but that's OK considering that we disable RC6
	 * while the OA unit is enabled.
	 */
	if (IS_GEN_RANGE(dev_priv, 9, 11)) {
		I915_WRITE(GEN8_OA_DEBUG,
			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Update all contexts prior to writing the mux configurations as we
	 * need to make sure all slices/subslices are ON before writing to NOA
	 * registers.
	 */
	ret = gen8_configure_all_contexts(dev_priv, oa_config);
	if (ret)
		return ret;

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
	delay_after_mux();

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}

static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable noa to save power. */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}

static void gen7_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_gem_context *ctx = stream->ctx;
	u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
	bool periodic = dev_priv->perf.oa.periodic;
	u32 period_exponent = dev_priv->perf.oa.period_exponent;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it also
	 * ensures status flags and the buffer itself are cleared in error
	 * paths, and we have checks for invalid reports based on the
	 * assumption that certain fields are written to zeroed memory, which
	 * this helps maintain.
	 */
	gen7_init_oa_buffer(dev_priv);

	I915_WRITE(GEN7_OACONTROL,
		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
		   (period_exponent <<
		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
		   GEN7_OACONTROL_ENABLE);
}

static void gen8_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 * (See the equivalent comment in gen7_oa_enable().)
	 */
	gen8_init_oa_buffer(dev_priv);

	/*
	 * Note: we don't rely on the hardware to perform single context
	 * filtering and instead filter on the cpu based on the context-id
	 * field of reports.
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}

/*
 * i915_oa_stream_enable - handle I915_PERF_IOCTL_ENABLE for an OA stream
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream, and starts the hrtimer used to check for new
 * data when userspace blocks on read() or poll().
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(stream);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN7_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

static void gen8_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN8_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}

/*
 * i915_oa_stream_disable - handle I915_PERF_IOCTL_DISABLE for an OA stream
 *
 * Stops the OA unit from writing reports and cancels the poll check hrtimer.
 */
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(stream);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}

static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};
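
/*
 * i915_oa_stream_init - validate combined props for OA stream and init
 * @stream: An i915 perf stream
 * @param: The open parameters passed to DRM_IOCTL_I915_PERF_OPEN
 * @props: The property state that configures the stream (individually
 *	validated)
 *
 * While read_properties_unlocked() validates properties in isolation it
 * doesn't ensure that the combination necessarily makes sense.
 *
 * At this point it has been determined that userspace wants a stream of OA
 * metrics, but we still need to further validate the combined properties
 * and make sure there isn't already an i915 perf stream with conflicting
 * requirements on the OA unit.
 *
 * Returns: zero on success or a negative error code.
 */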
static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/*
	 * If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config IDs.
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_DEBUG("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.enable_metric_set) {
		DRM_DEBUG("OA unit not supported\n");
		return -ENODEV;
	}

	/*
	 * To avoid the complexity of having to accurately filter counter
	 * reports and marshal to the appropriate client we currently only
	 * allow exclusive access.
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_DEBUG("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->oa_format) {
		DRM_DEBUG("OA report format not specified\n");
		return -EINVAL;
	}

	/*
	 * We set up some ratelimit state to potentially throttle any _NOTES
	 * about spurious, invalid OA reports which we don't forward to
	 * userspace.
	 *
	 * We print a _NOTE about any throttling when closing the stream
	 * instead of waiting until driver _fini which no one would ever see.
	 *
	 * Using the same limiting factors as printk_ratelimit().
	 */
	ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
			     5 * HZ, 10);
	/*
	 * Since we use a DRM_NOTE for spurious reports it would be
	 * inconsistent to let __ratelimit() automatically print a warning for
	 * throttling.
	 */
	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
			    RATELIMIT_MSG_ON_RELEASE);

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic)
		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret) {
			DRM_DEBUG("Invalid context id to filter with\n");
			return ret;
		}
	}

	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
	if (ret) {
		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
		goto err_config;
	}

	/*
	 * PRM - observability performance counters:
	 *
	 *   OACONTROL, performance counter enable, note:
	 *
	 *   "When this bit is set, in order to have coherent counts,
	 *   RC6 power state and trunk clock gating must be disabled.
	 *   This can be achieved by programming MMIO registers as
	 *   0xA094=0 and 0xA090[31]=1"
	 *
	 *   In our case we are expecting that taking pm + FORCEWAKE
	 *   references will effectively disable RC6.
	 */
	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		goto err_lock;

	stream->ops = &i915_oa_stream_ops;
	dev_priv->perf.oa.exclusive_stream = stream;

	ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
	if (ret) {
		DRM_DEBUG("Unable to enable metric set\n");
		goto err_enable;
	}

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

err_enable:
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_lock:
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	put_oa_config(dev_priv, stream->oa_config);

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);

err_config:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}

void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct intel_context *ce,
			    u32 *regs)
{
	struct i915_perf_stream *stream;

	if (engine->class != RENDER_CLASS)
		return;

	stream = engine->i915->perf.oa.exclusive_stream;
	if (stream)
		gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
}
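
/*
 * i915_perf_read_locked - read() with error normalisation
 *
 * A read can be thought of as a transaction: userspace asks for at most
 * @count bytes. A short read (less than @count but at least one whole
 * record) is possible, but a read never splits a single record across two
 * calls. A zero-byte result with no error is reported as -EAGAIN so the
 * semantics of blocking and non-blocking reads stay consistent.
 */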
static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
				     struct file *file,
				     char __user *buf,
				     size_t count,
				     loff_t *ppos)
{
	/*
	 * If we've successfully copied any data then reporting that takes
	 * precedence over any internal error status, so the data isn't lost.
	 * For example ret will be -ENOSPC whenever there's more buffered data
	 * than can be copied to userspace, but that's only interesting if we
	 * weren't able to copy any data at all, since it implies the
	 * userspace buffer is too small to receive a single record (and we
	 * never split records).
	 */
	size_t offset = 0;
	int ret = stream->ops->read(stream, buf, count, &offset);

	return offset ?: (ret ?: -EAGAIN);
}

/*
 * i915_perf_read - handles read() FOP for i915 perf stream FDs
 *
 * The entry point for handling a read() on a stream file descriptor from
 * userspace. Most of the work is left to the stream-specific read callback;
 * this wrapper implements blocking semantics and serialises reads against
 * other perf operations via the perf.lock mutex.
 */
static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/*
	 * To ensure it's handled consistently we simply treat all reads of a
	 * disabled stream as an error. In particular it might otherwise lead
	 * to a deadlock for blocking file descriptors.
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * There's the small chance of false positives from
		 * stream->ops->wait_unlocked (e.g. due to multiple concurrent
		 * readers), so loop until we have really copied something (or
		 * hit a real error).
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	/*
	 * We allow the poll checking to sometimes report false positive EPOLLIN
	 * events where we might actually report EAGAIN on read() if there's
	 * not really any data available. We don't want userspace to busy-loop
	 * between poll() reporting EPOLLIN and read() returning -EAGAIN, so
	 * clear the pollin state here, effectively removing the source of the
	 * false positive until the next check via the hrtimer.
	 */
	if (ret >= 0 || ret == -EAGAIN) {
		/*
		 * Maybe make ->pollin per-stream state if we support multiple
		 * concurrent streams in the future.
		 */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (oa_buffer_check_unlocked(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}

/*
 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
 *
 * Processes any registered event the stream is waiting on and returns a
 * mask of the events ready for poll(). Assumes the perf.lock is held.
 */
static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
				      struct i915_perf_stream *stream,
				      struct file *file,
				      poll_table *wait)
{
	__poll_t events = 0;

	stream->ops->poll_wait(stream, file, wait);

	/*
	 * Note: we don't explicitly check whether there's something to read
	 * here since this path may be very hot depending on what else
	 * userspace is polling, or on the timeout in use. We rely solely on
	 * the hrtimer/oa_buffer_check_unlocked() to notify us when there are
	 * samples to read.
	 */
	if (dev_priv->perf.oa.pollin)
		events |= EPOLLIN;

	return events;
}

/*
 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
 *
 * Takes the perf.lock mutex before calling i915_perf_poll_locked().
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	__poll_t ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/*
 * i915_perf_enable_locked - handle I915_PERF_IOCTL_ENABLE ioctl
 *
 * Enables the stream if not already enabled. Assumes the caller holds the
 * perf.lock mutex.
 */
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}

/*
 * i915_perf_disable_locked - handle I915_PERF_IOCTL_DISABLE ioctl
 *
 * Disables the stream if currently enabled. Assumes the caller holds the
 * perf.lock mutex.
 */
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

/*
 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
 *
 * Returns: zero on success or a negative error code; -EINVAL for an
 * unknown ioctl request.
 */
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}

/*
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 *
 * Takes the perf.lock mutex before calling i915_perf_ioctl_locked().
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/*
 * i915_perf_destroy_locked - destroy an i915 perf stream
 *
 * Frees all resources associated with the given i915 perf stream, disabling
 * any associated data capture in the process. Assumes the caller holds the
 * perf.lock mutex.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}

/*
 * i915_perf_release - handles userspace close() of a stream file
 *
 * Cleans up any resources associated with an open i915 perf stream file.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	/* Release the reference the perf stream kept on the driver. */
	drm_dev_put(&dev_priv->drm);

	return 0;
}


static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
	/*
	 * Our ioctls have no arguments, so it's safe to use the same function
	 * to handle 32-bit compatibility.
	 */
	.compat_ioctl   = i915_perf_ioctl,
};
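
/*
 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
 *
 * Implements further stream config validation and stream initialization on
 * behalf of i915_perf_open_ioctl() with the perf.lock mutex taken to
 * serialize with any non-file-operation driver hooks.
 *
 * Where userspace is interested in OA unit metrics, further config
 * validation and stream initialization is handled by i915_oa_stream_init().
 *
 * Returns: a newly opened i915 perf stream file descriptor or a negative
 * error code.
 */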
static int
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	bool privileged_op = true;
	int stream_fd;
	int ret;

	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
		if (!specific_ctx) {
			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
				  ctx_handle);
			ret = -ENOENT;
			goto err;
		}
	}

	/*
	 * On Haswell the OA unit supports clock gating off for a specific
	 * context and in this mode there's no visibility of metrics for the
	 * rest of the system, which we consider acceptable for a
	 * non-privileged client.
	 *
	 * For Gen8+ the OA unit no longer supports clock gating off for a
	 * specific context and the kernel can't securely stop the counters
	 * from updating as system-wide / global values. Even though we can
	 * filter reports based on the included context ID we can't block
	 * clients from seeing the raw / global counter values via
	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
	 * open a stream.
	 */
	if (IS_HASWELL(dev_priv) && specific_ctx)
		privileged_op = false;

	/*
	 * Similar to perf's kernel.perf_paranoid_cpu sysctl option we check a
	 * dev.i915.perf_stream_paranoid sysctl option to determine if it's ok
	 * to access system wide OA counters without CAP_SYS_ADMIN privileges.
	 */
	if (privileged_op &&
	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->dev_priv = dev_priv;
	stream->ctx = specific_ctx;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/*
	 * We avoid simply assigning stream->sample_flags = props->sample_flags
	 * to have _stream_init check the combination of sample flags more
	 * thoroughly, but still this is the expected result at this point.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_flags;
	}

	list_add(&stream->link, &dev_priv->perf.streams);

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_open;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	/*
	 * Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&dev_priv->drm);

	return stream_fd;

err_open:
	list_del(&stream->link);
err_flags:
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx)
		i915_gem_context_put(specific_ctx);
err:
	return ret;
}

static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
{
	return div64_u64(1000000000ULL * (2ULL << exponent),
			 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
}

/*
 * read_properties_unlocked - validate + copy userspace stream open properties
 * @dev_priv: i915 device instance
 * @uprops: The array of u64 key value pairs given by userspace
 * @n_props: The number of key value pairs expected in @uprops
 * @props: The stream configuration built up while validating properties
 *
 * Note this function only validates properties in isolation; it doesn't
 * validate that the combination of properties makes sense or that all
 * properties necessary for a particular kind of stream have been given.
 */
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	u32 i;

	memset(props, 0, sizeof(struct perf_open_properties));

	if (!n_props) {
		DRM_DEBUG("No i915 perf properties given\n");
		return -EINVAL;
	}

	/*
	 * Considering that ID = 0 is reserved and assuming that we don't
	 * (currently) expect any configurations to ever specify duplicate
	 * values for a particular property ID then the last _PROP_MAX value
	 * is one greater than the maximum number of properties we expect to
	 * get from userspace.
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		DRM_DEBUG("More i915 perf properties specified than exist\n");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;
		int ret;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
			DRM_DEBUG("Unknown i915 perf property ID\n");
			return -EINVAL;
		}

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			if (value)
				props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			if (value == 0) {
				DRM_DEBUG("Unknown OA metric set ID\n");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				DRM_DEBUG("Out-of-range OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			if (!dev_priv->perf.oa.oa_formats[value].size) {
				DRM_DEBUG("Unsupported OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				DRM_DEBUG("OA timer exponent too high (> %u)\n",
					  OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/*
			 * Since the timestamps included in OA reports are
			 * only 32 bits it seems reasonable to limit the OA
			 * exponent to where it's still possible to account
			 * for overflow in OA report timestamps.
			 */
			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = oa_exponent_to_ns(dev_priv, value);

			/*
			 * This check is primarily to ensure that oa_period <=
			 * UINT32_MAX (before passing to do_div which only
			 * accepts a u32 denominator), but we can also skip
			 * checking anything < 1Hz which implicitly can't be
			 * limited via an integer oa_max_sample_rate.
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;
				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate &&
			    !capable(CAP_SYS_ADMIN)) {
				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
					  i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		case DRM_I915_PERF_PROP_MAX:
			MISSING_CASE(id);
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}
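
/**
 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
 * @dev: drm device
 * @data: ioctl data copied from userspace (unvalidated)
 * @file: drm file
 *
 * Validates the stream open parameters given by userspace including flags
 * and an array of u64 key, value pair properties.
 *
 * Very little is assumed up front about the nature of the stream being
 * opened (for instance we don't assume it's for periodic OA unit metrics);
 * an i915-perf stream is expected to be a suitable interface for other
 * forms of buffered data written by the GPU besides periodic OA metrics.
 *
 * Returns: A newly opened i915 perf stream file descriptor or negative
 * error code on failure.
 */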
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
			   I915_PERF_FLAG_FD_NONBLOCK |
			   I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(dev_priv,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/**
 * i915_perf_register - exposes i915-perf to userspace
 * @dev_priv: i915 device instance
 *
 * In particular OA metric sets are advertised under a sysfs metrics/
 * directory allowing userspace to enumerate valid IDs that can be used to
 * open an i915-perf stream.
 */
void i915_perf_register(struct drm_i915_private *dev_priv)
{
	int ret;

	if (!dev_priv->perf.initialized)
		return;

	/*
	 * To be sure we're synchronized with an attempted
	 * i915_perf_open_ioctl(); considering that we register after being
	 * exposed to userspace.
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto exit;

	sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);

	if (INTEL_GEN(dev_priv) >= 11) {
		i915_perf_load_test_config_icl(dev_priv);
	} else if (IS_CANNONLAKE(dev_priv)) {
		i915_perf_load_test_config_cnl(dev_priv);
	} else if (IS_COFFEELAKE(dev_priv)) {
		if (IS_CFL_GT2(dev_priv))
			i915_perf_load_test_config_cflgt2(dev_priv);
		if (IS_CFL_GT3(dev_priv))
			i915_perf_load_test_config_cflgt3(dev_priv);
	} else if (IS_GEMINILAKE(dev_priv)) {
		i915_perf_load_test_config_glk(dev_priv);
	} else if (IS_KABYLAKE(dev_priv)) {
		if (IS_KBL_GT2(dev_priv))
			i915_perf_load_test_config_kblgt2(dev_priv);
		else if (IS_KBL_GT3(dev_priv))
			i915_perf_load_test_config_kblgt3(dev_priv);
	} else if (IS_BROXTON(dev_priv)) {
		i915_perf_load_test_config_bxt(dev_priv);
	} else if (IS_SKYLAKE(dev_priv)) {
		if (IS_SKL_GT2(dev_priv))
			i915_perf_load_test_config_sklgt2(dev_priv);
		else if (IS_SKL_GT3(dev_priv))
			i915_perf_load_test_config_sklgt3(dev_priv);
		else if (IS_SKL_GT4(dev_priv))
			i915_perf_load_test_config_sklgt4(dev_priv);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		i915_perf_load_test_config_chv(dev_priv);
	} else if (IS_BROADWELL(dev_priv)) {
		i915_perf_load_test_config_bdw(dev_priv);
	} else if (IS_HASWELL(dev_priv)) {
		i915_perf_load_test_config_hsw(dev_priv);
	}

	if (dev_priv->perf.oa.test_config.id == 0)
		goto sysfs_error;

	ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
				 &dev_priv->perf.oa.test_config.sysfs_metric);
	if (ret)
		goto sysfs_error;

	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);

	goto exit;

sysfs_error:
	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;

exit:
	mutex_unlock(&dev_priv->perf.lock);
}

/**
 * i915_perf_unregister - hide i915-perf from userspace
 * @dev_priv: i915 device instance
 *
 * i915-perf state cleanup is split up into an 'unregister' and 'deinit'
 * phase where the interface is first hidden from userspace by
 * i915_perf_unregister() before cleaning up remaining state.
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.oa.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}

static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	static const i915_reg_t flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
			return true;
	}
	return false;
}
3010
3011static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
3012{
3013 return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
3014 addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
3015 (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
3016 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
3017 (addr >= i915_mmio_reg_offset(OACEC0_0) &&
3018 addr <= i915_mmio_reg_offset(OACEC7_1));
3019}
3020
3021static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3022{
3023 return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
3024 (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
3025 addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
3026 (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
3027 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
3028 (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
3029 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
3030}
3031
3032static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3033{
3034 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3035 addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
3036 (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
3037 addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
3038}
3039
3040static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3041{
3042 return gen8_is_valid_mux_addr(dev_priv, addr) ||
3043 addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
3044 (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
3045 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
3046}
3047
3048static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3049{
3050 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3051 (addr >= 0x25100 && addr <= 0x2FF90) ||
3052 (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
3053 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
3054 addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
3055}
3056
3057static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3058{
3059 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3060 (addr >= 0x182300 && addr <= 0x1823A4);
3061}

static u32 mask_reg_value(u32 reg, u32 val)
{
	/*
	 * HALF_SLICE_CHICKEN2 is programmed with the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/*
	 * WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name, plus a number of selection fields used by
	 * OA configs.
	 */
	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}

static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
					 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	if (!access_ok(regs, n_regs * sizeof(u32) * 2))
		return ERR_PTR(-EFAULT);

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(dev_priv, addr)) {
			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}

static ssize_t show_dynamic_id(struct device *dev,
			       struct device_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(dev_priv->perf.metrics_kobj,
				  &oa_config->sysfs_metric);
}

/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA registers so they can be saved as a new OA
 * config, usable for programming the OA unit and its NOA network.
 *
 * Returns: a newly allocated config number to be used with the perf open
 * ioctl, or a negative error code on failure.
 */
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_oa_config *args = data;
	struct i915_oa_config *oa_config, *tmp;
	int err, id;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
		return -EACCES;
	}

	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
		DRM_DEBUG("No OA registers given\n");
		return -EINVAL;
	}

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config) {
		DRM_DEBUG("Failed to allocate memory for the OA config\n");
		return -ENOMEM;
	}

	atomic_set(&oa_config->ref_count, 1);

	if (!uuid_is_valid(args->uuid)) {
		DRM_DEBUG("Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/*
	 * The last character in oa_config->uuid will be 0 because oa_config
	 * is kzalloc'd.
	 */
	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));

	oa_config->mux_regs_len = args->n_mux_regs;
	oa_config->mux_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_mux_reg,
			      u64_to_user_ptr(args->mux_regs_ptr),
			      args->n_mux_regs);

	if (IS_ERR(oa_config->mux_regs)) {
		DRM_DEBUG("Failed to create OA config for mux_regs\n");
		err = PTR_ERR(oa_config->mux_regs);
		goto reg_err;
	}

	oa_config->b_counter_regs_len = args->n_boolean_regs;
	oa_config->b_counter_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_b_counter_reg,
			      u64_to_user_ptr(args->boolean_regs_ptr),
			      args->n_boolean_regs);

	if (IS_ERR(oa_config->b_counter_regs)) {
		DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
		err = PTR_ERR(oa_config->b_counter_regs);
		goto reg_err;
	}

	if (INTEL_GEN(dev_priv) < 8) {
		if (args->n_flex_regs != 0) {
			err = -EINVAL;
			goto reg_err;
		}
	} else {
		oa_config->flex_regs_len = args->n_flex_regs;
		oa_config->flex_regs =
			alloc_oa_regs(dev_priv,
				      dev_priv->perf.oa.ops.is_valid_flex_reg,
				      u64_to_user_ptr(args->flex_regs_ptr),
				      args->n_flex_regs);

		if (IS_ERR(oa_config->flex_regs)) {
			DRM_DEBUG("Failed to create OA config for flex_regs\n");
			err = PTR_ERR(oa_config->flex_regs);
			goto reg_err;
		}
	}

	err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (err)
		goto reg_err;

	/*
	 * We shouldn't have too many configs, so this iteration shouldn't be
	 * too costly.
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel test config. */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate an id for the OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while still in use; they stop appearing in sysfs
 * and their contents are freed when the last stream using the config is
 * closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}

static struct ctl_table oa_table[] = {
	{
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}
};

static struct ctl_table i915_root[] = {
	{
		.procname = "i915",
		.maxlen = 0,
		.mode = 0555,
		.child = oa_table,
	},
	{}
};

static struct ctl_table dev_root[] = {
	{
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = i915_root,
	},
	{}
};

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase, with i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/*
		 * Note: only gen8+ parts running in execlist mode are
		 * supported here, which all such parts enable by default.
		 */
		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN(dev_priv, 8)) {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			} else {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
			}
		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;

			if (IS_GEN(dev_priv, 10)) {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
			} else {
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e;
			}
			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
		}
	}

	if (dev_priv->perf.oa.ops.enable_metric_set) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		oa_sample_rate_hard_limit = 1000 *
			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
}

static int destroy_config(int id, void *p, void *data)
{
	struct drm_i915_private *dev_priv = data;
	struct i915_oa_config *oa_config = p;

	put_oa_config(dev_priv, oa_config);

	return 0;
}

/**
 * i915_perf_fini - counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));

	dev_priv->perf.initialized = false;
}