1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194#include <linux/anon_inodes.h>
195#include <linux/sizes.h>
196#include <linux/uuid.h>
197
198#include "i915_drv.h"
199#include "i915_oa_hsw.h"
200#include "i915_oa_bdw.h"
201#include "i915_oa_chv.h"
202#include "i915_oa_sklgt2.h"
203#include "i915_oa_sklgt3.h"
204#include "i915_oa_sklgt4.h"
205#include "i915_oa_bxt.h"
206#include "i915_oa_kblgt2.h"
207#include "i915_oa_kblgt3.h"
208#include "i915_oa_glk.h"
209#include "i915_oa_cflgt2.h"
210#include "i915_oa_cflgt3.h"
211#include "i915_oa_cnl.h"
212#include "i915_oa_icl.h"
213#include "intel_lrc_reg.h"
214
215
216
217
218
219#define OA_BUFFER_SIZE SZ_16M
220
221#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264#define OA_TAIL_MARGIN_NSEC 100000ULL
265#define INVALID_TAIL_PTR 0xffffffff
266
267
268
269
270#define POLL_FREQUENCY 200
271#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
272
273
/* Bounds (0 and 1) for the stream-paranoid sysctl below. */
static int zero;
static int one = 1;
/*
 * When non-zero (the default), only privileged processes may open a
 * system-wide i915 perf stream. Presumably exposed as a sysctl given
 * the zero/one bounds above — sysctl table not visible in this chunk.
 */
static u32 i915_perf_stream_paranoid = true;
277
278
279
280
281
282
283
284
285
286#define OA_EXPONENT_MAX 31
287
288#define INVALID_CTX_ID 0xffffffff
289
290
291#define OAREPORT_REASON_MASK 0x3f
292#define OAREPORT_REASON_SHIFT 19
293#define OAREPORT_REASON_TIMER (1<<0)
294#define OAREPORT_REASON_CTX_SWITCH (1<<3)
295#define OAREPORT_REASON_CLK_RATIO (1<<5)
296
297
298
299
300
301
302
303
304
/*
 * Hard upper bound on the OA sampling rate; zero-initialised here and
 * presumably set during driver init — initialisation not visible in
 * this chunk.
 */
static int oa_sample_rate_hard_limit;
306
307
308
309
310
311
312
/*
 * Default cap on the OA sampling frequency, in Hz (100 kHz); presumably
 * tunable at runtime — confirm against the sysctl table elsewhere.
 */
static u32 i915_oa_max_sample_rate = 100000;
314
315
316
317
318
/*
 * Haswell OA report formats, indexed by the uAPI I915_OA_FORMAT_* enum.
 * Each entry pairs a hardware format selector with a report size in
 * bytes (field names not visible in this chunk — confirm against
 * struct i915_oa_format).
 */
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13] = { 0, 64 },
	[I915_OA_FORMAT_A29] = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
	/* Index 3 intentionally absent — presumably a disallowed format. */
	[I915_OA_FORMAT_B4_C8] = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
	[I915_OA_FORMAT_C4_B8] = { 7, 64 },
};
329
/*
 * Gen8+ OA report formats, indexed by the uAPI I915_OA_FORMAT_* enum;
 * same (hardware selector, size-in-bytes) layout as hsw_oa_formats.
 */
static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12] = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8] = { 7, 64 },
};
336
337#define SAMPLE_OA_REPORT (1<<0)
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/**
 * struct perf_open_properties - validated properties for opening a perf stream
 *
 * @sample_flags: which sample types were requested (e.g. %SAMPLE_OA_REPORT)
 * @single_context: whether a single GPU context or all contexts are monitored
 * @ctx_handle: a GEM context handle, used together with @single_context
 * @metrics_set: ID of the OA metrics/config set to use
 * @oa_format: requested OA unit hardware report format
 * @oa_periodic: whether periodic OA sampling is enabled
 * @oa_period_exponent: exponent from which the sampling period is derived
 *
 * Built up while parsing the stream-open ioctl's property list (the
 * parsing code is not in this chunk).
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};
365
366static void free_oa_config(struct drm_i915_private *dev_priv,
367 struct i915_oa_config *oa_config)
368{
369 if (!PTR_ERR(oa_config->flex_regs))
370 kfree(oa_config->flex_regs);
371 if (!PTR_ERR(oa_config->b_counter_regs))
372 kfree(oa_config->b_counter_regs);
373 if (!PTR_ERR(oa_config->mux_regs))
374 kfree(oa_config->mux_regs);
375 kfree(oa_config);
376}
377
378static void put_oa_config(struct drm_i915_private *dev_priv,
379 struct i915_oa_config *oa_config)
380{
381 if (!atomic_dec_and_test(&oa_config->ref_count))
382 return;
383
384 free_oa_config(dev_priv, oa_config);
385}
386
387static int get_oa_config(struct drm_i915_private *dev_priv,
388 int metrics_set,
389 struct i915_oa_config **out_config)
390{
391 int ret;
392
393 if (metrics_set == 1) {
394 *out_config = &dev_priv->perf.oa.test_config;
395 atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
396 return 0;
397 }
398
399 ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
400 if (ret)
401 return ret;
402
403 *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
404 if (!*out_config)
405 ret = -EINVAL;
406 else
407 atomic_inc(&(*out_config)->ref_count);
408
409 mutex_unlock(&dev_priv->perf.metrics_lock);
410
411 return ret;
412}
413
414static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
415{
416 return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
417}
418
419static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
420{
421 u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
422
423 return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
424}
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/*
 * oa_buffer_check_unlocked - check for readable data, aging the tail pointer
 *
 * Called from blocking reads and from the poll-check hrtimer to decide
 * whether the OA buffer holds at least one complete report for userspace.
 *
 * The hardware tail pointer can race ahead of what is actually visible
 * to the CPU, so a freshly read hardware tail is first parked as an
 * "aging" tail and only promoted to the "aged" tail (the one read() is
 * allowed to consume up to) once it has aged for OA_TAIL_MARGIN_NSEC.
 *
 * Side effects: updates oa_buffer.tails[], .aging_timestamp and
 * .aged_tail_idx under the ptr_lock.
 *
 * Returns: true when at least report_size bytes are available behind the
 * aged tail, false otherwise.
 */
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/*
	 * All head/tail state is shared with read() and the hrtimer, so it
	 * is sampled and updated under the ptr_lock.
	 */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * NOTE: the head observed here may be slightly stale if a read() is
	 * concurrently consuming reports.
	 */
	head = dev_priv->perf.oa.oa_buffer.head;

	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);

	/*
	 * Round the hardware tail down to a whole number of reports
	 * (report_size is a power of two per the format tables).
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/*
	 * Promote the aging tail to aged once it has sat for longer than
	 * the visibility margin, flipping which tails[] slot is "aged" and
	 * invalidating the other slot so a new pointer can start aging.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that a new pointer needs to start aging. */
		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/*
	 * Start aging a new tail only when there is no pointer currently
	 * aging AND the hardware tail represents at least one report more
	 * than the aged tail already offers — this throttles updates and
	 * guarantees a promoted tail always yields a successful read.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/*
		 * Paranoid bounds check: ignore a hardware tail that falls
		 * outside the buffer's GGTT range rather than corrupting
		 * our pointer state.
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557static int append_oa_status(struct i915_perf_stream *stream,
558 char __user *buf,
559 size_t count,
560 size_t *offset,
561 enum drm_i915_perf_record_type type)
562{
563 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
564
565 if ((count - *offset) < header.size)
566 return -ENOSPC;
567
568 if (copy_to_user(buf + *offset, &header, sizeof(header)))
569 return -EFAULT;
570
571 (*offset) += header.size;
572
573 return 0;
574}
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593static int append_oa_sample(struct i915_perf_stream *stream,
594 char __user *buf,
595 size_t count,
596 size_t *offset,
597 const u8 *report)
598{
599 struct drm_i915_private *dev_priv = stream->dev_priv;
600 int report_size = dev_priv->perf.oa.oa_buffer.format_size;
601 struct drm_i915_perf_record_header header;
602 u32 sample_flags = stream->sample_flags;
603
604 header.type = DRM_I915_PERF_RECORD_SAMPLE;
605 header.pad = 0;
606 header.size = stream->sample_size;
607
608 if ((count - *offset) < header.size)
609 return -ENOSPC;
610
611 buf += *offset;
612 if (copy_to_user(buf, &header, sizeof(header)))
613 return -EFAULT;
614 buf += sizeof(header);
615
616 if (sample_flags & SAMPLE_OA_REPORT) {
617 if (copy_to_user(buf, report, report_size))
618 return -EFAULT;
619 }
620
621 (*offset) += header.size;
622
623 return 0;
624}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
/*
 * gen8_append_oa_reports - copy buffered OA reports into a read() buffer
 * @stream: an i915-perf stream opened for OA metrics
 * @buf: destination userspace buffer
 * @count: number of bytes userspace wants to read
 * @offset: (inout) current write position within @buf
 *
 * Consumes whole reports from the OA buffer between the driver head and
 * the aged tail, filtering out spurious reports and (for single-context
 * streams) reports belonging to other contexts, then advances the
 * hardware head pointer past everything consumed.
 *
 * An error causing a short read (-ENOSPC/-EFAULT) is returned even when
 * some records were already copied; the caller decides whether to
 * squash it.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	/* Snapshot head and the currently aged tail under the ptr_lock. */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail means no pointer has aged yet (see
	 * oa_buffer_check_unlocked()); tell the caller to retry later.
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * head/tail are stored as GGTT addresses; convert to offsets into
	 * the CPU mapping of the buffer.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out-of-bounds or misaligned pointer implies a driver bug —
	 * bail rather than read outside the buffer.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;

	/* Walk whole reports from head towards tail, wrapping via mask. */
	for (;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * head and tail are report-size aligned and OA_BUFFER_SIZE
		 * is a multiple of report_size, so a report should never
		 * straddle the end of the buffer; treat it as a driver bug
		 * if one does.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field doubles as a validity check: real
		 * reports always carry a non-zero reason, so a zero reason
		 * marks a spurious report (rate-limited notice only).
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;

		/*
		 * When the context-valid bit is clear the ctx_id field is
		 * meaningless — normalise it to INVALID_CTX_ID (in both the
		 * local and the in-buffer copy) so the filtering below and
		 * userspace see a consistent value.
		 */
		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * Context filtering for single-context streams: forward a
		 * report when the stream monitors all contexts, when the
		 * report belongs to the monitored context, when the
		 * previous report did (so the switch away is visible), or
		 * when it is a context-switch report.
		 */
		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		     dev_priv->perf.oa.specific_ctx_id) ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * Scrub the ctx_id of reports for other contexts
			 * before handing them to userspace.
			 */
			if (dev_priv->perf.oa.exclusive_stream->ctx &&
			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * Clear the first dword of each consumed report so the
		 * zero-reason check above can detect stale/spurious data if
		 * this slot is ever seen again before being rewritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * Convert back to a GGTT address and publish the new head
		 * to both hardware and our shared state.
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
/*
 * gen8_oa_read - copy status records then buffered OA reports to userspace
 * @stream: an i915-perf stream opened for OA metrics
 * @buf: destination userspace buffer
 * @count: number of bytes userspace wants to read
 * @offset: (inout) current write position within @buf
 *
 * Checks the Gen8 OA status register for error conditions first,
 * emitting BUFFER_LOST / REPORT_LOST records ahead of any samples, then
 * defers to gen8_append_oa_reports() for the actual report data.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * On buffer overflow: report the loss to userspace, then disable
	 * and re-enable the OA unit — effectively resetting the buffer —
	 * since reports written during the overflow can't be trusted.
	 * Re-read the status afterwards so the stale overflow bit isn't
	 * acted on again below.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() reinitialises the OA buffer, which
		 * also clears the status register.
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	/*
	 * Report-lost condition: tell userspace, then clear just that bit
	 * in the status register so it can latch again.
	 */
	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
/*
 * gen7_append_oa_reports - copy buffered OA reports into a read() buffer
 * @stream: an i915-perf stream opened for OA metrics
 * @buf: destination userspace buffer
 * @count: number of bytes userspace wants to read
 * @offset: (inout) current write position within @buf
 *
 * Gen7 counterpart of gen8_append_oa_reports(): consumes whole reports
 * between the driver head and the aged tail, skipping spurious reports,
 * then advances the hardware head pointer. Gen7 has no context-id
 * filtering — every valid report is forwarded.
 *
 * An error causing a short read (-ENOSPC/-EFAULT) is returned even when
 * some records were already copied; the caller decides whether to
 * squash it.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	/* Snapshot head and the currently aged tail under the ptr_lock. */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail means no pointer has aged yet; tell the caller
	 * to retry later.
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * head/tail are stored as GGTT addresses; convert to offsets into
	 * the CPU mapping of the buffer.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out-of-bounds or misaligned pointer implies a driver bug —
	 * bail rather than read outside the buffer.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;

	/* Walk whole reports from head towards tail, wrapping via mask. */
	for (;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/*
		 * Aligned pointers plus a buffer size that is a multiple of
		 * report_size mean a report should never straddle the end
		 * of the buffer; treat it as a driver bug if one does.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The first dword of every consumed report is zeroed below,
		 * and real reports never start with zero — so a zero first
		 * dword marks a spurious/stale report.
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/*
		 * Clear the first dword so stale data in this slot can be
		 * detected by the check above on a future pass.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * Convert back to a GGTT address and publish the new head
		 * via OASTATUS2 (keeping the GGTT memory-select bit set).
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
/*
 * gen7_oa_read - copy status records then buffered OA reports to userspace
 * @stream: an i915-perf stream opened for OA metrics
 * @buf: destination userspace buffer
 * @count: number of bytes userspace wants to read
 * @offset: (inout) current write position within @buf
 *
 * Gen7 counterpart of gen8_oa_read(). Gen7's OASTATUS1 error bits can't
 * be cleared by a register write, so already-reported conditions are
 * tracked in gen7_latched_oastatus1 and masked out of each fresh read
 * to avoid reporting them to userspace more than once.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/* Hide conditions that were already reported to userspace. */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/*
	 * On buffer overflow: report the loss to userspace, then disable
	 * and re-enable the OA unit — effectively resetting the buffer
	 * (which also clears the latched status) — since reports written
	 * during the overflow can't be trusted. Re-read the status so the
	 * stale overflow bit isn't acted on again below.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(stream);
		dev_priv->perf.oa.ops.oa_enable(stream);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	/*
	 * Report-lost condition: tell userspace once, then latch the bit
	 * so it is masked out of future reads.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1150{
1151 struct drm_i915_private *dev_priv = stream->dev_priv;
1152
1153
1154 if (!dev_priv->perf.oa.periodic)
1155 return -EIO;
1156
1157 return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
1158 oa_buffer_check_unlocked(dev_priv));
1159}
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171static void i915_oa_poll_wait(struct i915_perf_stream *stream,
1172 struct file *file,
1173 poll_table *wait)
1174{
1175 struct drm_i915_private *dev_priv = stream->dev_priv;
1176
1177 poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
1178}
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192static int i915_oa_read(struct i915_perf_stream *stream,
1193 char __user *buf,
1194 size_t count,
1195 size_t *offset)
1196{
1197 struct drm_i915_private *dev_priv = stream->dev_priv;
1198
1199 return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
1200}
1201
1202static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
1203 struct i915_gem_context *ctx)
1204{
1205 struct intel_engine_cs *engine = i915->engine[RCS0];
1206 struct intel_context *ce;
1207 int ret;
1208
1209 ret = i915_mutex_lock_interruptible(&i915->drm);
1210 if (ret)
1211 return ERR_PTR(ret);
1212
1213
1214
1215
1216
1217
1218
1219 ce = intel_context_pin(ctx, engine);
1220 mutex_unlock(&i915->drm.struct_mutex);
1221 if (IS_ERR(ce))
1222 return ce;
1223
1224 i915->perf.oa.pinned_ctx = ce;
1225
1226 return ce;
1227}
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
/*
 * oa_get_render_ctx_id - determine and hold the context-id used for filtering
 * @stream: an i915-perf stream opened for OA metrics
 *
 * Pins the stream's context and derives the per-gen context ID (and
 * valid-bit mask) that OA reports will carry, so that reports can be
 * filtered to the monitored context.
 *
 * Returns: 0 on success, or a negative error from oa_pin_context().
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->dev_priv;
	struct intel_context *ce;

	ce = oa_pin_context(i915, stream->ctx);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(i915)) {
	case 7: {
		/*
		 * On Haswell the ID is the GGTT offset of the pinned
		 * context image itself; no masking applies.
		 */
		i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
		i915->perf.oa.specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (USES_GUC_SUBMISSION(i915)) {
			/*
			 * With GuC submission the ID comes from the lower
			 * half of the lrc descriptor, shifted down past the
			 * page-offset bits (bits 12+).
			 */
			i915->perf.oa.specific_ctx_id =
				lower_32_bits(ce->lrc_desc) >> 12;

			/*
			 * NOTE(review): one bit narrower than the execlists
			 * mask below — presumably reserved; confirm against
			 * the GuC descriptor layout.
			 */
			i915->perf.oa.specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		} else {
			/* Execlists: ID lives in the descriptor's upper half. */
			i915->perf.oa.specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			i915->perf.oa.specific_ctx_id =
				upper_32_bits(ce->lrc_desc);
			i915->perf.oa.specific_ctx_id &=
				i915->perf.oa.specific_ctx_id_mask;
		}
		break;

	case 11: {
		/*
		 * Gen11 packs SW context id, engine instance and engine
		 * class into the descriptor's upper half; mask covers all
		 * three fields (shifts adjusted by -32 for the upper word).
		 */
		i915->perf.oa.specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
			((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
			((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
		i915->perf.oa.specific_ctx_id = upper_32_bits(ce->lrc_desc);
		i915->perf.oa.specific_ctx_id &=
			i915->perf.oa.specific_ctx_id_mask;
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(i915));
	}

	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
			 i915->perf.oa.specific_ctx_id,
			 i915->perf.oa.specific_ctx_id_mask);

	return 0;
}
1313
1314
1315
1316
1317
1318
1319
1320
1321static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1322{
1323 struct drm_i915_private *dev_priv = stream->dev_priv;
1324 struct intel_context *ce;
1325
1326 dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
1327 dev_priv->perf.oa.specific_ctx_id_mask = 0;
1328
1329 ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
1330 if (ce) {
1331 mutex_lock(&dev_priv->drm.struct_mutex);
1332 intel_context_unpin(ce);
1333 mutex_unlock(&dev_priv->drm.struct_mutex);
1334 }
1335}
1336
1337static void
1338free_oa_buffer(struct drm_i915_private *i915)
1339{
1340 mutex_lock(&i915->drm.struct_mutex);
1341
1342 i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
1343 I915_VMA_RELEASE_MAP);
1344
1345 mutex_unlock(&i915->drm.struct_mutex);
1346
1347 i915->perf.oa.oa_buffer.vaddr = NULL;
1348}
1349
/*
 * Tear down an OA stream: clear the exclusive-stream slot, disable the
 * metric set, release the OA buffer, forcewake and runtime-pm
 * references, the context-id filter and the OA config reference.
 */
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	/*
	 * exclusive_stream is cleared before disabling the metric set,
	 * both under struct_mutex — NOTE(review): presumably because the
	 * disable path (or a concurrent context update) checks it; confirm
	 * against gen8_configure_all_contexts() callers.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(dev_priv);

	/* Drop the references taken when the stream was opened. */
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv, stream->wakeref);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	/* Surface how many spurious-report notices were rate-limited away. */
	if (dev_priv->perf.oa.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.oa.spurious_report_rs.missed);
	}
}
1380
/*
 * Program the Gen7 OA buffer registers (head, base, size/tail) and reset
 * all of the driver's head/tail bookkeeping to match.
 */
static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * Head starts at the base of the buffer; OASTATUS2 carries both
	 * the head pointer and the GGTT memory-select bit.
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	/* OASTATUS1 takes the tail pointer together with the buffer size. */
	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M);

	/* No tail has been read yet, let alone aged. */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * Gen7 status bits can't be cleared in hardware; the buffer reset
	 * makes any latched conditions moot, so forget them.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/*
	 * Zero the whole buffer so the spurious-report detection in
	 * gen7_append_oa_reports() (which keys off a zero first dword)
	 * starts from a known-clean state.
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Nothing to poll for until new reports land. */
	dev_priv->perf.oa.pollin = false;
}
1429
/*
 * Program the Gen8+ OA buffer registers (status, head, base/size, tail)
 * and reset all of the driver's head/tail bookkeeping to match.
 */
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* Clear any stale status, then point head at the buffer base. */
	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	/* Upper dword of the buffer address is zero (32-bit GGTT offset). */
	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * Base address, buffer size and GGTT memory select are programmed
	 * together in OABUFFER; tail starts at the base too.
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* No tail has been read yet, let alone aged. */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset the context-switch tracking used by the filtering logic in
	 * gen8_append_oa_reports().
	 */
	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * Zero the whole buffer so the spurious-report detection in
	 * gen8_append_oa_reports() (which keys off a zeroed first dword)
	 * starts from a known-clean state.
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Nothing to poll for until new reports land. */
	dev_priv->perf.oa.pollin = false;
}
1488
/*
 * Allocate the 16M OA buffer object, pin it into the GGTT and map it
 * into the kernel address space, recording vma and vaddr in
 * dev_priv->perf.oa.oa_buffer.
 *
 * Returns 0 on success or a negative error code; on failure the
 * vma/vaddr fields are left NULL.
 */
static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	/* Only one OA buffer may exist at a time. */
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	/* The head/tail wrap arithmetic relies on a power-of-two size. */
	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

	/* Unwind in reverse order of acquisition. */
err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}
1548
1549static void config_oa_regs(struct drm_i915_private *dev_priv,
1550 const struct i915_oa_reg *regs,
1551 u32 n_regs)
1552{
1553 u32 i;
1554
1555 for (i = 0; i < n_regs; i++) {
1556 const struct i915_oa_reg *reg = regs + i;
1557
1558 I915_WRITE(reg->addr, reg->value);
1559 }
1560}
1561
/*
 * Apply the stream's OA config on Haswell: tweak clock gating, program
 * the MUX registers, wait for the MUX programming to settle, then
 * program the boolean counter registers.
 *
 * Always returns 0 (the signature matches the gen8+ variant which can
 * fail).
 */
static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;

	/*
	 * Clock-gating adjustments before touching the MUX: disable DOP
	 * clock gating and CS-unit clock gating. NOTE(review): presumably
	 * required for reliable MUX programming/counting on HSW — confirm
	 * against the HSW OA programming notes.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				   ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	/*
	 * Give the MUX configuration time to settle before programming the
	 * B-counter registers; 15-20ms sleep as a conservative margin.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
1611
/*
 * Undo hsw_enable_metric_set(): restore the clock-gating state changed
 * on enable (in reverse order) and clear the NOA-enable chicken bit.
 */
static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				   GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}
1622
1623
1624
1625
1626
1627
1628
1629
/*
 * Write the per-context OA register state into a context image.
 *
 * Updates @reg_state (a logical ring context image) with the
 * OACTXCONTROL timer settings, the seven EU flex-counter registers from
 * @oa_config (or 0 when @oa_config is NULL / doesn't program one), and
 * the power/clock state register.
 */
static void
gen8_update_reg_state_unlocked(struct intel_context *ce,
			       u32 *reg_state,
			       const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *i915 = ce->gem_context->i915;
	u32 ctx_oactxctrl = i915->perf.oa.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
	/* The MMIO offsets for these registers aren't assumed contiguous —
	 * each is listed explicitly. */
	i915_reg_t flex_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	/* Timer period/enable plus counter resume, per the stream config. */
	CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
		(i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME);

	for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
		/* Each context-image register entry occupies two dwords. */
		u32 state_offset = ctx_flexeu0 + i * 2;
		u32 mmio = i915_mmio_reg_offset(flex_regs[i]);

		/* Default to 0 when the config doesn't program this one. */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			/* Linear scan: the flex_regs list is short. */
			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		CTX_REG(reg_state, state_offset, flex_regs[i], value);
	}

	CTX_REG(reg_state,
		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
		gen8_make_rpcs(i915, &ce->sseu));
}
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
/*
 * gen8_configure_all_contexts - rewrite per-context OA state everywhere
 * @dev_priv: i915 device instance
 * @oa_config: OA config to program, or NULL to restore defaults
 *
 * On gen8+ the OACTXCONTROL and flex-EU registers are saved/restored as
 * part of each context image, so changing the OA configuration means
 * patching the register state of every context. The GPU is idled first
 * so no image is in flight while being rewritten, and a request is
 * submitted afterwards so the (unpatchable at this level) currently
 * active state gets switched out.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
				       const struct i915_oa_config *oa_config)
{
	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
	unsigned int map_type = i915_coherent_map_type(dev_priv);
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/*
	 * Wait for the GPU to go idle so no context image is being
	 * executed (or switched) while we rewrite it below.
	 */
	ret = i915_gem_wait_for_idle(dev_priv,
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Update all contexts now that we've stalled the submission. */
	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
		struct intel_context *ce = intel_context_lookup(ctx, engine);
		u32 *regs;

		/* Contexts with no allocated image need no patching. */
		if (!ce || !ce->state)
			continue;

		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
		if (IS_ERR(regs))
			return PTR_ERR(regs);

		ce->state->obj->mm.dirty = true;
		/* Skip ahead to the register-state page of the image. */
		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);

		gen8_update_reg_state_unlocked(ce, regs, oa_config);

		i915_gem_object_unpin_map(ce->state->obj);
	}

	/*
	 * Submit an (empty) request on the kernel context so whatever
	 * context state is currently resident on the engine is saved out
	 * and the new register state takes effect.
	 */
	rq = i915_request_alloc(engine, dev_priv->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_add(rq);

	return 0;
}
1772
/*
 * Apply the stream's OA config on gen8+: set up OA debug behaviour
 * (gen9-11), push the per-context register state into all context
 * images, then program the MUX and boolean counter registers.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;
	int ret;

	/*
	 * Gen9-11 only: suppress clock-ratio reports in the OA stream but
	 * keep the ratio information in the report headers (masked-bit
	 * register write).
	 */
	if (IS_GEN_RANGE(dev_priv, 9, 11)) {
		I915_WRITE(GEN8_OA_DEBUG,
			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Per-context OA state (OACTXCONTROL, flex EU counters) must be
	 * written into every context image before the global registers
	 * are programmed.
	 */
	ret = gen8_configure_all_contexts(dev_priv, oa_config);
	if (ret)
		return ret;

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
1824
/*
 * Undo gen8_enable_metric_set(): restore default per-context OA state
 * in every context image (NULL config) and clear the NOA-enable
 * chicken bit.
 */
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}
1833
/*
 * Gen10+ variant of gen8_disable_metric_set(): same per-context reset,
 * but the NOA-enable bit moved to RPM_CONFIG1.
 */
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable noa to save power. */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}
1843
/*
 * Start the Gen7 OA unit for @stream: reinitialise the OA buffer (so no
 * stale reports survive a disable/enable cycle) and then program
 * OACONTROL with the stream's context filter, timer period, report
 * format and enable bits.
 */
static void gen7_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_gem_context *ctx = stream->ctx;
	u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
	bool periodic = dev_priv->perf.oa.periodic;
	u32 period_exponent = dev_priv->perf.oa.period_exponent;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buffer contents and pointer state before enabling, so the
	 * spurious-report detection (zeroed first dword) and tail aging
	 * start clean — see gen7_init_oa_buffer().
	 */
	gen7_init_oa_buffer(dev_priv);

	I915_WRITE(GEN7_OACONTROL,
		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
		   (period_exponent <<
		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
		   GEN7_OACONTROL_ENABLE);
}
1873
/*
 * Start the Gen8+ OA unit for @stream: re-initialise the OA buffer and
 * set the report format plus the counter enable bit in GEN8_OACONTROL.
 */
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Re-init the OA buffer before (re)enabling — presumably resets
	 * head/tail state so stale reports are not forwarded (confirm in
	 * gen8_init_oa_buffer).
	 */
	gen8_init_oa_buffer(dev_priv);

	/*
	 * Unlike Gen7, only the format and global enable are programmed
	 * here; context filtering / timer setup for Gen8+ is handled
	 * elsewhere (via the per-context register state — see
	 * gen8_update_reg_state_unlocked callers).
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
/*
 * i915_perf_stream_ops->enable for OA streams: start the hardware via the
 * platform hook and, for periodic sampling, kick off the hrtimer that
 * periodically checks the OA buffer to wake pollers.
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(stream);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}
1920
/* Stop the Gen7 OA unit and wait (up to 50ms) for the enable bit to clear. */
static void gen7_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN7_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}
1931
/* Stop the Gen8+ OA unit and wait (up to 50ms) for the enable bit to clear. */
static void gen8_oa_disable(struct i915_perf_stream *stream)
{
	struct intel_uncore *uncore = &stream->dev_priv->uncore;

	intel_uncore_write(uncore, GEN8_OACONTROL, 0);
	if (intel_wait_for_register(uncore,
				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
				    50))
		DRM_ERROR("wait for OA to be disabled timed out\n");
}
1942
1943
1944
1945
1946
1947
1948
1949
1950
/*
 * i915_perf_stream_ops->disable for OA streams: stop the hardware via the
 * platform hook and cancel the poll-check hrtimer started on enable.
 */
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(stream);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}
1960
/* Stream callbacks implementing an OA-unit backed i915 perf stream. */
static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
/**
 * i915_oa_stream_init - validate combined properties and set up an OA stream
 * @stream: partially initialised stream (dev_priv and ctx already set by
 *	    the caller)
 * @param: open flags from the DRM_IOCTL_I915_PERF_OPEN ioctl
 * @props: individually validated properties (combined state checked here)
 *
 * Claims the single OA unit, configures the OA buffer / format / period,
 * takes runtime-pm + forcewake references for the stream's lifetime and
 * enables the requested metric set. On success the stream's ops are set
 * to i915_oa_stream_ops.
 *
 * Returns 0 on success or a negative error code; on failure all
 * partially-acquired resources are released via the unwind labels below.
 */
static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/*
	 * If the metrics sysfs directory failed to register, config IDs
	 * were never advertised, so reject opens outright.
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_DEBUG("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.enable_metric_set) {
		DRM_DEBUG("OA unit not supported\n");
		return -ENODEV;
	}

	/*
	 * There is only one global OA unit: at most one exclusive stream
	 * may drive it at a time.
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_DEBUG("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->oa_format) {
		DRM_DEBUG("OA report format not specified\n");
		return -EINVAL;
	}

	/*
	 * Rate-limit "spurious report" complaints: burst of 10, then at
	 * most one message per 5 seconds.
	 */
	ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
			     5 * HZ, 10);
	/*
	 * Only report the suppressed-callbacks count when the ratelimit
	 * state is released, rather than on every suppressed message.
	 */
	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
			    RATELIMIT_MSG_ON_RELEASE);

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	/* A zero-sized format would make OA_TAKEN bookkeeping meaningless. */
	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic)
		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

	/* For single-context filtering, resolve the HW context ID now. */
	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret) {
			DRM_DEBUG("Invalid context id to filter with\n");
			return ret;
		}
	}

	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
	if (ret) {
		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
		goto err_config;
	}

	/*
	 * Hold a runtime-pm wakeref and full forcewake for the stream's
	 * lifetime — presumably so the OA unit keeps counting and its
	 * registers stay accessible while the stream is open (released in
	 * the stream destroy path; confirm against i915_oa_stream_destroy).
	 */
	stream->wakeref = intel_runtime_pm_get(dev_priv);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		goto err_lock;

	/* Publish the stream before enabling, under struct_mutex. */
	stream->ops = &i915_oa_stream_ops;
	dev_priv->perf.oa.exclusive_stream = stream;

	ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
	if (ret) {
		DRM_DEBUG("Unable to enable metric set\n");
		goto err_enable;
	}

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/* Unwind in strict reverse order of acquisition: */
err_enable:
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_lock:
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	put_oa_config(dev_priv, stream->oa_config);

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv, stream->wakeref);

err_config:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}
2137
/**
 * i915_oa_init_reg_state - seed a new context image with current OA state
 * @engine: engine the context is created for
 * @ce: the context being initialised
 * @regs: pointer into the context image's register state
 *
 * If an exclusive OA stream is active, copy its OA/flex register
 * configuration into the freshly created context so it matches the
 * contexts already reconfigured by gen8_configure_all_contexts(). Only
 * the render engine carries OA state.
 */
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct intel_context *ce,
			    u32 *regs)
{
	struct i915_perf_stream *stream;

	if (engine->class != RENDER_CLASS)
		return;

	stream = engine->i915->perf.oa.exclusive_stream;
	if (stream)
		gen8_update_reg_state_unlocked(ce, regs, stream->oa_config);
}
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
2178 struct file *file,
2179 char __user *buf,
2180 size_t count,
2181 loff_t *ppos)
2182{
2183
2184
2185
2186
2187
2188
2189 size_t offset = 0;
2190 int ret = stream->ops->read(stream, buf, count, &offset);
2191
2192 return offset ?: (ret ?: -EAGAIN);
2193}
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
/**
 * i915_perf_read - handle read() on an i915 perf stream fd
 * @file: perf stream file
 * @buf: destination userspace buffer
 * @count: size of @buf
 * @ppos: unused file position (stream data is not seekable)
 *
 * Blocking reads loop: wait outside the perf lock for data, then take the
 * lock for the actual copy, retrying while the locked read reports
 * -EAGAIN (data may have been consumed by a racing reader between the
 * wait and the locked read).
 *
 * Returns the number of bytes copied or a negative error code.
 */
static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/*
	 * Reading from a disabled stream would block forever; reject it
	 * up front.
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * Wait for data without holding the perf lock so other
		 * stream operations aren't blocked while we sleep.
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	/*
	 * Clear the pollin latch after a successful (or would-block) read
	 * so poll() doesn't keep reporting EPOLLIN for data that has just
	 * been drained; the hrtimer will re-arm it when new data shows up.
	 */
	if (ret >= 0 || ret == -EAGAIN) {
		/* Maybe make ->pollin per-stream state if we support
		 * multiple concurrent streams in the future. */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}
2270
/*
 * Periodic hrtimer callback (every POLL_PERIOD): check whether the OA
 * buffer has data available and, if so, latch ->pollin and wake any
 * poll()/read() waiters. Always re-arms itself.
 */
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (oa_buffer_check_unlocked(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
2304 struct i915_perf_stream *stream,
2305 struct file *file,
2306 poll_table *wait)
2307{
2308 __poll_t events = 0;
2309
2310 stream->ops->poll_wait(stream, file, wait);
2311
2312
2313
2314
2315
2316
2317
2318 if (dev_priv->perf.oa.pollin)
2319 events |= EPOLLIN;
2320
2321 return events;
2322}
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
/*
 * poll()/select() entry point for a perf stream fd: takes the perf lock
 * and delegates to i915_perf_poll_locked().
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	__poll_t ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360static void i915_perf_enable_locked(struct i915_perf_stream *stream)
2361{
2362 if (stream->enabled)
2363 return;
2364
2365
2366 stream->enabled = true;
2367
2368 if (stream->ops->enable)
2369 stream->ops->enable(stream);
2370}
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386static void i915_perf_disable_locked(struct i915_perf_stream *stream)
2387{
2388 if (!stream->enabled)
2389 return;
2390
2391
2392 stream->enabled = false;
2393
2394 if (stream->ops->disable)
2395 stream->ops->disable(stream);
2396}
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
2411 unsigned int cmd,
2412 unsigned long arg)
2413{
2414 switch (cmd) {
2415 case I915_PERF_IOCTL_ENABLE:
2416 i915_perf_enable_locked(stream);
2417 return 0;
2418 case I915_PERF_IOCTL_DISABLE:
2419 i915_perf_disable_locked(stream);
2420 return 0;
2421 }
2422
2423 return -EINVAL;
2424}
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
/*
 * ioctl() entry point for a perf stream fd: takes the perf lock and
 * delegates to i915_perf_ioctl_locked().
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
/*
 * Tear down @stream (perf lock held): disable it if still enabled, let
 * the backend release its resources, unlink it from the device's stream
 * list, drop the context reference taken at open, and free the stream.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
/*
 * release() for the perf stream fd — runs when the last reference to the
 * file is dropped; destroys the stream under the perf lock.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	return 0;
}
2500
2501
/* File operations for the anonymous perf stream fd. */
static const struct file_operations fops = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,
	.release = i915_perf_release,
	.poll = i915_perf_poll,
	.read = i915_perf_read,
	.unlocked_ioctl = i915_perf_ioctl,
	/*
	 * The supported ioctls (ENABLE/DISABLE) ignore their argument
	 * entirely — see i915_perf_ioctl_locked() — so the native handler
	 * can safely serve 32-bit compat callers too.
	 */
	.compat_ioctl = i915_perf_ioctl,
};
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
/**
 * i915_perf_open_ioctl_locked - create a stream fd (perf lock held)
 * @dev_priv: i915 device instance
 * @param: open flags copied from userspace
 * @props: individually validated properties from read_properties_unlocked()
 * @file: drm file used to resolve the optional context handle
 *
 * Performs the privilege check, allocates and initialises the stream,
 * wires it up to an anonymous inode fd and optionally enables it
 * immediately (unless I915_PERF_FLAG_DISABLED was passed).
 *
 * Returns the new fd (>= 0) or a negative error code.
 */
static int
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	bool privileged_op = true;
	int stream_fd;
	int ret;

	/* Resolve the context handle (takes a reference, dropped on error). */
	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
		if (!specific_ctx) {
			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
				  ctx_handle);
			ret = -ENOENT;
			goto err;
		}
	}

	/*
	 * On Haswell, filtering on a specific context is not considered a
	 * privileged operation — presumably because the per-context
	 * OACONTROL filtering there confines the data to the caller's own
	 * context (confirm against the HSW OA documentation). Everything
	 * else is system-wide profiling and remains privileged.
	 */
	if (IS_HASWELL(dev_priv) && specific_ctx)
		privileged_op = false;

	/*
	 * System-wide profiling leaks other processes' GPU activity, so by
	 * default (perf_stream_paranoid) require CAP_SYS_ADMIN.
	 */
	if (privileged_op &&
	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->dev_priv = dev_priv;
	stream->ctx = specific_ctx;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/*
	 * The stream-init backend is expected to honour every requested
	 * sample flag; a mismatch means unsupported sampling was silently
	 * dropped, which we refuse rather than return bogus data.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_flags;
	}

	list_add(&stream->link, &dev_priv->perf.streams);

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_open;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	return stream_fd;

err_open:
	list_del(&stream->link);
err_flags:
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx)
		i915_gem_context_put(specific_ctx);
err:
	return ret;
}
2648
2649static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
2650{
2651 return div64_u64(1000000000ULL * (2ULL << exponent),
2652 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
2653}
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
/**
 * read_properties_unlocked - copy and validate perf-open properties
 * @dev_priv: i915 device instance
 * @uprops: userspace array of (id, value) u64 pairs
 * @n_props: number of pairs in @uprops
 * @props: output, zeroed then filled from the validated pairs
 *
 * Each property is validated individually; cross-property consistency is
 * checked later in i915_oa_stream_init(). Note that duplicate IDs are not
 * rejected — a later pair silently overrides an earlier one.
 *
 * Returns 0 on success or a negative error code.
 */
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	u32 i;

	memset(props, 0, sizeof(struct perf_open_properties));

	/* Opening a stream without any properties is meaningless. */
	if (!n_props) {
		DRM_DEBUG("No i915 perf properties given\n");
		return -EINVAL;
	}

	/*
	 * There can't be more distinct property IDs than exist, so a larger
	 * count is certainly malformed (this also bounds the loop below).
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		DRM_DEBUG("More i915 perf properties specified than exist\n");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;
		int ret;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
			DRM_DEBUG("Unknown i915 perf property ID\n");
			return -EINVAL;
		}

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			if (value)
				props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			/* Config ID 0 is reserved/invalid. */
			if (value == 0) {
				DRM_DEBUG("Unknown OA metric set ID\n");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				DRM_DEBUG("Out-of-range OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			/* A zero size marks the format unsupported on this HW. */
			if (!dev_priv->perf.oa.oa_formats[value].size) {
				DRM_DEBUG("Unsupported OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				DRM_DEBUG("OA timer exponent too high (> %u)\n",
					  OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/*
			 * A small sampling period can generate an enormous
			 * amount of data and significant observer effect, so
			 * convert the exponent to a frequency and gate high
			 * rates behind CAP_SYS_ADMIN / the sysctl limit.
			 */
			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = oa_exponent_to_ns(dev_priv, value);

			/*
			 * do_div() only handles a 32-bit divisor, so the
			 * division is only done for periods <= 1s; anything
			 * longer is simply treated as "under 1Hz".
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;
				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate &&
			    !capable(CAP_SYS_ADMIN)) {
				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
					  i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		case DRM_I915_PERF_PROP_MAX:
			MISSING_CASE(id);
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
/**
 * i915_perf_open_ioctl - DRM_IOCTL_I915_PERF_OPEN entry point
 * @dev: drm device
 * @data: ioctl payload (struct drm_i915_perf_open_param)
 * @file: drm file issuing the ioctl
 *
 * Validates the open flags, reads/validates the property list outside the
 * perf lock (it may fault on userspace memory), then opens the stream
 * under the perf lock.
 *
 * Returns a new stream fd (>= 0) or a negative error code.
 */
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
			   I915_PERF_FLAG_FD_NONBLOCK |
			   I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(dev_priv,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861void i915_perf_register(struct drm_i915_private *dev_priv)
2862{
2863 int ret;
2864
2865 if (!dev_priv->perf.initialized)
2866 return;
2867
2868
2869
2870
2871
2872 mutex_lock(&dev_priv->perf.lock);
2873
2874 dev_priv->perf.metrics_kobj =
2875 kobject_create_and_add("metrics",
2876 &dev_priv->drm.primary->kdev->kobj);
2877 if (!dev_priv->perf.metrics_kobj)
2878 goto exit;
2879
2880 sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);
2881
2882 if (INTEL_GEN(dev_priv) >= 11) {
2883 i915_perf_load_test_config_icl(dev_priv);
2884 } else if (IS_CANNONLAKE(dev_priv)) {
2885 i915_perf_load_test_config_cnl(dev_priv);
2886 } else if (IS_COFFEELAKE(dev_priv)) {
2887 if (IS_CFL_GT2(dev_priv))
2888 i915_perf_load_test_config_cflgt2(dev_priv);
2889 if (IS_CFL_GT3(dev_priv))
2890 i915_perf_load_test_config_cflgt3(dev_priv);
2891 } else if (IS_GEMINILAKE(dev_priv)) {
2892 i915_perf_load_test_config_glk(dev_priv);
2893 } else if (IS_KABYLAKE(dev_priv)) {
2894 if (IS_KBL_GT2(dev_priv))
2895 i915_perf_load_test_config_kblgt2(dev_priv);
2896 else if (IS_KBL_GT3(dev_priv))
2897 i915_perf_load_test_config_kblgt3(dev_priv);
2898 } else if (IS_BROXTON(dev_priv)) {
2899 i915_perf_load_test_config_bxt(dev_priv);
2900 } else if (IS_SKYLAKE(dev_priv)) {
2901 if (IS_SKL_GT2(dev_priv))
2902 i915_perf_load_test_config_sklgt2(dev_priv);
2903 else if (IS_SKL_GT3(dev_priv))
2904 i915_perf_load_test_config_sklgt3(dev_priv);
2905 else if (IS_SKL_GT4(dev_priv))
2906 i915_perf_load_test_config_sklgt4(dev_priv);
2907 } else if (IS_CHERRYVIEW(dev_priv)) {
2908 i915_perf_load_test_config_chv(dev_priv);
2909 } else if (IS_BROADWELL(dev_priv)) {
2910 i915_perf_load_test_config_bdw(dev_priv);
2911 } else if (IS_HASWELL(dev_priv)) {
2912 i915_perf_load_test_config_hsw(dev_priv);
2913}
2914
2915 if (dev_priv->perf.oa.test_config.id == 0)
2916 goto sysfs_error;
2917
2918 ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
2919 &dev_priv->perf.oa.test_config.sysfs_metric);
2920 if (ret)
2921 goto sysfs_error;
2922
2923 atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
2924
2925 goto exit;
2926
2927sysfs_error:
2928 kobject_put(dev_priv->perf.metrics_kobj);
2929 dev_priv->perf.metrics_kobj = NULL;
2930
2931exit:
2932 mutex_unlock(&dev_priv->perf.lock);
2933}
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
/**
 * i915_perf_unregister - remove the sysfs metrics directory
 * @dev_priv: i915 device instance
 *
 * Reverses i915_perf_register(); a no-op if registration never succeeded
 * (metrics_kobj is NULL in that case).
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.oa.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}
2955
2956static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
2957{
2958 static const i915_reg_t flex_eu_regs[] = {
2959 EU_PERF_CNTL0,
2960 EU_PERF_CNTL1,
2961 EU_PERF_CNTL2,
2962 EU_PERF_CNTL3,
2963 EU_PERF_CNTL4,
2964 EU_PERF_CNTL5,
2965 EU_PERF_CNTL6,
2966 };
2967 int i;
2968
2969 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
2970 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
2971 return true;
2972 }
2973 return false;
2974}
2975
2976static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
2977{
2978 return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
2979 addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
2980 (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
2981 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
2982 (addr >= i915_mmio_reg_offset(OACEC0_0) &&
2983 addr <= i915_mmio_reg_offset(OACEC7_1));
2984}
2985
2986static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
2987{
2988 return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
2989 (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
2990 addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
2991 (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
2992 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
2993 (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
2994 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
2995}
2996
/* Gen8 mux whitelist: the Gen7 set plus WAIT_FOR_RC6_EXIT and the
 * RPM_CONFIG..NOA_CONFIG(8) range. */
static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
		addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
		(addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
		 addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
}
3004
/* Gen10 mux whitelist: the Gen8 set plus NOA_WRITE_HIGH and the
 * OA_PERFCNT3/4 range. */
static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen8_is_valid_mux_addr(dev_priv, addr) ||
		addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
		(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
		 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}
3012
/* Haswell mux whitelist: the Gen7 set plus a raw 0x25100-0x2FF90 window
 * and the MBVID2 NOA/MISR registers. */
static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
		(addr >= 0x25100 && addr <= 0x2FF90) ||
		(addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
		 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
		addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
}
3021
/* Cherryview mux whitelist: the Gen7 set plus a raw 0x182300-0x1823A4
 * window. */
static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
		(addr >= 0x182300 && addr <= 0x1823A4);
}
3027
/*
 * Sanitize a userspace-supplied register value before it is written as
 * part of an OA config: strip masked bits that would let a config flip
 * state the driver must stay in control of.
 */
static u32 mask_reg_value(u32 reg, u32 val)
{
	/*
	 * Don't let a config enable GEN8_ST_PO_DISABLE via
	 * HALF_SLICE_CHICKEN2 — clear the masked-enable for that bit.
	 */
	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/*
	 * Likewise keep HSW_WAIT_FOR_RC6_EXIT_ENABLE under driver control
	 * rather than letting a config toggle RC6-exit waiting.
	 */
	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}
3046
3047static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
3048 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
3049 u32 __user *regs,
3050 u32 n_regs)
3051{
3052 struct i915_oa_reg *oa_regs;
3053 int err;
3054 u32 i;
3055
3056 if (!n_regs)
3057 return NULL;
3058
3059 if (!access_ok(regs, n_regs * sizeof(u32) * 2))
3060 return ERR_PTR(-EFAULT);
3061
3062
3063 GEM_BUG_ON(!is_valid);
3064 if (!is_valid)
3065 return ERR_PTR(-EINVAL);
3066
3067 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
3068 if (!oa_regs)
3069 return ERR_PTR(-ENOMEM);
3070
3071 for (i = 0; i < n_regs; i++) {
3072 u32 addr, value;
3073
3074 err = get_user(addr, regs);
3075 if (err)
3076 goto addr_err;
3077
3078 if (!is_valid(dev_priv, addr)) {
3079 DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
3080 err = -EINVAL;
3081 goto addr_err;
3082 }
3083
3084 err = get_user(value, regs + 1);
3085 if (err)
3086 goto addr_err;
3087
3088 oa_regs[i].addr = _MMIO(addr);
3089 oa_regs[i].value = mask_reg_value(addr, value);
3090
3091 regs += 2;
3092 }
3093
3094 return oa_regs;
3095
3096addr_err:
3097 kfree(oa_regs);
3098 return ERR_PTR(err);
3099}
3100
/* sysfs "id" attribute show handler for a dynamically added OA config. */
static ssize_t show_dynamic_id(struct device *dev,
			       struct device_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}
3110
/*
 * Create the sysfs group for a dynamically added OA config:
 * metrics/<uuid>/id, exposing the config's numeric ID read-only.
 */
static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	/* The group directory is named after the config's UUID. */
	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(dev_priv->perf.metrics_kobj,
				  &oa_config->sysfs_metric);
}
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
/**
 * i915_perf_add_config_ioctl - DRM_IOCTL_I915_PERF_ADD_CONFIG entry point
 * @dev: drm device
 * @data: ioctl payload (struct drm_i915_perf_oa_config)
 * @file: drm file issuing the ioctl
 *
 * Validates and copies a userspace-supplied OA configuration (mux,
 * boolean-counter and flex register lists), registers it in the metrics
 * IDR and exposes it under sysfs keyed by its UUID.
 *
 * Returns the new config's positive ID on success or a negative error
 * code; on failure put_oa_config() releases whatever register arrays
 * were already copied.
 */
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_oa_config *args = data;
	struct i915_oa_config *oa_config, *tmp;
	int err, id;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	/* Adding configs affects system-wide profiling; gate like opens. */
	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
		return -EACCES;
	}

	/* At least one non-empty register list must be supplied. */
	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
		DRM_DEBUG("No OA registers given\n");
		return -EINVAL;
	}

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config) {
		DRM_DEBUG("Failed to allocate memory for the OA config\n");
		return -ENOMEM;
	}

	atomic_set(&oa_config->ref_count, 1);

	if (!uuid_is_valid(args->uuid)) {
		DRM_DEBUG("Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/*
	 * Last character in oa_config->uuid will be 0 because oa_config is
	 * kzalloc'd, so the copy below leaves a NUL-terminated string.
	 */
	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));

	oa_config->mux_regs_len = args->n_mux_regs;
	oa_config->mux_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_mux_reg,
			      u64_to_user_ptr(args->mux_regs_ptr),
			      args->n_mux_regs);

	if (IS_ERR(oa_config->mux_regs)) {
		DRM_DEBUG("Failed to create OA config for mux_regs\n");
		err = PTR_ERR(oa_config->mux_regs);
		goto reg_err;
	}

	oa_config->b_counter_regs_len = args->n_boolean_regs;
	oa_config->b_counter_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.oa.ops.is_valid_b_counter_reg,
			      u64_to_user_ptr(args->boolean_regs_ptr),
			      args->n_boolean_regs);

	if (IS_ERR(oa_config->b_counter_regs)) {
		DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
		err = PTR_ERR(oa_config->b_counter_regs);
		goto reg_err;
	}

	/* Flex-EU registers only exist from Gen8 onwards. */
	if (INTEL_GEN(dev_priv) < 8) {
		if (args->n_flex_regs != 0) {
			err = -EINVAL;
			goto reg_err;
		}
	} else {
		oa_config->flex_regs_len = args->n_flex_regs;
		oa_config->flex_regs =
			alloc_oa_regs(dev_priv,
				      dev_priv->perf.oa.ops.is_valid_flex_reg,
				      u64_to_user_ptr(args->flex_regs_ptr),
				      args->n_flex_regs);

		if (IS_ERR(oa_config->flex_regs)) {
			DRM_DEBUG("Failed to create OA config for flex_regs\n");
			err = PTR_ERR(oa_config->flex_regs);
			goto reg_err;
		}
	}

	err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (err)
		goto reg_err;

	/*
	 * UUIDs must be unique; reject re-registration of an existing
	 * config under the same UUID.
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the test config. */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
/**
 * i915_perf_remove_config_ioctl - DRM_IOCTL_I915_PERF_REMOVE_CONFIG entry
 * @dev: drm device
 * @data: ioctl payload (the u64 config id)
 * @file: drm file issuing the ioctl
 *
 * Unpublishes the config from sysfs and the metrics IDR, then drops the
 * registration reference. A stream still holding its own reference keeps
 * the config alive until that stream closes.
 *
 * Returns 0 on success or a negative error code.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}
3339
/*
 * sysctl knobs exposed under /proc/sys/dev/i915/ (see dev_root/i915_root
 * below for the directory hierarchy).
 */
static struct ctl_table oa_table[] = {
	{
	 /* 0/1 toggle: require CAP_SYS_ADMIN for perf stream/config ioctls */
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &one,
	 },
	{
	 /* Upper bound on OA sampling frequency requested by userspace;
	  * clamped at runtime to oa_sample_rate_hard_limit (set in
	  * i915_perf_init() from the command streamer timestamp frequency).
	  */
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};
3361
/* "i915" sysctl directory containing the OA entries above. */
static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};
3371
/* Top-level "dev" directory, giving /proc/sys/dev/i915/... */
static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Hooks up the platform-specific OA unit vfuncs, then (only when the
 * platform is supported, i.e. enable_metric_set got set) initializes the
 * stream/metrics locking, the poll hrtimer, the sysctl entries and the
 * idr used to track dynamically added OA configs.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		/* Haswell uses the older, dedicated gen7-style OA unit. */
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		/* No flex EU registers to program on HSW. */
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/*
		 * Gen8+ is only supported with execlist contexts: parts of
		 * the OA unit state live in the logical ring context image
		 * (see the ctx_* offsets programmed per-gen below).
		 */
		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		/* Common gen8+ enable/disable/read vfuncs. */
		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
			/* BDW/CHV/SKL/BXT/KBL/GLK/CFL register validation. */
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				/* CHV has its own set of valid MUX addresses. */
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN(dev_priv, 8)) {
				/* Gen8 context-image offsets (in dwords) of
				 * OACTXCONTROL and the first flex EU register.
				 */
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			} else {
				/* Gen9 moved both offsets and the valid bit. */
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
			}
		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
			/* CNL/ICL: gen10-specific MUX validation and
			 * disable path; otherwise gen8-style programming.
			 */
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;

			/* Same context-image layout as gen9. */
			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
		}
	}

	/* enable_metric_set left NULL means this platform is unsupported
	 * and the rest of the perf state is left uninitialized.
	 */
	if (dev_priv->perf.oa.ops.enable_metric_set) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		/* Hard cap for the oa_max_sample_rate sysctl: half the CS
		 * timestamp frequency, converted from kHz to Hz.
		 */
		oa_sample_rate_hard_limit = 1000 *
			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
}
3487
/*
 * idr_for_each() callback used by i915_perf_fini(): drop the idr's
 * reference on each remaining OA config (@p), freeing it once unused.
 */
static int destroy_config(int id, void *p, void *data)
{
	put_oa_config(data, p);

	return 0;
}
3497
3498
3499
3500
3501
/**
 * i915_perf_fini - counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 *
 * Tears down the state set up by i915_perf_init(); a no-op if the
 * platform was never initialized.
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	/* Drop the idr's reference on every OA config still registered,
	 * then release the idr itself.
	 */
	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	/* Clearing the vfuncs also marks the OA backend unavailable
	 * (i915_perf_init() gates everything on enable_metric_set).
	 */
	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));

	dev_priv->perf.initialized = false;
}
3516