1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194#include <linux/anon_inodes.h>
195#include <linux/sizes.h>
196#include <linux/uuid.h>
197
198#include "i915_drv.h"
199#include "i915_oa_hsw.h"
200#include "i915_oa_bdw.h"
201#include "i915_oa_chv.h"
202#include "i915_oa_sklgt2.h"
203#include "i915_oa_sklgt3.h"
204#include "i915_oa_sklgt4.h"
205#include "i915_oa_bxt.h"
206#include "i915_oa_kblgt2.h"
207#include "i915_oa_kblgt3.h"
208#include "i915_oa_glk.h"
209#include "i915_oa_cflgt2.h"
210#include "i915_oa_cflgt3.h"
211#include "i915_oa_cnl.h"
212
213
214
215
216
/* HW requires this to be a power of two, between 128K and 16M */
#define OA_BUFFER_SIZE		SZ_16M

/*
 * Number of bytes between two OA buffer offsets, accounting for wrap
 * around the (power-of-two sized) buffer.
 *
 * Arguments are fully parenthesized so callers may safely pass
 * compound expressions (the original expansion used the bare tokens,
 * which would mis-associate for e.g. OA_TAKEN(a + b, c)).
 */
#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/*
 * Margin (in nanoseconds) a tail pointer must "age" before we trust that
 * the corresponding reports have fully landed in memory; see
 * oa_buffer_check_unlocked() for how this is used to double-buffer the
 * tail pointer.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL

/* Sentinel for an unset/unusable tail pointer slot */
#define INVALID_TAIL_PTR	0xffffffff

/*
 * frequency with which the OA buffer is checked for available data
 * (both for the hrtimer driving pollin and for blocking reads)
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
270
271
/* Min/max bounds handed to the sysctl table for the tunables below */
static int zero;
static int one = 1;

/*
 * When true (the default), only privileged processes may open a
 * system-wide i915 perf stream; toggled via sysctl.
 */
static u32 i915_perf_stream_paranoid = true;
275
276
277
278
279
280
281
282
283
/* The timer-period exponent programmed into OACONTROL is a 5-bit field */
#define OA_EXPONENT_MAX 31

/* Value written into a report's ctx-id field when no valid context applies */
#define INVALID_CTX_ID 0xffffffff

/*
 * On Gen8+ the report "reason" field (bits 19..24 of dword 0) tells us
 * why the HW wrote a report; a reason of 0 marks a report we consider
 * spurious and skip.
 */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)
294
295
296
297
298
299
300
301
302
/*
 * Absolute ceiling on the OA sampling frequency; initialised elsewhere
 * (presumably from the platform's timestamp frequency — confirm at the
 * init site, which is outside this chunk).
 */
static int oa_sample_rate_hard_limit;

/*
 * Default soft limit on the OA sampling frequency exposed via sysctl;
 * 100000 Hz ~= 10 microseconds between reports.
 */
static u32 i915_oa_max_sample_rate = 100000;
312
313
314
315
316
/*
 * Per-platform tables mapping the uabi I915_OA_FORMAT_* enum to the
 * hardware format field value and the resulting report size in bytes.
 * Entries left out of the designated initializer are zeroed, marking
 * formats unsupported on that platform.
 */
static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 disabled (HW bug per original driver), format 4 next */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

/* Flag in stream->sample_flags: userspace asked for raw OA reports */
#define SAMPLE_OA_REPORT      (1<<0)
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
/**
 * struct perf_open_properties - validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: whether a single or all gpu contexts should be monitored
 * @ctx_handle: a gem ctx handle for use with @single_context
 * @metrics_set: an ID for an oa unit metric set advertised via sysfs
 * @oa_format: an OA unit HW report format
 * @oa_periodic: whether to enable periodic OA unit sampling
 * @oa_period_exponent: the OA unit sampling period is derived from this
 *
 * As read_properties_unlocked() enumerates and validates the properties
 * given to open a stream of metrics the configuration is built up in the
 * structure which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};
363
364static void free_oa_config(struct drm_i915_private *dev_priv,
365 struct i915_oa_config *oa_config)
366{
367 if (!PTR_ERR(oa_config->flex_regs))
368 kfree(oa_config->flex_regs);
369 if (!PTR_ERR(oa_config->b_counter_regs))
370 kfree(oa_config->b_counter_regs);
371 if (!PTR_ERR(oa_config->mux_regs))
372 kfree(oa_config->mux_regs);
373 kfree(oa_config);
374}
375
376static void put_oa_config(struct drm_i915_private *dev_priv,
377 struct i915_oa_config *oa_config)
378{
379 if (!atomic_dec_and_test(&oa_config->ref_count))
380 return;
381
382 free_oa_config(dev_priv, oa_config);
383}
384
/*
 * Look up the OA config for @metrics_set and take a reference on it.
 *
 * metrics_set == 1 is reserved for the built-in test config and needs no
 * locking; all other IDs are resolved through the metrics IDR under
 * perf.metrics_lock (taken interruptibly since this runs on the open
 * path on behalf of userspace).
 *
 * Returns 0 with *out_config referenced on success, -EINVAL for an
 * unknown set, or the error from mutex_lock_interruptible().
 */
static int get_oa_config(struct drm_i915_private *dev_priv,
			 int metrics_set,
			 struct i915_oa_config **out_config)
{
	int ret;

	if (metrics_set == 1) {
		*out_config = &dev_priv->perf.oa.test_config;
		atomic_inc(&dev_priv->perf.oa.test_config.ref_count);
		return 0;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		return ret;

	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
	if (!*out_config)
		ret = -EINVAL;
	else
		atomic_inc(&(*out_config)->ref_count);

	mutex_unlock(&dev_priv->perf.metrics_lock);

	return ret;
}
411
412static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
413{
414 return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
415}
416
417static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
418{
419 u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
420
421 return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
422}
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
/*
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 *
 * This is either called via fops (for blocking reads in user ctx) or the
 * poll check hrtimer (atomic ctx) to check the OA buffer tail pointer and
 * check if there is data available for userspace to read.
 *
 * Because the HW tail pointer can race ahead of the reports it references
 * actually landing in memory, a tail pointer read from HW is not trusted
 * immediately: it is parked in an "aging" slot and only promoted to the
 * "aged" slot — the one readers consume up to — once it has been stable
 * for OA_TAIL_MARGIN_NSEC. Two tail slots are kept, indexed by
 * aged_tail_idx, so one can age while the other is being read against.
 *
 * Returns: %true if the OA buffer contains at least one full report
 * between the current head and the aged tail.
 */
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/*
	 * We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state; hold the lock for the whole check.
	 */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/* NB: head is a GGTT offset here, like the tail pointers */
	head = dev_priv->perf.oa.oa_buffer.head;

	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);

	/*
	 * The tail pointer increases in 64 byte increments, not in
	 * report_size steps; round it down to a report boundary.
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/*
	 * If the aging tail has been stable for longer than the margin,
	 * promote it: flip aged_tail_idx so readers start consuming up to
	 * it, and invalidate the other slot ready for the next candidate.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging... */
		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/*
	 * Update the aging tail: start aging a new HW tail if nothing is
	 * currently aging and the HW has moved at least one report past
	 * the aged tail (or there is no valid aged tail yet).
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/*
		 * Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware
		 * condition could put the tail out of range...
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer. A status record is a header with no
 * payload.
 *
 * The @offset is only updated on success.
 *
 * Returns: 0 on success, -ENOSPC if the record doesn't fit, -EFAULT if
 * the copy to userspace fails.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @offset is only updated on success.
 *
 * Returns: 0 on success, -ENOSPC if the sample doesn't fit, -EFAULT if a
 * copy to userspace fails.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
/**
 * gen8_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Walks the OA ring buffer from the cached head up to the aged tail,
 * appending one sample per report (subject to per-context filtering),
 * and finally writes the new head back to the HW and our cached copy.
 *
 * The @offset is only updated on success.
 *
 * Returns: 0 on success (a short read that hits -ENOSPC mid-walk is
 * reported as the error from append_oa_sample()), -EAGAIN if no aged
 * tail is available yet, or -EIO on inconsistent pointer state.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	/* Snapshot head/tail state under the pointer lock */
	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate + align the tail pointers we read from
	 * the hardware and we are in full control of the head pointer which
	 * should only be incremented by multiples of the report size.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer (i.e. no need to copy a report into
		 * an intermediate buffer to handle wrapping).
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what triggered
		 * this specific report (mostly timer triggered or e.g. a
		 * context switch). A zero reason marks a report the HW should
		 * never have written and that we skip as spurious (the dword
		 * is zeroed below after a report is consumed, so stale
		 * entries are also caught here).
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		/*
		 * XXX: Just keep the lower 21 bits for now since I'm not
		 * entirely sure if the HW touches any of the higher bits in
		 * this field
		 */
		ctx_id = report32[2] & 0x1fffff;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 */
		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: the OA unit reports all counters globally, so to
		 * support per-context filtering we forward a report when:
		 * - there is no specific context to filter for, or
		 * - the report belongs to the filtered context, or
		 * - the previous report belonged to it (so userspace can see
		 *   the final counter values at the switch away), or
		 * - the report marks a context switch (delimits sequences of
		 *   counter deltas cleanly for userspace).
		 */
		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
		     dev_priv->perf.oa.specific_ctx_id) ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (dev_priv->perf.oa.exclusive_stream->ctx &&
			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on the
		 * assumption that the OA buffer is initially zeroed and we
		 * reset the field after copying so the check is still
		 * meaningful once old reports start being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and
 * then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the updated
 * @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * An overflow means the HW wrapped and overwrote unread reports; we
	 * can't know how much data was lost, so report BUFFER_LOST to
	 * userspace and force-restart the OA unit (disable + enable
	 * re-initialises the buffer, clearing the overflow condition).
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		/* Clear the sticky REPORT_LOST bit so it's reported once */
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
/**
 * gen7_append_oa_reports - Copies all buffered OA reports into
 *			    userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Gen7 variant of gen8_append_oa_reports(): no report "reason" field and
 * no per-context filtering here — a report with a zeroed first dword is
 * treated as spurious/stale and skipped instead.
 *
 * The @offset is only updated on success.
 *
 * Returns: 0 on success, -EAGAIN if no aged tail is available yet, -EIO
 * on inconsistent pointer state, or an error from append_oa_sample().
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	head = dev_priv->perf.oa.oa_buffer.head;
	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a
	 * driver bug since we validate + align the tail pointers we read from
	 * the hardware and we are in full control of the head pointer which
	 * should only be incremented by multiples of the report size.
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/*
		 * All the report sizes factor neatly into the buffer size so
		 * we never expect to see a report split between the beginning
		 * and end of the buffer (i.e. no need to copy a report into
		 * an intermediate buffer to handle wrapping).
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The report-ID field for periodic samples includes flags
		 * identifying what triggered this specific report; a zero
		 * first dword marks a report we consider spurious (and also
		 * catches stale reports since we zero it after consuming).
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/*
		 * The above report-id field sanity check is based on the
		 * assumption that the OA buffer is initially zeroed and we
		 * reset the field after copying so the check is still
		 * meaningful once old reports start being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    OA_MEM_SELECT_GGTT));
		dev_priv->perf.oa.oa_buffer.head = head;

		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
	}

	return ret;
}
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/*
	 * XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over. Unlike Gen8+ the OA unit won't automatically re-start
	 * after a buffer overflow, so after reporting BUFFER_LOST we
	 * force a restart via disable + enable (which also
	 * re-initialises the buffer and clears the condition).
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  dev_priv->perf.oa.period_exponent);

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		/* Latch the bit so this condition is only reported once */
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					oa_buffer_check_unlocked(dev_priv));
}
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA
 * metrics, this registers the poll wait queue with poll_wait(); the
 * hrtimer callback wakes it when data is available.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied
 * into the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This is needed so we can filter reports for a
 * single context.
 *
 * On execlists platforms the context's hw_id can be used directly. On
 * legacy ringbuffer platforms the ID is the GGTT offset of the context
 * image, so the context must be pinned for the stream's lifetime to keep
 * that offset stable (unpinned in oa_put_render_ctx_id()).
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
	} else {
		struct intel_engine_cs *engine = dev_priv->engine[RCS];
		struct intel_ring *ring;
		int ret;

		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
		if (ret)
			return ret;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 *
		 * NB: implied RCS engine...
		 */
		ring = engine->context_pin(engine, stream->ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
		if (IS_ERR(ring))
			return PTR_ERR(ring);


		/*
		 * Explicitly track the ID (instead of calling
		 * i915_ggtt_offset() on the fly) considering the difference
		 * with gen8+ and execlists
		 */
		dev_priv->perf.oa.specific_ctx_id =
			i915_ggtt_offset(stream->ctx->engine[engine->id].state);
	}

	return 0;
}
1253
1254
1255
1256
1257
1258
1259
1260
/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * In case anything needed doing to ensure the context hw id would remain
 * valid for the lifetime of the stream, then that can be undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
	} else {
		struct intel_engine_cs *engine = dev_priv->engine[RCS];

		mutex_lock(&dev_priv->drm.struct_mutex);

		/* Drop the pin taken in oa_get_render_ctx_id() */
		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
		engine->context_unpin(engine, stream->ctx);

		mutex_unlock(&dev_priv->drm.struct_mutex);
	}
}
1278
/*
 * Tear down the OA buffer allocated in alloc_oa_buffer(): unmap the CPU
 * mapping, unpin the GGTT vma and drop the object reference, all under
 * struct_mutex, then clear our cached pointers.
 */
static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);

	i915->perf.oa.oa_buffer.vma = NULL;
	i915->perf.oa.oa_buffer.vaddr = NULL;

	mutex_unlock(&i915->drm.struct_mutex);
}
1293
/*
 * Tear down an OA stream: detach it as the exclusive stream and disable
 * the metric set (under struct_mutex, so this is serialised against
 * concurrent context pinning that reads exclusive_stream), free the OA
 * buffer, drop the forcewake/runtime-pm references taken at stream init,
 * release the ctx hw id and the OA config reference.
 */
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* Only ever one OA stream may be open at a time */
	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.oa.exclusive_stream = NULL;
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	if (dev_priv->perf.oa.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.oa.spurious_report_rs.missed);
	}
}
1324
/*
 * Program the Gen7 OA buffer registers (head, base, size/tail) from the
 * pinned vma and reset all of our software pointer/aging state. Called
 * with the OA unit disabled (init or forced restart).
 */
static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}
1372
/*
 * Program the Gen8+ OA buffer registers (status, head, base/size, tail)
 * from the pinned vma and reset all of our software pointer/aging state.
 * Called with the OA unit disabled (init or forced restart).
 */
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	dev_priv->perf.oa.oa_buffer.head = gtt_offset;

	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen8_append_oa_reports() that looks at the
	 * reason field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}
1431
/*
 * Allocate and map the OA buffer: create a 16M GEM object, set LLC
 * caching, pin it into the GGTT (16M aligned — the HW tail pointer
 * derives the buffer base by masking) and map it for CPU access, then
 * hand it to the per-gen init_oa_buffer() hook. Uses a goto cleanup
 * chain on failure; struct_mutex is held across the whole sequence.
 *
 * Returns: 0 on success or a negative error code.
 */
static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}
1495
1496static void config_oa_regs(struct drm_i915_private *dev_priv,
1497 const struct i915_oa_reg *regs,
1498 u32 n_regs)
1499{
1500 u32 i;
1501
1502 for (i = 0; i < n_regs; i++) {
1503 const struct i915_oa_reg *reg = regs + i;
1504
1505 I915_WRITE(reg->addr, reg->value);
1506 }
1507}
1508
/*
 * Program a Haswell OA metric set: tweak clock gating so the MUX configs
 * land reliably, write the MUX registers, wait for them to settle, then
 * write the boolean/B-counter registers. Counterpart of
 * hsw_disable_metric_set().
 *
 * Returns: 0 (kept non-void to match the enable_metric_set hook).
 */
static int hsw_enable_metric_set(struct drm_i915_private *dev_priv,
				 const struct i915_oa_config *oa_config)
{
	/*
	 * PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Clock gating be
	 * disabled by writing 0x20000000 into 0x9408.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	/*
	 * It apparently takes a fairly long time for a new MUX
	 * configuration to be be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've miss characterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
1556
/*
 * Undo the clock-gating tweaks made by hsw_enable_metric_set() and
 * disable the NOA logic.
 */
static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}
1567
1568
1569
1570
1571
1572
1573
1574
/*
 * Manually update a context's image with the OACTXCONTROL timer settings
 * and the EU flex counter configuration from @oa_config (or disabled
 * values when @oa_config is NULL). The register state offsets come from
 * the per-gen ctx_oactxctrl/ctx_flexeu0 offsets; each register occupies
 * an (offset, value) pair in the image.
 *
 * NB: the caller must ensure the context image is not in use by the HW
 * while it's rewritten (idle, or pinned and CPU-mapped safely).
 */
static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
					   u32 *reg_state,
					   const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *dev_priv = ctx->i915;
	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	u32 flex_mmio[] = {
		i915_mmio_reg_offset(EU_PERF_CNTL0),
		i915_mmio_reg_offset(EU_PERF_CNTL1),
		i915_mmio_reg_offset(EU_PERF_CNTL2),
		i915_mmio_reg_offset(EU_PERF_CNTL3),
		i915_mmio_reg_offset(EU_PERF_CNTL4),
		i915_mmio_reg_offset(EU_PERF_CNTL5),
		i915_mmio_reg_offset(EU_PERF_CNTL6),
	};
	int i;

	reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
	reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
				      GEN8_OA_TIMER_PERIOD_SHIFT) |
				     (dev_priv->perf.oa.periodic ?
				      GEN8_OA_TIMER_ENABLE : 0) |
				     GEN8_OA_COUNTER_RESUME;

	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
		u32 state_offset = ctx_flexeu0 + i * 2;
		u32 mmio = flex_mmio[i];

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not yet...
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		reg_state[state_offset] = mmio;
		reg_state[state_offset+1] = value;
	}
}
1627
1628
1629
1630
1631
/*
 * Emit an MI_LOAD_REGISTER_IMM into @rq that programs OACTXCONTROL and
 * the EU flex counter registers from @oa_config (disabled values when
 * @oa_config is NULL) — the GPU-side counterpart of
 * gen8_update_reg_state_unlocked() for contexts whose images can't be
 * rewritten directly.
 *
 * Returns: 0 on success or the error from intel_ring_begin().
 */
static int gen8_emit_oa_config(struct i915_request *rq,
			       const struct i915_oa_config *oa_config)
{
	struct drm_i915_private *dev_priv = rq->i915;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	u32 flex_mmio[] = {
		i915_mmio_reg_offset(EU_PERF_CNTL0),
		i915_mmio_reg_offset(EU_PERF_CNTL1),
		i915_mmio_reg_offset(EU_PERF_CNTL2),
		i915_mmio_reg_offset(EU_PERF_CNTL3),
		i915_mmio_reg_offset(EU_PERF_CNTL4),
		i915_mmio_reg_offset(EU_PERF_CNTL5),
		i915_mmio_reg_offset(EU_PERF_CNTL6),
	};
	u32 *cs;
	int i;

	/* One LRI header, OACTXCONTROL pair, 7 flex pairs, one MI_NOOP */
	cs = intel_ring_begin(rq, ARRAY_SIZE(flex_mmio) * 2 + 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);

	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;

	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
		u32 mmio = flex_mmio[i];

		/*
		 * This arbitrary default will select the 'EU FPU0 Pipeline
		 * Active' event. In the future it's anticipated that there
		 * will be an explicit 'No Event' we can select, but not
		 * yet...
		 */
		u32 value = 0;

		if (oa_config) {
			u32 j;

			for (j = 0; j < oa_config->flex_regs_len; j++) {
				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
					value = oa_config->flex_regs[j].value;
					break;
				}
			}
		}

		*cs++ = mmio;
		*cs++ = value;
	}

	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}
1691
/*
 * Submit a kernel-context request that applies @oa_config via
 * gen8_emit_oa_config(), ordered after the last request on every
 * timeline so the new config takes effect only once all in-flight work
 * has completed. Requires struct_mutex.
 *
 * Returns: 0 on success or a negative error code.
 */
static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
						 const struct i915_oa_config *oa_config)
{
	struct intel_engine_cs *engine = dev_priv->engine[RCS];
	struct i915_gem_timeline *timeline;
	struct i915_request *rq;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	i915_retire_requests(dev_priv);

	rq = i915_request_alloc(engine, dev_priv->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = gen8_emit_oa_config(rq, oa_config);
	if (ret) {
		i915_request_add(rq);
		return ret;
	}

	/* Queue this switch after all other activity */
	list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
		struct i915_request *prev;
		struct intel_timeline *tl;

		tl = &timeline->engine[engine->id];
		prev = i915_gem_active_raw(&tl->last_request,
					   &dev_priv->drm.struct_mutex);
		if (prev)
			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							 &prev->submit,
							 GFP_KERNEL);
	}

	i915_request_add(rq);

	return 0;
}
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
/*
 * Manages updating the per-context aspects of the OA stream
 * configuration across all contexts.
 *
 * The awkward consideration here is that OACTXCONTROL controls the
 * exponent for periodic sampling which is primarily used for system
 * wide profiling where we'd like a consistent sampling period even in
 * the face of context switches.
 *
 * Our approach of updating the register state context (as opposed to
 * say using a workaround batch buffer) ensures that the hardware
 * won't automatically reload an out-of-date timer exponent even
 * transiently before a WA BB could be parsed.
 *
 * This function needs to:
 * - Ensure the currently running context's per-context OA state is
 *   updated
 * - Ensure that all existing contexts will have the correct per-context
 *   OA state if they are scheduled for use.
 * - Ensure any new contexts will be initialized with the correct
 *   per-context OA state.
 *
 * Note: it's only the RCS/Render context that has any OA state.
 */
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
				       const struct i915_oa_config *oa_config)
{
	struct i915_gem_context *ctx;
	int ret;
	unsigned int wait_flags = I915_WAIT_LOCKED;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	/* Switch away from any user context. */
	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
	if (ret)
		goto out;

	/*
	 * The OA register config is setup through the context image. This image
	 * might be written to by the GPU on context switch (in particular on
	 * lite-restore). This means we can't safely update a context's image,
	 * if this context is scheduled/submitted to run on the GPU.
	 *
	 * We could emit the OA register config through the batch buffer but
	 * this might leave small interval of time where the OA unit is
	 * configured at an invalid sampling period.
	 *
	 * So far the best way to work around this issue seems to be draining
	 * the GPU from any submitted work.
	 */
	ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
	if (ret)
		goto out;

	/* Update all contexts now that we've stalled the submission. */
	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
		struct intel_context *ce = &ctx->engine[RCS];
		u32 *regs;

		/* OA settings will be set upon first use */
		if (!ce->state)
			continue;

		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
		if (IS_ERR(regs)) {
			ret = PTR_ERR(regs);
			goto out;
		}

		ce->state->obj->mm.dirty = true;
		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);

		gen8_update_reg_state_unlocked(ctx, regs, oa_config);

		i915_gem_object_unpin_map(ce->state->obj);
	}

 out:
	return ret;
}
1814
/*
 * Program a Gen8+ OA metric set: set up OA_DEBUG on Gen9/Gen10 to make
 * clock-ratio reports predictable, push the per-context OA state into
 * all context images, then write the MUX and boolean/B-counter
 * registers. Counterpart of gen8/gen10_disable_metric_set().
 *
 * Returns: 0 on success or the error from gen8_configure_all_contexts().
 */
static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
				  const struct i915_oa_config *oa_config)
{
	int ret;

	/*
	 * We disable slice/unslice clock ratio change reports on SKL since
	 * they are too noisy. The HW generates a lot of redundant reports
	 * where the ratio hasn't really changed causing a lot of redundant
	 * work to processes and increasing the chances we'll hit buffer
	 * overruns.
	 *
	 * Although we don't currently use the 'disable overrun' OABUFFER
	 * feature it's worth noting that clock ratio reports have to be
	 * disabled before considering to use that feature since the HW doesn't
	 * correctly block these reports.
	 *
	 * Currently none of the high-level metrics we have depend on knowing
	 * this ratio to normalize.
	 *
	 * Note: This register is not power context saved and restored, but
	 * that's OK considering that we disable RC6 while the OA unit is
	 * enabled.
	 *
	 * The _INCLUDE_CLK_RATIO bit controls whether the
	 * slice/unslice frequency fields are included in reports.
	 */
	if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) {
		I915_WRITE(GEN8_OA_DEBUG,
			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
	}

	/*
	 * Update all contexts prior writing the mux configurations as we need
	 * to make sure all slices/subslices are ON before writing to NOA
	 * registers.
	 */
	ret = gen8_configure_all_contexts(dev_priv, oa_config);
	if (ret)
		return ret;

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);

	config_oa_regs(dev_priv, oa_config->b_counter_regs,
		       oa_config->b_counter_regs_len);

	return 0;
}
1865
static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' register state back to the defaults. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable NOA to save power. */
	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));

}
1875
static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
{
	/* Reset all contexts' register state back to the defaults. */
	gen8_configure_all_contexts(dev_priv, NULL);

	/* Make sure we disable NOA to save power (gen10 moved the bit). */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}
1885
static void gen7_oa_enable(struct drm_i915_private *dev_priv)
{
	/*
	 * Reset buffer pointers so we don't forward reports from before now.
	 *
	 * Think carefully before trying to avoid this: it also ensures the
	 * status flags and the buffer itself are cleared in error paths,
	 * and the invalid-report checks rely on certain fields being read
	 * from zeroed memory.
	 */
	gen7_init_oa_buffer(dev_priv);

	if (dev_priv->perf.oa.exclusive_stream->enabled) {
		struct i915_gem_context *ctx =
			dev_priv->perf.oa.exclusive_stream->ctx;
		u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;

		bool periodic = dev_priv->perf.oa.periodic;
		u32 period_exponent = dev_priv->perf.oa.period_exponent;
		u32 report_format = dev_priv->perf.oa.oa_buffer.format;

		/*
		 * Single register enable: context filter, timer period,
		 * report format and the master enable bit all go together.
		 */
		I915_WRITE(GEN7_OACONTROL,
			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
			   (period_exponent <<
			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
			   GEN7_OACONTROL_ENABLE);
	} else
		I915_WRITE(GEN7_OACONTROL, 0);
}
1919
static void gen8_oa_enable(struct drm_i915_private *dev_priv)
{
	u32 report_format = dev_priv->perf.oa.oa_buffer.format;

	/*
	 * Reset buffer pointers so we don't forward reports from before now
	 * (same rationale as in gen7_oa_enable: error-path clearing and the
	 * zeroed-memory assumption of the spurious-report checks).
	 */
	gen8_init_oa_buffer(dev_priv);

	/*
	 * Note: we don't rely on the hardware to perform single-context
	 * filtering; on gen8+ we filter on the CPU based on the context-id
	 * field of the reports, so only format and enable are programmed.
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
/*
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for an OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream, then starts the hrtimer that periodically checks
 * for data in the circular OA buffer to notify userspace (via read()/poll()).
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}
1965
/* Clear OACONTROL, stopping all gen7 OA report generation. */
static void gen7_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN7_OACONTROL, 0);
}
1970
/* Clear OACONTROL, stopping all gen8+ OA report generation. */
static void gen8_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN8_OACONTROL, 0);
}
1975
1976
1977
1978
1979
1980
1981
1982
1983
/*
 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for an OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * Stops the OA unit from writing counter reports and cancels the hrtimer
 * used to notify userspace of pending data.
 */
static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}
1993
/* Stream callbacks used for all OA-based perf streams. */
static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
/**
 * i915_oa_stream_init - validate combined props and init an OA stream
 * @stream: An i915 perf stream
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: The property state that configures the stream
 *
 * Properties were validated individually by read_properties_unlocked();
 * here we check the combination makes sense, that the OA unit isn't
 * already in use, then configure the hardware and install the OA
 * stream ops. Failures unwind in reverse setup order via the err_*
 * labels.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/*
	 * If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config IDs.
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_DEBUG("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
		DRM_DEBUG("OA unit not supported\n");
		return -ENODEV;
	}

	/*
	 * To avoid the complexity of having to accurately filter counter
	 * reports and marshal them to the appropriate client we currently
	 * only allow exclusive access to the OA unit.
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_DEBUG("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->oa_format) {
		DRM_DEBUG("OA report format not specified\n");
		return -EINVAL;
	}

	/*
	 * Ratelimit state to throttle notes about spurious, invalid OA
	 * reports that we don't forward to userspace; a summary note is
	 * printed on stream close (rather than at driver fini, which no
	 * one would see).
	 */
	ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
			     5 * HZ, 10);
	/*
	 * RATELIMIT_MSG_ON_RELEASE already reports suppression on release,
	 * so silence the default "callbacks suppressed" message.
	 */
	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
			    RATELIMIT_MSG_ON_RELEASE);

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic)
		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret)
			return ret;
	}

	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
	if (ret)
		goto err_config;

	/*
	 * For coherent counter reads the OA unit needs RC6 and trunk clock
	 * gating disabled; taking a runtime-pm + FORCEWAKE_ALL reference
	 * here is expected to effectively disable RC6 for the lifetime of
	 * the stream.
	 */
	intel_runtime_pm_get(dev_priv);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		goto err_lock;

	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
						      stream->oa_config);
	if (ret)
		goto err_enable;

	stream->ops = &i915_oa_stream_ops;

	dev_priv->perf.oa.exclusive_stream = stream;

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

err_enable:
	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_lock:
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	put_oa_config(dev_priv, stream->oa_config);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

err_config:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}
2165
/*
 * Initialize the OA-related portion of a new context's register state
 * image from the currently open exclusive stream's config (if any).
 * Called while building the default register state of a context.
 */
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct i915_gem_context *ctx,
			    u32 *reg_state)
{
	struct i915_perf_stream *stream;

	/* Only the render engine carries OA state in its context image. */
	if (engine->id != RCS)
		return;

	/*
	 * NOTE(review): exclusive_stream is read here without perf.lock —
	 * presumably serialised against stream open/close by struct_mutex;
	 * confirm before relying on it.
	 */
	stream = engine->i915->perf.oa.exclusive_stream;
	if (stream)
		gen8_update_reg_state_unlocked(ctx, reg_state, stream->oa_config);
}
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
2206 struct file *file,
2207 char __user *buf,
2208 size_t count,
2209 loff_t *ppos)
2210{
2211
2212
2213
2214
2215
2216
2217 size_t offset = 0;
2218 int ret = stream->ops->read(stream, buf, count, &offset);
2219
2220 return offset ?: (ret ?: -EAGAIN);
2221}
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
/**
 * i915_perf_read - handles read() FOP for i915 perf stream FDs
 * @file: An i915 perf stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @ppos: (inout) file seek position (unused)
 *
 * Either waits for data (blocking FD) or returns -EAGAIN (non-blocking FD)
 * until at least one complete sample is available, then copies as many
 * whole samples as fit in @buf via the stream's ->read() op under
 * &drm_i915_private->perf.lock.
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 */
static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/*
	 * Treat all reads of a disabled stream as an error; in particular a
	 * blocking read could otherwise wait forever for data that will
	 * never arrive.
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * ->wait_unlocked() may wake us spuriously (data seen by
		 * the wait may not survive filtering into a whole sample),
		 * so loop while the locked read reports -EAGAIN.
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	/*
	 * poll() can report a false-positive EPOLLIN that a subsequent
	 * read() resolves to -EAGAIN. Clear the cached pollin state here so
	 * userspace backs off until the next hrtimer tick instead of busy
	 * looping between poll() and read().
	 */
	if (ret >= 0 || ret == -EAGAIN) {
		/*
		 * Maybe make ->pollin per-stream state if we support
		 * multiple concurrent streams in the future.
		 */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}
2298
/*
 * hrtimer callback fired every POLL_PERIOD: if at least one complete OA
 * report has landed in the buffer, latch oa.pollin and wake any poll()/
 * blocking-read waiters. Always re-arms itself.
 */
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (oa_buffer_check_unlocked(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
2332 struct i915_perf_stream *stream,
2333 struct file *file,
2334 poll_table *wait)
2335{
2336 __poll_t events = 0;
2337
2338 stream->ops->poll_wait(stream, file, wait);
2339
2340
2341
2342
2343
2344
2345
2346 if (dev_priv->perf.oa.pollin)
2347 events |= EPOLLIN;
2348
2349 return events;
2350}
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
/**
 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * Thin wrapper taking &drm_i915_private->perf.lock around
 * i915_perf_poll_locked() to serialize against other stream operations.
 *
 * Returns: any poll events that are ready without sleeping.
 */
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	__poll_t ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388static void i915_perf_enable_locked(struct i915_perf_stream *stream)
2389{
2390 if (stream->enabled)
2391 return;
2392
2393
2394 stream->enabled = true;
2395
2396 if (stream->ops->enable)
2397 stream->ops->enable(stream);
2398}
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414static void i915_perf_disable_locked(struct i915_perf_stream *stream)
2415{
2416 if (!stream->enabled)
2417 return;
2418
2419
2420 stream->enabled = false;
2421
2422 if (stream->ops->disable)
2423 stream->ops->disable(stream);
2424}
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
2439 unsigned int cmd,
2440 unsigned long arg)
2441{
2442 switch (cmd) {
2443 case I915_PERF_IOCTL_ENABLE:
2444 i915_perf_enable_locked(stream);
2445 return 0;
2446 case I915_PERF_IOCTL_DISABLE:
2447 i915_perf_disable_locked(stream);
2448 return 0;
2449 }
2450
2451 return -EINVAL;
2452}
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
/**
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 * @file: An i915 perf stream file
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Thin wrapper taking &drm_i915_private->perf.lock around
 * i915_perf_ioctl_locked().
 *
 * Returns: zero on success or a negative error code.
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
/**
 * i915_perf_destroy_locked - destroy an i915 perf stream
 * @stream: An i915 perf stream
 *
 * Frees all resources associated with the given i915 perf @stream,
 * disabling any associated data capture first. The caller holds
 * &drm_i915_private->perf.lock to serialize with any non-file-operation
 * driver hooks.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	/* Stop data capture before tearing the stream down. */
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	/* Drop the context reference taken at open time, if any. */
	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
/**
 * i915_perf_release - handles userspace close()ing a stream file
 * @inode: anonymous inode associated with file
 * @file: An i915 perf stream file
 *
 * Cleans up any resources associated with an open i915 perf stream file.
 *
 * NB: close() can't really fail from the userspace point of view.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	return 0;
}
2528
2529
/* File operations backing the anon fd returned by the perf open ioctl. */
static const struct file_operations fops = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,
	.release = i915_perf_release,
	.poll = i915_perf_poll,
	.read = i915_perf_read,
	.unlocked_ioctl = i915_perf_ioctl,
	/*
	 * Our ioctls have no arguments, so it's safe to use the same
	 * function to handle 32-bit compatibility.
	 */
	.compat_ioctl = i915_perf_ioctl,
};
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
/**
 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
 * @dev_priv: i915 device instance
 * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
 * @props: individually validated u64 property value pairs
 * @file: drm file
 *
 * See i915_perf_open_ioctl() for interface details; this is the body run
 * with &drm_i915_private->perf.lock held. On error, all partial state is
 * unwound via the err_* labels in reverse order of setup.
 *
 * Returns: zero on success or a negative error code.
 */
static int
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	bool privileged_op = true;
	int stream_fd;
	int ret;

	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
		if (!specific_ctx) {
			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
				  ctx_handle);
			ret = -ENOENT;
			goto err;
		}
	}

	/*
	 * Opening a stream scoped to a single context that the caller
	 * already owns is not considered privileged; everything else
	 * (system-wide metrics) requires CAP_SYS_ADMIN unless the
	 * paranoid sysctl has been cleared. Only Haswell gets the
	 * single-context exception here.
	 */
	if (IS_HASWELL(dev_priv) && specific_ctx)
		privileged_op = false;

	if (privileged_op &&
	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->dev_priv = dev_priv;
	stream->ctx = specific_ctx;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/*
	 * A stream is expected to have its sample_flags updated to reflect
	 * what it measures; catch init paths that failed to do so.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_flags;
	}

	list_add(&stream->link, &dev_priv->perf.streams);

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_open;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	return stream_fd;

err_open:
	list_del(&stream->link);
err_flags:
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx)
		i915_gem_context_put(specific_ctx);
err:
	return ret;
}
2676
2677static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
2678{
2679 return div64_u64(1000000000ULL * (2ULL << exponent),
2680 1000ULL * INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz);
2681}
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
/**
 * read_properties_unlocked - validate + copy userspace stream open properties
 * @dev_priv: i915 device instance
 * @uprops: The array of u64 key value pairs given by userspace
 * @n_props: The number of key value pairs expected in @uprops
 * @props: The stream configuration built up while validating properties
 *
 * Note this function only validates properties in isolation; it doesn't
 * validate that the combination of properties makes sense or that all
 * properties necessary for a particular kind of stream have been set.
 *
 * Returns: zero on success or a negative error code.
 */
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	u32 i;

	memset(props, 0, sizeof(struct perf_open_properties));

	if (!n_props) {
		DRM_DEBUG("No i915 perf properties given\n");
		return -EINVAL;
	}

	/*
	 * Considering that ID = 0 is reserved and assuming that we don't
	 * (currently) expect any configurations to ever specify duplicate
	 * values for a particular property ID then the last _PROP_MAX value
	 * is one greater than the maximum number of properties we expect to
	 * get from userspace.
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		DRM_DEBUG("More i915 perf properties specified than exist\n");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;
		int ret;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
			DRM_DEBUG("Unknown i915 perf property ID\n");
			return -EINVAL;
		}

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			if (value == 0) {
				DRM_DEBUG("Unknown OA metric set ID\n");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				DRM_DEBUG("Out-of-range OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			if (!dev_priv->perf.oa.oa_formats[value].size) {
				DRM_DEBUG("Unsupported OA report format %llu\n",
					  value);
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				DRM_DEBUG("OA timer exponent too high (> %u)\n",
					  OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/*
			 * Theoretically we can program the OA unit to sample
			 * e.g. every 160ns for HSW, 167ns for BDW/SKL or
			 * 104ns for CHV, but without root we limit the
			 * frequency (derived below) to the sysctl cap.
			 */
			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = oa_exponent_to_ns(dev_priv, value);

			/*
			 * This check is primarily to ensure that oa_period <=
			 * UINT32_MAX (before passing to do_div which only
			 * accepts a u32 denominator), but we can also skip
			 * checking anything < 1Hz which implicitly can't be
			 * limited via an integer oa_max_sample_rate.
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;
				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate &&
			    !capable(CAP_SYS_ADMIN)) {
				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
					  i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		case DRM_I915_PERF_PROP_MAX:
			MISSING_CASE(id);
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
/**
 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
 * @dev: drm device
 * @data: ioctl data copied from userspace (unvalidated)
 * @file: drm file
 *
 * Validates the open flags and the array of u64 key/value property pairs,
 * then opens the stream under &drm_i915_private->perf.lock via
 * i915_perf_open_ioctl_locked().
 *
 * Returns: A newly opened i915 perf stream file descriptor or a negative
 * error code.
 */
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
			   I915_PERF_FLAG_FD_NONBLOCK |
			   I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(dev_priv,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888void i915_perf_register(struct drm_i915_private *dev_priv)
2889{
2890 int ret;
2891
2892 if (!dev_priv->perf.initialized)
2893 return;
2894
2895
2896
2897
2898
2899 mutex_lock(&dev_priv->perf.lock);
2900
2901 dev_priv->perf.metrics_kobj =
2902 kobject_create_and_add("metrics",
2903 &dev_priv->drm.primary->kdev->kobj);
2904 if (!dev_priv->perf.metrics_kobj)
2905 goto exit;
2906
2907 sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);
2908
2909 if (IS_HASWELL(dev_priv)) {
2910 i915_perf_load_test_config_hsw(dev_priv);
2911 } else if (IS_BROADWELL(dev_priv)) {
2912 i915_perf_load_test_config_bdw(dev_priv);
2913 } else if (IS_CHERRYVIEW(dev_priv)) {
2914 i915_perf_load_test_config_chv(dev_priv);
2915 } else if (IS_SKYLAKE(dev_priv)) {
2916 if (IS_SKL_GT2(dev_priv))
2917 i915_perf_load_test_config_sklgt2(dev_priv);
2918 else if (IS_SKL_GT3(dev_priv))
2919 i915_perf_load_test_config_sklgt3(dev_priv);
2920 else if (IS_SKL_GT4(dev_priv))
2921 i915_perf_load_test_config_sklgt4(dev_priv);
2922 } else if (IS_BROXTON(dev_priv)) {
2923 i915_perf_load_test_config_bxt(dev_priv);
2924 } else if (IS_KABYLAKE(dev_priv)) {
2925 if (IS_KBL_GT2(dev_priv))
2926 i915_perf_load_test_config_kblgt2(dev_priv);
2927 else if (IS_KBL_GT3(dev_priv))
2928 i915_perf_load_test_config_kblgt3(dev_priv);
2929 } else if (IS_GEMINILAKE(dev_priv)) {
2930 i915_perf_load_test_config_glk(dev_priv);
2931 } else if (IS_COFFEELAKE(dev_priv)) {
2932 if (IS_CFL_GT2(dev_priv))
2933 i915_perf_load_test_config_cflgt2(dev_priv);
2934 if (IS_CFL_GT3(dev_priv))
2935 i915_perf_load_test_config_cflgt3(dev_priv);
2936 } else if (IS_CANNONLAKE(dev_priv)) {
2937 i915_perf_load_test_config_cnl(dev_priv);
2938 }
2939
2940 if (dev_priv->perf.oa.test_config.id == 0)
2941 goto sysfs_error;
2942
2943 ret = sysfs_create_group(dev_priv->perf.metrics_kobj,
2944 &dev_priv->perf.oa.test_config.sysfs_metric);
2945 if (ret)
2946 goto sysfs_error;
2947
2948 atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
2949
2950 goto exit;
2951
2952sysfs_error:
2953 kobject_put(dev_priv->perf.metrics_kobj);
2954 dev_priv->perf.metrics_kobj = NULL;
2955
2956exit:
2957 mutex_unlock(&dev_priv->perf.lock);
2958}
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
/**
 * i915_perf_unregister - hide i915-perf from userspace
 * @dev_priv: i915 device instance
 *
 * i915-perf state cleanup is split up into an 'unregister' and
 * 'deinit' phase where the interface is first hidden from
 * userspace by i915_perf_unregister() before cleaning up
 * remaining state in i915_perf_fini().
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.oa.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}
2980
2981static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
2982{
2983 static const i915_reg_t flex_eu_regs[] = {
2984 EU_PERF_CNTL0,
2985 EU_PERF_CNTL1,
2986 EU_PERF_CNTL2,
2987 EU_PERF_CNTL3,
2988 EU_PERF_CNTL4,
2989 EU_PERF_CNTL5,
2990 EU_PERF_CNTL6,
2991 };
2992 int i;
2993
2994 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
2995 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
2996 return true;
2997 }
2998 return false;
2999}
3000
3001static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
3002{
3003 return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
3004 addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
3005 (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
3006 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
3007 (addr >= i915_mmio_reg_offset(OACEC0_0) &&
3008 addr <= i915_mmio_reg_offset(OACEC7_1));
3009}
3010
3011static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3012{
3013 return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
3014 (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
3015 addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
3016 (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
3017 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
3018 (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
3019 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
3020}
3021
3022static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3023{
3024 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3025 addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
3026 (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
3027 addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
3028}
3029
3030static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3031{
3032 return gen8_is_valid_mux_addr(dev_priv, addr) ||
3033 (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
3034 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
3035}
3036
3037static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3038{
3039 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3040 (addr >= 0x25100 && addr <= 0x2FF90) ||
3041 (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
3042 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
3043 addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
3044}
3045
3046static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
3047{
3048 return gen7_is_valid_mux_addr(dev_priv, addr) ||
3049 (addr >= 0x182300 && addr <= 0x1823A4);
3050}
3051
3052static uint32_t mask_reg_value(u32 reg, u32 val)
3053{
3054
3055
3056
3057
3058 if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
3059 val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
3060
3061
3062
3063
3064
3065 if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
3066 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
3067
3068 return val;
3069}
3070
/*
 * Copy and validate a userspace table of @n_regs (addr, value) u32 pairs.
 *
 * Each address is checked against the per-platform @is_valid whitelist
 * predicate before being accepted, and each value is sanitized through
 * mask_reg_value().
 *
 * Returns NULL when @n_regs is zero, a kmalloc'ed array of @n_regs
 * entries on success (ownership passes to the caller), or an ERR_PTR()
 * on failure.
 */
static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
					 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	/* Check the whole table up front before copying any of it. */
	if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2))
		return ERR_PTR(-EFAULT);

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		/* Reject any address not on the platform whitelist. */
		if (!is_valid(dev_priv, addr)) {
			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}
3124
3125static ssize_t show_dynamic_id(struct device *dev,
3126 struct device_attribute *attr,
3127 char *buf)
3128{
3129 struct i915_oa_config *oa_config =
3130 container_of(attr, typeof(*oa_config), sysfs_metric_id);
3131
3132 return sprintf(buf, "%d\n", oa_config->id);
3133}
3134
/*
 * Register a sysfs group for a dynamically added OA config under
 * metrics/<uuid>/, exposing a single read-only "id" attribute backed by
 * show_dynamic_id(). Returns 0 or the sysfs_create_group() error.
 */
static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	/* The group is named after the config's uuid. */
	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(dev_priv->perf.metrics_kobj,
				  &oa_config->sysfs_metric);
}
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
3168 struct drm_file *file)
3169{
3170 struct drm_i915_private *dev_priv = dev->dev_private;
3171 struct drm_i915_perf_oa_config *args = data;
3172 struct i915_oa_config *oa_config, *tmp;
3173 int err, id;
3174
3175 if (!dev_priv->perf.initialized) {
3176 DRM_DEBUG("i915 perf interface not available for this system\n");
3177 return -ENOTSUPP;
3178 }
3179
3180 if (!dev_priv->perf.metrics_kobj) {
3181 DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
3182 return -EINVAL;
3183 }
3184
3185 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
3186 DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
3187 return -EACCES;
3188 }
3189
3190 if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
3191 (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
3192 (!args->flex_regs_ptr || !args->n_flex_regs)) {
3193 DRM_DEBUG("No OA registers given\n");
3194 return -EINVAL;
3195 }
3196
3197 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
3198 if (!oa_config) {
3199 DRM_DEBUG("Failed to allocate memory for the OA config\n");
3200 return -ENOMEM;
3201 }
3202
3203 atomic_set(&oa_config->ref_count, 1);
3204
3205 if (!uuid_is_valid(args->uuid)) {
3206 DRM_DEBUG("Invalid uuid format for OA config\n");
3207 err = -EINVAL;
3208 goto reg_err;
3209 }
3210
3211
3212
3213
3214 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
3215
3216 oa_config->mux_regs_len = args->n_mux_regs;
3217 oa_config->mux_regs =
3218 alloc_oa_regs(dev_priv,
3219 dev_priv->perf.oa.ops.is_valid_mux_reg,
3220 u64_to_user_ptr(args->mux_regs_ptr),
3221 args->n_mux_regs);
3222
3223 if (IS_ERR(oa_config->mux_regs)) {
3224 DRM_DEBUG("Failed to create OA config for mux_regs\n");
3225 err = PTR_ERR(oa_config->mux_regs);
3226 goto reg_err;
3227 }
3228
3229 oa_config->b_counter_regs_len = args->n_boolean_regs;
3230 oa_config->b_counter_regs =
3231 alloc_oa_regs(dev_priv,
3232 dev_priv->perf.oa.ops.is_valid_b_counter_reg,
3233 u64_to_user_ptr(args->boolean_regs_ptr),
3234 args->n_boolean_regs);
3235
3236 if (IS_ERR(oa_config->b_counter_regs)) {
3237 DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
3238 err = PTR_ERR(oa_config->b_counter_regs);
3239 goto reg_err;
3240 }
3241
3242 if (INTEL_GEN(dev_priv) < 8) {
3243 if (args->n_flex_regs != 0) {
3244 err = -EINVAL;
3245 goto reg_err;
3246 }
3247 } else {
3248 oa_config->flex_regs_len = args->n_flex_regs;
3249 oa_config->flex_regs =
3250 alloc_oa_regs(dev_priv,
3251 dev_priv->perf.oa.ops.is_valid_flex_reg,
3252 u64_to_user_ptr(args->flex_regs_ptr),
3253 args->n_flex_regs);
3254
3255 if (IS_ERR(oa_config->flex_regs)) {
3256 DRM_DEBUG("Failed to create OA config for flex_regs\n");
3257 err = PTR_ERR(oa_config->flex_regs);
3258 goto reg_err;
3259 }
3260 }
3261
3262 err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
3263 if (err)
3264 goto reg_err;
3265
3266
3267
3268
3269 idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
3270 if (!strcmp(tmp->uuid, oa_config->uuid)) {
3271 DRM_DEBUG("OA config already exists with this uuid\n");
3272 err = -EADDRINUSE;
3273 goto sysfs_err;
3274 }
3275 }
3276
3277 err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
3278 if (err) {
3279 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
3280 goto sysfs_err;
3281 }
3282
3283
3284 oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
3285 oa_config, 2,
3286 0, GFP_KERNEL);
3287 if (oa_config->id < 0) {
3288 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
3289 err = oa_config->id;
3290 goto sysfs_err;
3291 }
3292
3293 mutex_unlock(&dev_priv->perf.metrics_lock);
3294
3295 return oa_config->id;
3296
3297sysfs_err:
3298 mutex_unlock(&dev_priv->perf.metrics_lock);
3299reg_err:
3300 put_oa_config(dev_priv, oa_config);
3301 DRM_DEBUG("Failed to add new OA config\n");
3302 return err;
3303}
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
3317 struct drm_file *file)
3318{
3319 struct drm_i915_private *dev_priv = dev->dev_private;
3320 u64 *arg = data;
3321 struct i915_oa_config *oa_config;
3322 int ret;
3323
3324 if (!dev_priv->perf.initialized) {
3325 DRM_DEBUG("i915 perf interface not available for this system\n");
3326 return -ENOTSUPP;
3327 }
3328
3329 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
3330 DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
3331 return -EACCES;
3332 }
3333
3334 ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
3335 if (ret)
3336 goto lock_err;
3337
3338 oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
3339 if (!oa_config) {
3340 DRM_DEBUG("Failed to remove unknown OA config\n");
3341 ret = -ENOENT;
3342 goto config_err;
3343 }
3344
3345 GEM_BUG_ON(*arg != oa_config->id);
3346
3347 sysfs_remove_group(dev_priv->perf.metrics_kobj,
3348 &oa_config->sysfs_metric);
3349
3350 idr_remove(&dev_priv->perf.metrics_idr, *arg);
3351 put_oa_config(dev_priv, oa_config);
3352
3353config_err:
3354 mutex_unlock(&dev_priv->perf.metrics_lock);
3355lock_err:
3356 return ret;
3357}
3358
/*
 * i915-perf sysctl knobs, published under /proc/sys/dev/i915/ via the
 * dev_root/i915_root tables below (registered in i915_perf_init()).
 */
static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,	/* clamp writes to the boolean range 0..1 */
	 .extra2 = &one,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 /* upper bound computed at init time in i915_perf_init() */
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}	/* sentinel */
};
3380
/* Intermediate "i915" sysctl directory holding oa_table. */
static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,	/* read/search only: it's a directory */
	 .child = oa_table,
	 },
	{}	/* sentinel */
};
3390
/* Top-level "dev" sysctl directory; yields /proc/sys/dev/i915/... */
static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,	/* read/search only: it's a directory */
	 .child = i915_root,
	 },
	{}	/* sentinel */
};
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Wires up the per-platform OA vfuncs and state, then initializes the
 * locks, timer, idr and sysctl table shared by all streams.  Nothing is
 * exposed to userspace here.  If the running platform has no OA support,
 * ops.enable_metric_set stays NULL and dev_priv->perf.initialized remains
 * false.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		/* Haswell: gen7 OA unit with HSW-specific metric sets. */
		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.oa.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		/* gen7 has no flex-EU registers to validate */
		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.oa.ops.read = gen7_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/*
		 * Gen8+ (execlists-capable) platforms share the gen8 OA
		 * buffer/read machinery; per-gen differences below are
		 * limited to register validation, metric-set programming
		 * and context-image offsets.
		 */
		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer;
		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.oa.ops.read = gen8_oa_read;
		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN8(dev_priv) || IS_GEN9(dev_priv)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			/* CHV has its own mux address ranges */
			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.oa.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN8(dev_priv)) {
				/* BDW/CHV context-image offsets and valid bit */
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
			} else {
				/* gen9 moved these offsets and the valid bit */
				dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
			}
		} else if (IS_GEN10(dev_priv)) {
			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.oa.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.oa.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;

			/* gen10 keeps the gen9 context-image layout */
			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;

			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
		}
	}

	/*
	 * enable_metric_set doubles as the "this platform supports OA"
	 * flag: only finish initialization if it was set above.
	 */
	if (dev_priv->perf.oa.ops.enable_metric_set) {
		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);
		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);

		/*
		 * Cap the sample rate at half the CS timestamp frequency
		 * (kHz -> Hz); this feeds the oa_max_sample_rate sysctl
		 * bound in oa_table.
		 */
		oa_sample_rate_hard_limit = 1000 *
			(INTEL_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		dev_priv->perf.initialized = true;
	}
}
3508
/*
 * idr_for_each() callback used by i915_perf_fini(): releases the OA
 * config @p against the device passed in @data.  Always returns 0 so
 * the iteration visits every remaining entry.
 */
static int destroy_config(int id, void *p, void *data)
{
	put_oa_config((struct drm_i915_private *)data,
		      (struct i915_oa_config *)p);

	return 0;
}
3518
3519
3520
3521
3522
3523void i915_perf_fini(struct drm_i915_private *dev_priv)
3524{
3525 if (!dev_priv->perf.initialized)
3526 return;
3527
3528 idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
3529 idr_destroy(&dev_priv->perf.metrics_idr);
3530
3531 unregister_sysctl_table(dev_priv->perf.sysctl_header);
3532
3533 memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
3534
3535 dev_priv->perf.initialized = false;
3536}
3537