1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194#include <linux/anon_inodes.h>
195#include <linux/sizes.h>
196#include <linux/uuid.h>
197
198#include "gem/i915_gem_context.h"
199#include "gt/intel_engine_pm.h"
200#include "gt/intel_engine_user.h"
201#include "gt/intel_gt.h"
202#include "gt/intel_lrc_reg.h"
203#include "gt/intel_ring.h"
204
205#include "i915_drv.h"
206#include "i915_perf.h"
207
208
209
210
211
/*
 * Size in bytes of the OA buffer. OA_TAKEN() masks with (OA_BUFFER_SIZE - 1),
 * so this value has to stay a power of two.
 */
#define OA_BUFFER_SIZE		SZ_16M

/*
 * Number of bytes between @head and @tail, wrapped to the OA buffer size.
 *
 * Both arguments are parenthesized so that callers may pass expressions such
 * as "tail - gtt_offset" without the operands being regrouped on expansion
 * (unparenthesized, OA_TAKEN(a - g, b - g) would compute a - g - b - g).
 */
#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248#define OA_TAIL_MARGIN_NSEC 100000ULL
249#define INVALID_TAIL_PTR 0xffffffff
250
251
252
253
254#define DEFAULT_POLL_FREQUENCY_HZ 200
255#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
256
257
258static u32 i915_perf_stream_paranoid = true;
259
260
261
262
263
264
265
266
267
/*
 * Upper bound for the OA timer period exponent.
 * NOTE(review): the check that enforces it is outside this section.
 */
#define OA_EXPONENT_MAX			31

/* Context ID value used to mark "no valid context". */
#define INVALID_CTX_ID			0xffffffff

/*
 * Layout of the "reason" field in dword 0 of a Gen8+ OA report, as decoded by
 * gen8_append_oa_reports(): the field starts at bit OAREPORT_REASON_SHIFT and
 * is masked with the EXTENDED mask on Gen12, the plain mask otherwise.
 */
#define OAREPORT_REASON_MASK		0x3f
#define OAREPORT_REASON_MASK_EXTENDED	0x7f
#define OAREPORT_REASON_SHIFT		19
#define OAREPORT_REASON_TIMER		(1 << 0)
#define OAREPORT_REASON_CTX_SWITCH	(1 << 3)
#define OAREPORT_REASON_CLK_RATIO	(1 << 5)
279
280
281
282
283
284
285
286
287
288static int oa_sample_rate_hard_limit;
289
290
291
292
293
294
295
296static u32 i915_oa_max_sample_rate = 100000;
297
298
299
300
301
302static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
303 [I915_OA_FORMAT_A13] = { 0, 64 },
304 [I915_OA_FORMAT_A29] = { 1, 128 },
305 [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
306
307 [I915_OA_FORMAT_B4_C8] = { 4, 64 },
308 [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
309 [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
310 [I915_OA_FORMAT_C4_B8] = { 7, 64 },
311};
312
313static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
314 [I915_OA_FORMAT_A12] = { 0, 64 },
315 [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
316 [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
317 [I915_OA_FORMAT_C4_B8] = { 7, 64 },
318};
319
320static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
321 [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
322};
323
/*
 * Bit in a stream's sample_flags: when set, append_oa_sample() copies the raw
 * OA report into each sample record handed to userspace.
 */
#define SAMPLE_OA_REPORT	(1 << 0)
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349struct perf_open_properties {
350 u32 sample_flags;
351
352 u64 single_context:1;
353 u64 hold_preemption:1;
354 u64 ctx_handle;
355
356
357 int metrics_set;
358 int oa_format;
359 bool oa_periodic;
360 int oa_period_exponent;
361
362 struct intel_engine_cs *engine;
363
364 bool has_sseu;
365 struct intel_sseu sseu;
366
367 u64 poll_oa_period;
368};
369
370struct i915_oa_config_bo {
371 struct llist_node node;
372
373 struct i915_oa_config *oa_config;
374 struct i915_vma *vma;
375};
376
/* NOTE(review): registered/unregistered outside this section - confirm. */
static struct ctl_table_header *sysctl_header;

/* Forward declaration; the definition is further down the file. */
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
380
381void i915_oa_config_release(struct kref *ref)
382{
383 struct i915_oa_config *oa_config =
384 container_of(ref, typeof(*oa_config), ref);
385
386 kfree(oa_config->flex_regs);
387 kfree(oa_config->b_counter_regs);
388 kfree(oa_config->mux_regs);
389
390 kfree_rcu(oa_config, rcu);
391}
392
393struct i915_oa_config *
394i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
395{
396 struct i915_oa_config *oa_config;
397
398 rcu_read_lock();
399 oa_config = idr_find(&perf->metrics_idr, metrics_set);
400 if (oa_config)
401 oa_config = i915_oa_config_get(oa_config);
402 rcu_read_unlock();
403
404 return oa_config;
405}
406
407static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
408{
409 i915_oa_config_put(oa_bo->oa_config);
410 i915_vma_put(oa_bo->vma);
411 kfree(oa_bo);
412}
413
414static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
415{
416 struct intel_uncore *uncore = stream->uncore;
417
418 return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
419 GEN12_OAG_OATAILPTR_MASK;
420}
421
422static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
423{
424 struct intel_uncore *uncore = stream->uncore;
425
426 return intel_uncore_read(uncore, GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
427}
428
429static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
430{
431 struct intel_uncore *uncore = stream->uncore;
432 u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
433
434 return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
435}
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
462{
463 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
464 int report_size = stream->oa_buffer.format_size;
465 unsigned long flags;
466 bool pollin;
467 u32 hw_tail;
468 u64 now;
469
470
471
472
473
474 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
475
476 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
477
478
479
480
481 hw_tail &= ~(report_size - 1);
482
483 now = ktime_get_mono_fast_ns();
484
485 if (hw_tail == stream->oa_buffer.aging_tail &&
486 (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
487
488
489
490
491 stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
492 } else {
493 u32 head, tail, aged_tail;
494
495
496
497
498
499 head = stream->oa_buffer.head - gtt_offset;
500 aged_tail = stream->oa_buffer.tail - gtt_offset;
501
502 hw_tail -= gtt_offset;
503 tail = hw_tail;
504
505
506
507
508
509
510
511
512
513
514
515
516 while (OA_TAKEN(tail, aged_tail) >= report_size) {
517 u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
518
519 if (report32[0] != 0 || report32[1] != 0)
520 break;
521
522 tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
523 }
524
525 if (OA_TAKEN(hw_tail, tail) > report_size &&
526 __ratelimit(&stream->perf->tail_pointer_race))
527 DRM_NOTE("unlanded report(s) head=0x%x "
528 "tail=0x%x hw_tail=0x%x\n",
529 head, tail, hw_tail);
530
531 stream->oa_buffer.tail = gtt_offset + tail;
532 stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
533 stream->oa_buffer.aging_timestamp = now;
534 }
535
536 pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
537 stream->oa_buffer.head - gtt_offset) >= report_size;
538
539 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
540
541 return pollin;
542}
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559static int append_oa_status(struct i915_perf_stream *stream,
560 char __user *buf,
561 size_t count,
562 size_t *offset,
563 enum drm_i915_perf_record_type type)
564{
565 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
566
567 if ((count - *offset) < header.size)
568 return -ENOSPC;
569
570 if (copy_to_user(buf + *offset, &header, sizeof(header)))
571 return -EFAULT;
572
573 (*offset) += header.size;
574
575 return 0;
576}
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595static int append_oa_sample(struct i915_perf_stream *stream,
596 char __user *buf,
597 size_t count,
598 size_t *offset,
599 const u8 *report)
600{
601 int report_size = stream->oa_buffer.format_size;
602 struct drm_i915_perf_record_header header;
603 u32 sample_flags = stream->sample_flags;
604
605 header.type = DRM_I915_PERF_RECORD_SAMPLE;
606 header.pad = 0;
607 header.size = stream->sample_size;
608
609 if ((count - *offset) < header.size)
610 return -ENOSPC;
611
612 buf += *offset;
613 if (copy_to_user(buf, &header, sizeof(header)))
614 return -EFAULT;
615 buf += sizeof(header);
616
617 if (sample_flags & SAMPLE_OA_REPORT) {
618 if (copy_to_user(buf, report, report_size))
619 return -EFAULT;
620 }
621
622 (*offset) += header.size;
623
624 return 0;
625}
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648static int gen8_append_oa_reports(struct i915_perf_stream *stream,
649 char __user *buf,
650 size_t count,
651 size_t *offset)
652{
653 struct intel_uncore *uncore = stream->uncore;
654 int report_size = stream->oa_buffer.format_size;
655 u8 *oa_buf_base = stream->oa_buffer.vaddr;
656 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
657 u32 mask = (OA_BUFFER_SIZE - 1);
658 size_t start_offset = *offset;
659 unsigned long flags;
660 u32 head, tail;
661 u32 taken;
662 int ret = 0;
663
664 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
665 return -EIO;
666
667 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
668
669 head = stream->oa_buffer.head;
670 tail = stream->oa_buffer.tail;
671
672 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
673
674
675
676
677
678 head -= gtt_offset;
679 tail -= gtt_offset;
680
681
682
683
684
685
686
687
688 if (drm_WARN_ONCE(&uncore->i915->drm,
689 head > OA_BUFFER_SIZE || head % report_size ||
690 tail > OA_BUFFER_SIZE || tail % report_size,
691 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
692 head, tail))
693 return -EIO;
694
695
696 for (;
697 (taken = OA_TAKEN(tail, head));
698 head = (head + report_size) & mask) {
699 u8 *report = oa_buf_base + head;
700 u32 *report32 = (void *)report;
701 u32 ctx_id;
702 u32 reason;
703
704
705
706
707
708
709
710
711
712
713 if (drm_WARN_ON(&uncore->i915->drm,
714 (OA_BUFFER_SIZE - head) < report_size)) {
715 drm_err(&uncore->i915->drm,
716 "Spurious OA head ptr: non-integral report offset\n");
717 break;
718 }
719
720
721
722
723
724
725
726
727
728
729 reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
730 (IS_GEN(stream->perf->i915, 12) ?
731 OAREPORT_REASON_MASK_EXTENDED :
732 OAREPORT_REASON_MASK));
733 if (reason == 0) {
734 if (__ratelimit(&stream->perf->spurious_report_rs))
735 DRM_NOTE("Skipping spurious, invalid OA report\n");
736 continue;
737 }
738
739 ctx_id = report32[2] & stream->specific_ctx_id_mask;
740
741
742
743
744
745
746
747
748
749 if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
750 INTEL_GEN(stream->perf->i915) <= 11)
751 ctx_id = report32[2] = INVALID_CTX_ID;
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784 if (!stream->perf->exclusive_stream->ctx ||
785 stream->specific_ctx_id == ctx_id ||
786 stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
787 reason & OAREPORT_REASON_CTX_SWITCH) {
788
789
790
791
792
793 if (stream->perf->exclusive_stream->ctx &&
794 stream->specific_ctx_id != ctx_id) {
795 report32[2] = INVALID_CTX_ID;
796 }
797
798 ret = append_oa_sample(stream, buf, count, offset,
799 report);
800 if (ret)
801 break;
802
803 stream->oa_buffer.last_ctx_id = ctx_id;
804 }
805
806
807
808
809
810 report32[0] = 0;
811 report32[1] = 0;
812 }
813
814 if (start_offset != *offset) {
815 i915_reg_t oaheadptr;
816
817 oaheadptr = IS_GEN(stream->perf->i915, 12) ?
818 GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
819
820 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
821
822
823
824
825
826 head += gtt_offset;
827 intel_uncore_write(uncore, oaheadptr,
828 head & GEN12_OAG_OAHEADPTR_MASK);
829 stream->oa_buffer.head = head;
830
831 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
832 }
833
834 return ret;
835}
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857static int gen8_oa_read(struct i915_perf_stream *stream,
858 char __user *buf,
859 size_t count,
860 size_t *offset)
861{
862 struct intel_uncore *uncore = stream->uncore;
863 u32 oastatus;
864 i915_reg_t oastatus_reg;
865 int ret;
866
867 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
868 return -EIO;
869
870 oastatus_reg = IS_GEN(stream->perf->i915, 12) ?
871 GEN12_OAG_OASTATUS : GEN8_OASTATUS;
872
873 oastatus = intel_uncore_read(uncore, oastatus_reg);
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889 if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
890 ret = append_oa_status(stream, buf, count, offset,
891 DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
892 if (ret)
893 return ret;
894
895 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
896 stream->period_exponent);
897
898 stream->perf->ops.oa_disable(stream);
899 stream->perf->ops.oa_enable(stream);
900
901
902
903
904
905 oastatus = intel_uncore_read(uncore, oastatus_reg);
906 }
907
908 if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
909 ret = append_oa_status(stream, buf, count, offset,
910 DRM_I915_PERF_RECORD_OA_REPORT_LOST);
911 if (ret)
912 return ret;
913
914 intel_uncore_rmw(uncore, oastatus_reg,
915 GEN8_OASTATUS_COUNTER_OVERFLOW |
916 GEN8_OASTATUS_REPORT_LOST,
917 IS_GEN_RANGE(uncore->i915, 8, 11) ?
918 (GEN8_OASTATUS_HEAD_POINTER_WRAP |
919 GEN8_OASTATUS_TAIL_POINTER_WRAP) : 0);
920 }
921
922 return gen8_append_oa_reports(stream, buf, count, offset);
923}
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946static int gen7_append_oa_reports(struct i915_perf_stream *stream,
947 char __user *buf,
948 size_t count,
949 size_t *offset)
950{
951 struct intel_uncore *uncore = stream->uncore;
952 int report_size = stream->oa_buffer.format_size;
953 u8 *oa_buf_base = stream->oa_buffer.vaddr;
954 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
955 u32 mask = (OA_BUFFER_SIZE - 1);
956 size_t start_offset = *offset;
957 unsigned long flags;
958 u32 head, tail;
959 u32 taken;
960 int ret = 0;
961
962 if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
963 return -EIO;
964
965 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
966
967 head = stream->oa_buffer.head;
968 tail = stream->oa_buffer.tail;
969
970 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
971
972
973
974
975 head -= gtt_offset;
976 tail -= gtt_offset;
977
978
979
980
981
982
983
984 if (drm_WARN_ONCE(&uncore->i915->drm,
985 head > OA_BUFFER_SIZE || head % report_size ||
986 tail > OA_BUFFER_SIZE || tail % report_size,
987 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
988 head, tail))
989 return -EIO;
990
991
992 for (;
993 (taken = OA_TAKEN(tail, head));
994 head = (head + report_size) & mask) {
995 u8 *report = oa_buf_base + head;
996 u32 *report32 = (void *)report;
997
998
999
1000
1001
1002
1003
1004
1005
1006 if (drm_WARN_ON(&uncore->i915->drm,
1007 (OA_BUFFER_SIZE - head) < report_size)) {
1008 drm_err(&uncore->i915->drm,
1009 "Spurious OA head ptr: non-integral report offset\n");
1010 break;
1011 }
1012
1013
1014
1015
1016
1017
1018
1019 if (report32[0] == 0) {
1020 if (__ratelimit(&stream->perf->spurious_report_rs))
1021 DRM_NOTE("Skipping spurious, invalid OA report\n");
1022 continue;
1023 }
1024
1025 ret = append_oa_sample(stream, buf, count, offset, report);
1026 if (ret)
1027 break;
1028
1029
1030
1031
1032 report32[0] = 0;
1033 report32[1] = 0;
1034 }
1035
1036 if (start_offset != *offset) {
1037 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1038
1039
1040
1041
1042 head += gtt_offset;
1043
1044 intel_uncore_write(uncore, GEN7_OASTATUS2,
1045 (head & GEN7_OASTATUS2_HEAD_MASK) |
1046 GEN7_OASTATUS2_MEM_SELECT_GGTT);
1047 stream->oa_buffer.head = head;
1048
1049 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1050 }
1051
1052 return ret;
1053}
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071static int gen7_oa_read(struct i915_perf_stream *stream,
1072 char __user *buf,
1073 size_t count,
1074 size_t *offset)
1075{
1076 struct intel_uncore *uncore = stream->uncore;
1077 u32 oastatus1;
1078 int ret;
1079
1080 if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr))
1081 return -EIO;
1082
1083 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
1084
1085
1086
1087
1088
1089
1090 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112 if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
1113 ret = append_oa_status(stream, buf, count, offset,
1114 DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
1115 if (ret)
1116 return ret;
1117
1118 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
1119 stream->period_exponent);
1120
1121 stream->perf->ops.oa_disable(stream);
1122 stream->perf->ops.oa_enable(stream);
1123
1124 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1);
1125 }
1126
1127 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
1128 ret = append_oa_status(stream, buf, count, offset,
1129 DRM_I915_PERF_RECORD_OA_REPORT_LOST);
1130 if (ret)
1131 return ret;
1132 stream->perf->gen7_latched_oastatus1 |=
1133 GEN7_OASTATUS1_REPORT_LOST;
1134 }
1135
1136 return gen7_append_oa_reports(stream, buf, count, offset);
1137}
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1154{
1155
1156 if (!stream->periodic)
1157 return -EIO;
1158
1159 return wait_event_interruptible(stream->poll_wq,
1160 oa_buffer_check_unlocked(stream));
1161}
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173static void i915_oa_poll_wait(struct i915_perf_stream *stream,
1174 struct file *file,
1175 poll_table *wait)
1176{
1177 poll_wait(file, &stream->poll_wq, wait);
1178}
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192static int i915_oa_read(struct i915_perf_stream *stream,
1193 char __user *buf,
1194 size_t count,
1195 size_t *offset)
1196{
1197 return stream->perf->ops.read(stream, buf, count, offset);
1198}
1199
1200static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
1201{
1202 struct i915_gem_engines_iter it;
1203 struct i915_gem_context *ctx = stream->ctx;
1204 struct intel_context *ce;
1205 struct i915_gem_ww_ctx ww;
1206 int err = -ENODEV;
1207
1208 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
1209 if (ce->engine != stream->engine)
1210 continue;
1211
1212 err = 0;
1213 break;
1214 }
1215 i915_gem_context_unlock_engines(ctx);
1216
1217 if (err)
1218 return ERR_PTR(err);
1219
1220 i915_gem_ww_ctx_init(&ww, true);
1221retry:
1222
1223
1224
1225
1226 err = intel_context_pin_ww(ce, &ww);
1227 if (err == -EDEADLK) {
1228 err = i915_gem_ww_ctx_backoff(&ww);
1229 if (!err)
1230 goto retry;
1231 }
1232 i915_gem_ww_ctx_fini(&ww);
1233
1234 if (err)
1235 return ERR_PTR(err);
1236
1237 stream->pinned_ctx = ce;
1238 return stream->pinned_ctx;
1239}
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
1252{
1253 struct intel_context *ce;
1254
1255 ce = oa_pin_context(stream);
1256 if (IS_ERR(ce))
1257 return PTR_ERR(ce);
1258
1259 switch (INTEL_GEN(ce->engine->i915)) {
1260 case 7: {
1261
1262
1263
1264
1265 stream->specific_ctx_id = i915_ggtt_offset(ce->state);
1266 stream->specific_ctx_id_mask = 0;
1267 break;
1268 }
1269
1270 case 8:
1271 case 9:
1272 case 10:
1273 if (intel_engine_in_execlists_submission_mode(ce->engine)) {
1274 stream->specific_ctx_id_mask =
1275 (1U << GEN8_CTX_ID_WIDTH) - 1;
1276 stream->specific_ctx_id = stream->specific_ctx_id_mask;
1277 } else {
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288 stream->specific_ctx_id = ce->lrc.lrca >> 12;
1289
1290
1291
1292
1293
1294 stream->specific_ctx_id_mask =
1295 (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
1296 }
1297 break;
1298
1299 case 11:
1300 case 12: {
1301 stream->specific_ctx_id_mask =
1302 ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
1303
1304
1305
1306
1307
1308 stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
1309 break;
1310 }
1311
1312 default:
1313 MISSING_CASE(INTEL_GEN(ce->engine->i915));
1314 }
1315
1316 ce->tag = stream->specific_ctx_id;
1317
1318 drm_dbg(&stream->perf->i915->drm,
1319 "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
1320 stream->specific_ctx_id,
1321 stream->specific_ctx_id_mask);
1322
1323 return 0;
1324}
1325
1326
1327
1328
1329
1330
1331
1332
1333static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1334{
1335 struct intel_context *ce;
1336
1337 ce = fetch_and_zero(&stream->pinned_ctx);
1338 if (ce) {
1339 ce->tag = 0;
1340 intel_context_unpin(ce);
1341 }
1342
1343 stream->specific_ctx_id = INVALID_CTX_ID;
1344 stream->specific_ctx_id_mask = 0;
1345}
1346
1347static void
1348free_oa_buffer(struct i915_perf_stream *stream)
1349{
1350 i915_vma_unpin_and_release(&stream->oa_buffer.vma,
1351 I915_VMA_RELEASE_MAP);
1352
1353 stream->oa_buffer.vaddr = NULL;
1354}
1355
1356static void
1357free_oa_configs(struct i915_perf_stream *stream)
1358{
1359 struct i915_oa_config_bo *oa_bo, *tmp;
1360
1361 i915_oa_config_put(stream->oa_config);
1362 llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
1363 free_oa_config_bo(oa_bo);
1364}
1365
1366static void
1367free_noa_wait(struct i915_perf_stream *stream)
1368{
1369 i915_vma_unpin_and_release(&stream->noa_wait, 0);
1370}
1371
1372static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
1373{
1374 struct i915_perf *perf = stream->perf;
1375
1376 BUG_ON(stream != perf->exclusive_stream);
1377
1378
1379
1380
1381
1382
1383
1384 WRITE_ONCE(perf->exclusive_stream, NULL);
1385 perf->ops.disable_metric_set(stream);
1386
1387 free_oa_buffer(stream);
1388
1389 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
1390 intel_engine_pm_put(stream->engine);
1391
1392 if (stream->ctx)
1393 oa_put_render_ctx_id(stream);
1394
1395 free_oa_configs(stream);
1396 free_noa_wait(stream);
1397
1398 if (perf->spurious_report_rs.missed) {
1399 DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
1400 perf->spurious_report_rs.missed);
1401 }
1402}
1403
1404static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
1405{
1406 struct intel_uncore *uncore = stream->uncore;
1407 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1408 unsigned long flags;
1409
1410 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1411
1412
1413
1414
1415 intel_uncore_write(uncore, GEN7_OASTATUS2,
1416 gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT);
1417 stream->oa_buffer.head = gtt_offset;
1418
1419 intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset);
1420
1421 intel_uncore_write(uncore, GEN7_OASTATUS1,
1422 gtt_offset | OABUFFER_SIZE_16M);
1423
1424
1425 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1426 stream->oa_buffer.tail = gtt_offset;
1427
1428 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1429
1430
1431
1432
1433
1434 stream->perf->gen7_latched_oastatus1 = 0;
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1448}
1449
1450static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
1451{
1452 struct intel_uncore *uncore = stream->uncore;
1453 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1454 unsigned long flags;
1455
1456 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1457
1458 intel_uncore_write(uncore, GEN8_OASTATUS, 0);
1459 intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset);
1460 stream->oa_buffer.head = gtt_offset;
1461
1462 intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0);
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472 intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
1473 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
1474 intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
1475
1476
1477 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1478 stream->oa_buffer.tail = gtt_offset;
1479
1480
1481
1482
1483
1484
1485 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
1486
1487 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1502}
1503
1504static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
1505{
1506 struct intel_uncore *uncore = stream->uncore;
1507 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1508 unsigned long flags;
1509
1510 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
1511
1512 intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
1513 intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
1514 gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
1515 stream->oa_buffer.head = gtt_offset;
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525 intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
1526 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
1527 intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
1528 gtt_offset & GEN12_OAG_OATAILPTR_MASK);
1529
1530
1531 stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
1532 stream->oa_buffer.tail = gtt_offset;
1533
1534
1535
1536
1537
1538
1539 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
1540
1541 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555 memset(stream->oa_buffer.vaddr, 0,
1556 stream->oa_buffer.vma->size);
1557}
1558
1559static int alloc_oa_buffer(struct i915_perf_stream *stream)
1560{
1561 struct drm_i915_private *i915 = stream->perf->i915;
1562 struct drm_i915_gem_object *bo;
1563 struct i915_vma *vma;
1564 int ret;
1565
1566 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma))
1567 return -ENODEV;
1568
1569 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1570 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1571
1572 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
1573 if (IS_ERR(bo)) {
1574 drm_err(&i915->drm, "Failed to allocate OA buffer\n");
1575 return PTR_ERR(bo);
1576 }
1577
1578 i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
1579
1580
1581 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
1582 if (IS_ERR(vma)) {
1583 ret = PTR_ERR(vma);
1584 goto err_unref;
1585 }
1586 stream->oa_buffer.vma = vma;
1587
1588 stream->oa_buffer.vaddr =
1589 i915_gem_object_pin_map(bo, I915_MAP_WB);
1590 if (IS_ERR(stream->oa_buffer.vaddr)) {
1591 ret = PTR_ERR(stream->oa_buffer.vaddr);
1592 goto err_unpin;
1593 }
1594
1595 return 0;
1596
1597err_unpin:
1598 __i915_vma_unpin(vma);
1599
1600err_unref:
1601 i915_gem_object_put(bo);
1602
1603 stream->oa_buffer.vaddr = NULL;
1604 stream->oa_buffer.vma = NULL;
1605
1606 return ret;
1607}
1608
1609static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
1610 bool save, i915_reg_t reg, u32 offset,
1611 u32 dword_count)
1612{
1613 u32 cmd;
1614 u32 d;
1615
1616 cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
1617 cmd |= MI_SRM_LRM_GLOBAL_GTT;
1618 if (INTEL_GEN(stream->perf->i915) >= 8)
1619 cmd++;
1620
1621 for (d = 0; d < dword_count; d++) {
1622 *cs++ = cmd;
1623 *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
1624 *cs++ = intel_gt_scratch_offset(stream->engine->gt,
1625 offset) + 4 * d;
1626 *cs++ = 0;
1627 }
1628
1629 return cs;
1630}
1631
1632static int alloc_noa_wait(struct i915_perf_stream *stream)
1633{
1634 struct drm_i915_private *i915 = stream->perf->i915;
1635 struct drm_i915_gem_object *bo;
1636 struct i915_vma *vma;
1637 const u64 delay_ticks = 0xffffffffffffffff -
1638 i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay));
1639 const u32 base = stream->engine->mmio_base;
1640#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
1641 u32 *batch, *ts0, *cs, *jump;
1642 int ret, i;
1643 enum {
1644 START_TS,
1645 NOW_TS,
1646 DELTA_TS,
1647 JUMP_PREDICATE,
1648 DELTA_TARGET,
1649 N_CS_GPR
1650 };
1651
1652 bo = i915_gem_object_create_internal(i915, 4096);
1653 if (IS_ERR(bo)) {
1654 drm_err(&i915->drm,
1655 "Failed to allocate NOA wait batchbuffer\n");
1656 return PTR_ERR(bo);
1657 }
1658
1659
1660
1661
1662
1663
1664 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
1665 if (IS_ERR(vma)) {
1666 ret = PTR_ERR(vma);
1667 goto err_unref;
1668 }
1669
1670 batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
1671 if (IS_ERR(batch)) {
1672 ret = PTR_ERR(batch);
1673 goto err_unpin;
1674 }
1675
1676
1677 for (i = 0; i < N_CS_GPR; i++)
1678 cs = save_restore_register(
1679 stream, cs, true , CS_GPR(i),
1680 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1681 cs = save_restore_register(
1682 stream, cs, true , MI_PREDICATE_RESULT_1,
1683 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1684
1685
1686 ts0 = cs;
1687
1688
1689
1690
1691
1692
1693 *cs++ = MI_LOAD_REGISTER_IMM(1);
1694 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
1695 *cs++ = 0;
1696 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1697 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
1698 *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
1699
1700
1701
1702
1703
1704 jump = cs;
1705
1706
1707
1708
1709
1710
1711 *cs++ = MI_LOAD_REGISTER_IMM(1);
1712 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
1713 *cs++ = 0;
1714 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1715 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
1716 *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
1717
1718
1719
1720
1721
1722 *cs++ = MI_MATH(5);
1723 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
1724 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
1725 *cs++ = MI_MATH_SUB;
1726 *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
1727 *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
1728
1729
1730
1731
1732
1733
1734 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1735 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
1736 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
1737
1738
1739 *cs++ = (INTEL_GEN(i915) < 8 ?
1740 MI_BATCH_BUFFER_START :
1741 MI_BATCH_BUFFER_START_GEN8) |
1742 MI_BATCH_PREDICATE;
1743 *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
1744 *cs++ = 0;
1745
1746
1747
1748
1749
1750
1751
1752
1753 *cs++ = MI_LOAD_REGISTER_IMM(2);
1754 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
1755 *cs++ = lower_32_bits(delay_ticks);
1756 *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
1757 *cs++ = upper_32_bits(delay_ticks);
1758
1759 *cs++ = MI_MATH(4);
1760 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
1761 *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
1762 *cs++ = MI_MATH_ADD;
1763 *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
1764
1765 *cs++ = MI_ARB_CHECK;
1766
1767
1768
1769
1770
1771 *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
1772 *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
1773 *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
1774
1775
1776 *cs++ = (INTEL_GEN(i915) < 8 ?
1777 MI_BATCH_BUFFER_START :
1778 MI_BATCH_BUFFER_START_GEN8) |
1779 MI_BATCH_PREDICATE;
1780 *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
1781 *cs++ = 0;
1782
1783
1784 for (i = 0; i < N_CS_GPR; i++)
1785 cs = save_restore_register(
1786 stream, cs, false , CS_GPR(i),
1787 INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1788 cs = save_restore_register(
1789 stream, cs, false , MI_PREDICATE_RESULT_1,
1790 INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1791
1792
1793 *cs++ = MI_BATCH_BUFFER_END;
1794
1795 GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
1796
1797 i915_gem_object_flush_map(bo);
1798 __i915_gem_object_release_map(bo);
1799
1800 stream->noa_wait = vma;
1801 return 0;
1802
1803err_unpin:
1804 i915_vma_unpin_and_release(&vma, 0);
1805err_unref:
1806 i915_gem_object_put(bo);
1807 return ret;
1808}
1809
1810static u32 *write_cs_mi_lri(u32 *cs,
1811 const struct i915_oa_reg *reg_data,
1812 u32 n_regs)
1813{
1814 u32 i;
1815
1816 for (i = 0; i < n_regs; i++) {
1817 if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
1818 u32 n_lri = min_t(u32,
1819 n_regs - i,
1820 MI_LOAD_REGISTER_IMM_MAX_REGS);
1821
1822 *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
1823 }
1824 *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
1825 *cs++ = reg_data[i].value;
1826 }
1827
1828 return cs;
1829}
1830
1831static int num_lri_dwords(int num_regs)
1832{
1833 int count = 0;
1834
1835 if (num_regs > 0) {
1836 count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
1837 count += num_regs * 2;
1838 }
1839
1840 return count;
1841}
1842
1843static struct i915_oa_config_bo *
1844alloc_oa_config_buffer(struct i915_perf_stream *stream,
1845 struct i915_oa_config *oa_config)
1846{
1847 struct drm_i915_gem_object *obj;
1848 struct i915_oa_config_bo *oa_bo;
1849 size_t config_length = 0;
1850 u32 *cs;
1851 int err;
1852
1853 oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
1854 if (!oa_bo)
1855 return ERR_PTR(-ENOMEM);
1856
1857 config_length += num_lri_dwords(oa_config->mux_regs_len);
1858 config_length += num_lri_dwords(oa_config->b_counter_regs_len);
1859 config_length += num_lri_dwords(oa_config->flex_regs_len);
1860 config_length += 3;
1861 config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
1862
1863 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
1864 if (IS_ERR(obj)) {
1865 err = PTR_ERR(obj);
1866 goto err_free;
1867 }
1868
1869 cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
1870 if (IS_ERR(cs)) {
1871 err = PTR_ERR(cs);
1872 goto err_oa_bo;
1873 }
1874
1875 cs = write_cs_mi_lri(cs,
1876 oa_config->mux_regs,
1877 oa_config->mux_regs_len);
1878 cs = write_cs_mi_lri(cs,
1879 oa_config->b_counter_regs,
1880 oa_config->b_counter_regs_len);
1881 cs = write_cs_mi_lri(cs,
1882 oa_config->flex_regs,
1883 oa_config->flex_regs_len);
1884
1885
1886 *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
1887 MI_BATCH_BUFFER_START :
1888 MI_BATCH_BUFFER_START_GEN8);
1889 *cs++ = i915_ggtt_offset(stream->noa_wait);
1890 *cs++ = 0;
1891
1892 i915_gem_object_flush_map(obj);
1893 __i915_gem_object_release_map(obj);
1894
1895 oa_bo->vma = i915_vma_instance(obj,
1896 &stream->engine->gt->ggtt->vm,
1897 NULL);
1898 if (IS_ERR(oa_bo->vma)) {
1899 err = PTR_ERR(oa_bo->vma);
1900 goto err_oa_bo;
1901 }
1902
1903 oa_bo->oa_config = i915_oa_config_get(oa_config);
1904 llist_add(&oa_bo->node, &stream->oa_config_bos);
1905
1906 return oa_bo;
1907
1908err_oa_bo:
1909 i915_gem_object_put(obj);
1910err_free:
1911 kfree(oa_bo);
1912 return ERR_PTR(err);
1913}
1914
1915static struct i915_vma *
1916get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
1917{
1918 struct i915_oa_config_bo *oa_bo;
1919
1920
1921
1922
1923
1924 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
1925 if (oa_bo->oa_config == oa_config &&
1926 memcmp(oa_bo->oa_config->uuid,
1927 oa_config->uuid,
1928 sizeof(oa_config->uuid)) == 0)
1929 goto out;
1930 }
1931
1932 oa_bo = alloc_oa_config_buffer(stream, oa_config);
1933 if (IS_ERR(oa_bo))
1934 return ERR_CAST(oa_bo);
1935
1936out:
1937 return i915_vma_get(oa_bo->vma);
1938}
1939
1940static int
1941emit_oa_config(struct i915_perf_stream *stream,
1942 struct i915_oa_config *oa_config,
1943 struct intel_context *ce,
1944 struct i915_active *active)
1945{
1946 struct i915_request *rq;
1947 struct i915_vma *vma;
1948 struct i915_gem_ww_ctx ww;
1949 int err;
1950
1951 vma = get_oa_vma(stream, oa_config);
1952 if (IS_ERR(vma))
1953 return PTR_ERR(vma);
1954
1955 i915_gem_ww_ctx_init(&ww, true);
1956retry:
1957 err = i915_gem_object_lock(vma->obj, &ww);
1958 if (err)
1959 goto err;
1960
1961 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
1962 if (err)
1963 goto err;
1964
1965 intel_engine_pm_get(ce->engine);
1966 rq = i915_request_create(ce);
1967 intel_engine_pm_put(ce->engine);
1968 if (IS_ERR(rq)) {
1969 err = PTR_ERR(rq);
1970 goto err_vma_unpin;
1971 }
1972
1973 if (!IS_ERR_OR_NULL(active)) {
1974
1975 err = i915_request_await_active(rq, active,
1976 I915_ACTIVE_AWAIT_ACTIVE);
1977 if (err)
1978 goto err_add_request;
1979
1980 err = i915_active_add_request(active, rq);
1981 if (err)
1982 goto err_add_request;
1983 }
1984
1985 err = i915_request_await_object(rq, vma->obj, 0);
1986 if (!err)
1987 err = i915_vma_move_to_active(vma, rq, 0);
1988 if (err)
1989 goto err_add_request;
1990
1991 err = rq->engine->emit_bb_start(rq,
1992 vma->node.start, 0,
1993 I915_DISPATCH_SECURE);
1994 if (err)
1995 goto err_add_request;
1996
1997err_add_request:
1998 i915_request_add(rq);
1999err_vma_unpin:
2000 i915_vma_unpin(vma);
2001err:
2002 if (err == -EDEADLK) {
2003 err = i915_gem_ww_ctx_backoff(&ww);
2004 if (!err)
2005 goto retry;
2006 }
2007
2008 i915_gem_ww_ctx_fini(&ww);
2009 i915_vma_put(vma);
2010 return err;
2011}
2012
2013static struct intel_context *oa_context(struct i915_perf_stream *stream)
2014{
2015 return stream->pinned_ctx ?: stream->engine->kernel_context;
2016}
2017
2018static int
2019hsw_enable_metric_set(struct i915_perf_stream *stream,
2020 struct i915_active *active)
2021{
2022 struct intel_uncore *uncore = stream->uncore;
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2035 GEN7_DOP_CLOCK_GATE_ENABLE, 0);
2036 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2037 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
2038
2039 return emit_oa_config(stream,
2040 stream->oa_config, oa_context(stream),
2041 active);
2042}
2043
2044static void hsw_disable_metric_set(struct i915_perf_stream *stream)
2045{
2046 struct intel_uncore *uncore = stream->uncore;
2047
2048 intel_uncore_rmw(uncore, GEN6_UCGCTL1,
2049 GEN6_CSUNIT_CLOCK_GATE_DISABLE, 0);
2050 intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
2051 0, GEN7_DOP_CLOCK_GATE_ENABLE);
2052
2053 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
2054}
2055
2056static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
2057 i915_reg_t reg)
2058{
2059 u32 mmio = i915_mmio_reg_offset(reg);
2060 int i;
2061
2062
2063
2064
2065
2066
2067 if (!oa_config)
2068 return 0;
2069
2070 for (i = 0; i < oa_config->flex_regs_len; i++) {
2071 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
2072 return oa_config->flex_regs[i].value;
2073 }
2074
2075 return 0;
2076}
2077
2078
2079
2080
2081
2082
2083
2084static void
2085gen8_update_reg_state_unlocked(const struct intel_context *ce,
2086 const struct i915_perf_stream *stream)
2087{
2088 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2089 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2090
2091 i915_reg_t flex_regs[] = {
2092 EU_PERF_CNTL0,
2093 EU_PERF_CNTL1,
2094 EU_PERF_CNTL2,
2095 EU_PERF_CNTL3,
2096 EU_PERF_CNTL4,
2097 EU_PERF_CNTL5,
2098 EU_PERF_CNTL6,
2099 };
2100 u32 *reg_state = ce->lrc_reg_state;
2101 int i;
2102
2103 reg_state[ctx_oactxctrl + 1] =
2104 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2105 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2106 GEN8_OA_COUNTER_RESUME;
2107
2108 for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
2109 reg_state[ctx_flexeu0 + i * 2 + 1] =
2110 oa_config_flex_reg(stream->oa_config, flex_regs[i]);
2111}
2112
2113struct flex {
2114 i915_reg_t reg;
2115 u32 offset;
2116 u32 value;
2117};
2118
2119static int
2120gen8_store_flex(struct i915_request *rq,
2121 struct intel_context *ce,
2122 const struct flex *flex, unsigned int count)
2123{
2124 u32 offset;
2125 u32 *cs;
2126
2127 cs = intel_ring_begin(rq, 4 * count);
2128 if (IS_ERR(cs))
2129 return PTR_ERR(cs);
2130
2131 offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET;
2132 do {
2133 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
2134 *cs++ = offset + flex->offset * sizeof(u32);
2135 *cs++ = 0;
2136 *cs++ = flex->value;
2137 } while (flex++, --count);
2138
2139 intel_ring_advance(rq, cs);
2140
2141 return 0;
2142}
2143
2144static int
2145gen8_load_flex(struct i915_request *rq,
2146 struct intel_context *ce,
2147 const struct flex *flex, unsigned int count)
2148{
2149 u32 *cs;
2150
2151 GEM_BUG_ON(!count || count > 63);
2152
2153 cs = intel_ring_begin(rq, 2 * count + 2);
2154 if (IS_ERR(cs))
2155 return PTR_ERR(cs);
2156
2157 *cs++ = MI_LOAD_REGISTER_IMM(count);
2158 do {
2159 *cs++ = i915_mmio_reg_offset(flex->reg);
2160 *cs++ = flex->value;
2161 } while (flex++, --count);
2162 *cs++ = MI_NOOP;
2163
2164 intel_ring_advance(rq, cs);
2165
2166 return 0;
2167}
2168
2169static int gen8_modify_context(struct intel_context *ce,
2170 const struct flex *flex, unsigned int count)
2171{
2172 struct i915_request *rq;
2173 int err;
2174
2175 rq = intel_engine_create_kernel_request(ce->engine);
2176 if (IS_ERR(rq))
2177 return PTR_ERR(rq);
2178
2179
2180 err = intel_context_prepare_remote_request(ce, rq);
2181 if (err == 0)
2182 err = gen8_store_flex(rq, ce, flex, count);
2183
2184 i915_request_add(rq);
2185 return err;
2186}
2187
2188static int
2189gen8_modify_self(struct intel_context *ce,
2190 const struct flex *flex, unsigned int count,
2191 struct i915_active *active)
2192{
2193 struct i915_request *rq;
2194 int err;
2195
2196 intel_engine_pm_get(ce->engine);
2197 rq = i915_request_create(ce);
2198 intel_engine_pm_put(ce->engine);
2199 if (IS_ERR(rq))
2200 return PTR_ERR(rq);
2201
2202 if (!IS_ERR_OR_NULL(active)) {
2203 err = i915_active_add_request(active, rq);
2204 if (err)
2205 goto err_add_request;
2206 }
2207
2208 err = gen8_load_flex(rq, ce, flex, count);
2209 if (err)
2210 goto err_add_request;
2211
2212err_add_request:
2213 i915_request_add(rq);
2214 return err;
2215}
2216
2217static int gen8_configure_context(struct i915_gem_context *ctx,
2218 struct flex *flex, unsigned int count)
2219{
2220 struct i915_gem_engines_iter it;
2221 struct intel_context *ce;
2222 int err = 0;
2223
2224 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
2225 GEM_BUG_ON(ce == ce->engine->kernel_context);
2226
2227 if (ce->engine->class != RENDER_CLASS)
2228 continue;
2229
2230
2231 if (!intel_context_pin_if_active(ce))
2232 continue;
2233
2234 flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu);
2235 err = gen8_modify_context(ce, flex, count);
2236
2237 intel_context_unpin(ce);
2238 if (err)
2239 break;
2240 }
2241 i915_gem_context_unlock_engines(ctx);
2242
2243 return err;
2244}
2245
2246static int gen12_configure_oar_context(struct i915_perf_stream *stream,
2247 struct i915_active *active)
2248{
2249 int err;
2250 struct intel_context *ce = stream->pinned_ctx;
2251 u32 format = stream->oa_buffer.format;
2252 struct flex regs_context[] = {
2253 {
2254 GEN8_OACTXCONTROL,
2255 stream->perf->ctx_oactxctrl_offset + 1,
2256 active ? GEN8_OA_COUNTER_RESUME : 0,
2257 },
2258 };
2259
2260
2261
2262#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
2263 struct flex regs_lri[] = {
2264 {
2265 GEN12_OAR_OACONTROL,
2266 GEN12_OAR_OACONTROL_OFFSET + 1,
2267 (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
2268 (active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
2269 },
2270 {
2271 RING_CONTEXT_CONTROL(ce->engine->mmio_base),
2272 CTX_CONTEXT_CONTROL,
2273 _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
2274 active ?
2275 GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
2276 0)
2277 },
2278 };
2279
2280
2281 err = intel_context_lock_pinned(ce);
2282 if (err)
2283 return err;
2284
2285 err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
2286 intel_context_unlock_pinned(ce);
2287 if (err)
2288 return err;
2289
2290
2291 return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
2292}
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319static int
2320oa_configure_all_contexts(struct i915_perf_stream *stream,
2321 struct flex *regs,
2322 size_t num_regs,
2323 struct i915_active *active)
2324{
2325 struct drm_i915_private *i915 = stream->perf->i915;
2326 struct intel_engine_cs *engine;
2327 struct i915_gem_context *ctx, *cn;
2328 int err;
2329
2330 lockdep_assert_held(&stream->perf->lock);
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348 spin_lock(&i915->gem.contexts.lock);
2349 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) {
2350 if (!kref_get_unless_zero(&ctx->ref))
2351 continue;
2352
2353 spin_unlock(&i915->gem.contexts.lock);
2354
2355 err = gen8_configure_context(ctx, regs, num_regs);
2356 if (err) {
2357 i915_gem_context_put(ctx);
2358 return err;
2359 }
2360
2361 spin_lock(&i915->gem.contexts.lock);
2362 list_safe_reset_next(ctx, cn, link);
2363 i915_gem_context_put(ctx);
2364 }
2365 spin_unlock(&i915->gem.contexts.lock);
2366
2367
2368
2369
2370
2371
2372 for_each_uabi_engine(engine, i915) {
2373 struct intel_context *ce = engine->kernel_context;
2374
2375 if (engine->class != RENDER_CLASS)
2376 continue;
2377
2378 regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);
2379
2380 err = gen8_modify_self(ce, regs, num_regs, active);
2381 if (err)
2382 return err;
2383 }
2384
2385 return 0;
2386}
2387
2388static int
2389gen12_configure_all_contexts(struct i915_perf_stream *stream,
2390 const struct i915_oa_config *oa_config,
2391 struct i915_active *active)
2392{
2393 struct flex regs[] = {
2394 {
2395 GEN8_R_PWR_CLK_STATE,
2396 CTX_R_PWR_CLK_STATE,
2397 },
2398 };
2399
2400 return oa_configure_all_contexts(stream,
2401 regs, ARRAY_SIZE(regs),
2402 active);
2403}
2404
2405static int
2406lrc_configure_all_contexts(struct i915_perf_stream *stream,
2407 const struct i915_oa_config *oa_config,
2408 struct i915_active *active)
2409{
2410
2411 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2412#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2413 struct flex regs[] = {
2414 {
2415 GEN8_R_PWR_CLK_STATE,
2416 CTX_R_PWR_CLK_STATE,
2417 },
2418 {
2419 GEN8_OACTXCONTROL,
2420 stream->perf->ctx_oactxctrl_offset + 1,
2421 },
2422 { EU_PERF_CNTL0, ctx_flexeuN(0) },
2423 { EU_PERF_CNTL1, ctx_flexeuN(1) },
2424 { EU_PERF_CNTL2, ctx_flexeuN(2) },
2425 { EU_PERF_CNTL3, ctx_flexeuN(3) },
2426 { EU_PERF_CNTL4, ctx_flexeuN(4) },
2427 { EU_PERF_CNTL5, ctx_flexeuN(5) },
2428 { EU_PERF_CNTL6, ctx_flexeuN(6) },
2429 };
2430#undef ctx_flexeuN
2431 int i;
2432
2433 regs[1].value =
2434 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
2435 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
2436 GEN8_OA_COUNTER_RESUME;
2437
2438 for (i = 2; i < ARRAY_SIZE(regs); i++)
2439 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2440
2441 return oa_configure_all_contexts(stream,
2442 regs, ARRAY_SIZE(regs),
2443 active);
2444}
2445
2446static int
2447gen8_enable_metric_set(struct i915_perf_stream *stream,
2448 struct i915_active *active)
2449{
2450 struct intel_uncore *uncore = stream->uncore;
2451 struct i915_oa_config *oa_config = stream->oa_config;
2452 int ret;
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477 if (IS_GEN_RANGE(stream->perf->i915, 9, 11)) {
2478 intel_uncore_write(uncore, GEN8_OA_DEBUG,
2479 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2480 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
2481 }
2482
2483
2484
2485
2486
2487
2488 ret = lrc_configure_all_contexts(stream, oa_config, active);
2489 if (ret)
2490 return ret;
2491
2492 return emit_oa_config(stream,
2493 stream->oa_config, oa_context(stream),
2494 active);
2495}
2496
2497static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
2498{
2499 return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
2500 (stream->sample_flags & SAMPLE_OA_REPORT) ?
2501 0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
2502}
2503
2504static int
2505gen12_enable_metric_set(struct i915_perf_stream *stream,
2506 struct i915_active *active)
2507{
2508 struct intel_uncore *uncore = stream->uncore;
2509 struct i915_oa_config *oa_config = stream->oa_config;
2510 bool periodic = stream->periodic;
2511 u32 period_exponent = stream->period_exponent;
2512 int ret;
2513
2514 intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
2515
2516 _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
2517 GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
2518
2519
2520
2521
2522 oag_report_ctx_switches(stream));
2523
2524 intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
2525 (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
2526 GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
2527 (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
2528 : 0);
2529
2530
2531
2532
2533
2534
2535 ret = gen12_configure_all_contexts(stream, oa_config, active);
2536 if (ret)
2537 return ret;
2538
2539
2540
2541
2542
2543
2544 if (stream->ctx) {
2545 ret = gen12_configure_oar_context(stream, active);
2546 if (ret)
2547 return ret;
2548 }
2549
2550 return emit_oa_config(stream,
2551 stream->oa_config, oa_context(stream),
2552 active);
2553}
2554
2555static void gen8_disable_metric_set(struct i915_perf_stream *stream)
2556{
2557 struct intel_uncore *uncore = stream->uncore;
2558
2559
2560 lrc_configure_all_contexts(stream, NULL, NULL);
2561
2562 intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
2563}
2564
2565static void gen10_disable_metric_set(struct i915_perf_stream *stream)
2566{
2567 struct intel_uncore *uncore = stream->uncore;
2568
2569
2570 lrc_configure_all_contexts(stream, NULL, NULL);
2571
2572
2573 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
2574}
2575
2576static void gen12_disable_metric_set(struct i915_perf_stream *stream)
2577{
2578 struct intel_uncore *uncore = stream->uncore;
2579
2580
2581 gen12_configure_all_contexts(stream, NULL, NULL);
2582
2583
2584 if (stream->ctx)
2585 gen12_configure_oar_context(stream, NULL);
2586
2587
2588 intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
2589}
2590
2591static void gen7_oa_enable(struct i915_perf_stream *stream)
2592{
2593 struct intel_uncore *uncore = stream->uncore;
2594 struct i915_gem_context *ctx = stream->ctx;
2595 u32 ctx_id = stream->specific_ctx_id;
2596 bool periodic = stream->periodic;
2597 u32 period_exponent = stream->period_exponent;
2598 u32 report_format = stream->oa_buffer.format;
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609 gen7_init_oa_buffer(stream);
2610
2611 intel_uncore_write(uncore, GEN7_OACONTROL,
2612 (ctx_id & GEN7_OACONTROL_CTX_MASK) |
2613 (period_exponent <<
2614 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
2615 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
2616 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
2617 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
2618 GEN7_OACONTROL_ENABLE);
2619}
2620
2621static void gen8_oa_enable(struct i915_perf_stream *stream)
2622{
2623 struct intel_uncore *uncore = stream->uncore;
2624 u32 report_format = stream->oa_buffer.format;
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635 gen8_init_oa_buffer(stream);
2636
2637
2638
2639
2640
2641
2642 intel_uncore_write(uncore, GEN8_OACONTROL,
2643 (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
2644 GEN8_OA_COUNTER_ENABLE);
2645}
2646
2647static void gen12_oa_enable(struct i915_perf_stream *stream)
2648{
2649 struct intel_uncore *uncore = stream->uncore;
2650 u32 report_format = stream->oa_buffer.format;
2651
2652
2653
2654
2655
2656 if (!(stream->sample_flags & SAMPLE_OA_REPORT))
2657 return;
2658
2659 gen12_init_oa_buffer(stream);
2660
2661 intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
2662 (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
2663 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
2664}
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675static void i915_oa_stream_enable(struct i915_perf_stream *stream)
2676{
2677 stream->pollin = false;
2678
2679 stream->perf->ops.oa_enable(stream);
2680
2681 if (stream->periodic)
2682 hrtimer_start(&stream->poll_check_timer,
2683 ns_to_ktime(stream->poll_oa_period),
2684 HRTIMER_MODE_REL_PINNED);
2685}
2686
2687static void gen7_oa_disable(struct i915_perf_stream *stream)
2688{
2689 struct intel_uncore *uncore = stream->uncore;
2690
2691 intel_uncore_write(uncore, GEN7_OACONTROL, 0);
2692 if (intel_wait_for_register(uncore,
2693 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
2694 50))
2695 drm_err(&stream->perf->i915->drm,
2696 "wait for OA to be disabled timed out\n");
2697}
2698
2699static void gen8_oa_disable(struct i915_perf_stream *stream)
2700{
2701 struct intel_uncore *uncore = stream->uncore;
2702
2703 intel_uncore_write(uncore, GEN8_OACONTROL, 0);
2704 if (intel_wait_for_register(uncore,
2705 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
2706 50))
2707 drm_err(&stream->perf->i915->drm,
2708 "wait for OA to be disabled timed out\n");
2709}
2710
2711static void gen12_oa_disable(struct i915_perf_stream *stream)
2712{
2713 struct intel_uncore *uncore = stream->uncore;
2714
2715 intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
2716 if (intel_wait_for_register(uncore,
2717 GEN12_OAG_OACONTROL,
2718 GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
2719 50))
2720 drm_err(&stream->perf->i915->drm,
2721 "wait for OA to be disabled timed out\n");
2722
2723 intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR, 1);
2724 if (intel_wait_for_register(uncore,
2725 GEN12_OA_TLB_INV_CR,
2726 1, 0,
2727 50))
2728 drm_err(&stream->perf->i915->drm,
2729 "wait for OA tlb invalidate timed out\n");
2730}
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740static void i915_oa_stream_disable(struct i915_perf_stream *stream)
2741{
2742 stream->perf->ops.oa_disable(stream);
2743
2744 if (stream->periodic)
2745 hrtimer_cancel(&stream->poll_check_timer);
2746}
2747
2748static const struct i915_perf_stream_ops i915_oa_stream_ops = {
2749 .destroy = i915_oa_stream_destroy,
2750 .enable = i915_oa_stream_enable,
2751 .disable = i915_oa_stream_disable,
2752 .wait_unlocked = i915_oa_wait_unlocked,
2753 .poll_wait = i915_oa_poll_wait,
2754 .read = i915_oa_read,
2755};
2756
2757static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
2758{
2759 struct i915_active *active;
2760 int err;
2761
2762 active = i915_active_create();
2763 if (!active)
2764 return -ENOMEM;
2765
2766 err = stream->perf->ops.enable_metric_set(stream, active);
2767 if (err == 0)
2768 __i915_active_wait(active, TASK_UNINTERRUPTIBLE);
2769
2770 i915_active_put(active);
2771 return err;
2772}
2773
2774static void
2775get_default_sseu_config(struct intel_sseu *out_sseu,
2776 struct intel_engine_cs *engine)
2777{
2778 const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu;
2779
2780 *out_sseu = intel_sseu_from_device_info(devinfo_sseu);
2781
2782 if (IS_GEN(engine->i915, 11)) {
2783
2784
2785
2786
2787
2788 out_sseu->subslice_mask =
2789 ~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
2790 out_sseu->slice_mask = 0x1;
2791 }
2792}
2793
2794static int
2795get_sseu_config(struct intel_sseu *out_sseu,
2796 struct intel_engine_cs *engine,
2797 const struct drm_i915_gem_context_param_sseu *drm_sseu)
2798{
2799 if (drm_sseu->engine.engine_class != engine->uabi_class ||
2800 drm_sseu->engine.engine_instance != engine->uabi_instance)
2801 return -EINVAL;
2802
2803 return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
2804}
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824static int i915_oa_stream_init(struct i915_perf_stream *stream,
2825 struct drm_i915_perf_open_param *param,
2826 struct perf_open_properties *props)
2827{
2828 struct drm_i915_private *i915 = stream->perf->i915;
2829 struct i915_perf *perf = stream->perf;
2830 int format_size;
2831 int ret;
2832
2833 if (!props->engine) {
2834 DRM_DEBUG("OA engine not specified\n");
2835 return -EINVAL;
2836 }
2837
2838
2839
2840
2841
2842
2843 if (!perf->metrics_kobj) {
2844 DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
2845 return -EINVAL;
2846 }
2847
2848 if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
2849 (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
2850 DRM_DEBUG("Only OA report sampling supported\n");
2851 return -EINVAL;
2852 }
2853
2854 if (!perf->ops.enable_metric_set) {
2855 DRM_DEBUG("OA unit not supported\n");
2856 return -ENODEV;
2857 }
2858
2859
2860
2861
2862
2863
2864 if (perf->exclusive_stream) {
2865 DRM_DEBUG("OA unit already in use\n");
2866 return -EBUSY;
2867 }
2868
2869 if (!props->oa_format) {
2870 DRM_DEBUG("OA report format not specified\n");
2871 return -EINVAL;
2872 }
2873
2874 stream->engine = props->engine;
2875 stream->uncore = stream->engine->gt->uncore;
2876
2877 stream->sample_size = sizeof(struct drm_i915_perf_record_header);
2878
2879 format_size = perf->oa_formats[props->oa_format].size;
2880
2881 stream->sample_flags = props->sample_flags;
2882 stream->sample_size += format_size;
2883
2884 stream->oa_buffer.format_size = format_size;
2885 if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
2886 return -EINVAL;
2887
2888 stream->hold_preemption = props->hold_preemption;
2889
2890 stream->oa_buffer.format =
2891 perf->oa_formats[props->oa_format].format;
2892
2893 stream->periodic = props->oa_periodic;
2894 if (stream->periodic)
2895 stream->period_exponent = props->oa_period_exponent;
2896
2897 if (stream->ctx) {
2898 ret = oa_get_render_ctx_id(stream);
2899 if (ret) {
2900 DRM_DEBUG("Invalid context id to filter with\n");
2901 return ret;
2902 }
2903 }
2904
2905 ret = alloc_noa_wait(stream);
2906 if (ret) {
2907 DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
2908 goto err_noa_wait_alloc;
2909 }
2910
2911 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
2912 if (!stream->oa_config) {
2913 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
2914 ret = -EINVAL;
2915 goto err_config;
2916 }
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930 intel_engine_pm_get(stream->engine);
2931 intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
2932
2933 ret = alloc_oa_buffer(stream);
2934 if (ret)
2935 goto err_oa_buf_alloc;
2936
2937 stream->ops = &i915_oa_stream_ops;
2938
2939 perf->sseu = props->sseu;
2940 WRITE_ONCE(perf->exclusive_stream, stream);
2941
2942 ret = i915_perf_stream_enable_sync(stream);
2943 if (ret) {
2944 DRM_DEBUG("Unable to enable metric set\n");
2945 goto err_enable;
2946 }
2947
2948 DRM_DEBUG("opening stream oa config uuid=%s\n",
2949 stream->oa_config->uuid);
2950
2951 hrtimer_init(&stream->poll_check_timer,
2952 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2953 stream->poll_check_timer.function = oa_poll_check_timer_cb;
2954 init_waitqueue_head(&stream->poll_wq);
2955 spin_lock_init(&stream->oa_buffer.ptr_lock);
2956
2957 return 0;
2958
2959err_enable:
2960 WRITE_ONCE(perf->exclusive_stream, NULL);
2961 perf->ops.disable_metric_set(stream);
2962
2963 free_oa_buffer(stream);
2964
2965err_oa_buf_alloc:
2966 free_oa_configs(stream);
2967
2968 intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
2969 intel_engine_pm_put(stream->engine);
2970
2971err_config:
2972 free_noa_wait(stream);
2973
2974err_noa_wait_alloc:
2975 if (stream->ctx)
2976 oa_put_render_ctx_id(stream);
2977
2978 return ret;
2979}
2980
2981void i915_oa_init_reg_state(const struct intel_context *ce,
2982 const struct intel_engine_cs *engine)
2983{
2984 struct i915_perf_stream *stream;
2985
2986 if (engine->class != RENDER_CLASS)
2987 return;
2988
2989
2990 stream = READ_ONCE(engine->i915->perf.exclusive_stream);
2991 if (stream && INTEL_GEN(stream->perf->i915) < 12)
2992 gen8_update_reg_state_unlocked(ce, stream);
2993}
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013static ssize_t i915_perf_read(struct file *file,
3014 char __user *buf,
3015 size_t count,
3016 loff_t *ppos)
3017{
3018 struct i915_perf_stream *stream = file->private_data;
3019 struct i915_perf *perf = stream->perf;
3020 size_t offset = 0;
3021 int ret;
3022
3023
3024
3025
3026
3027 if (!stream->enabled)
3028 return -EIO;
3029
3030 if (!(file->f_flags & O_NONBLOCK)) {
3031
3032
3033
3034
3035
3036
3037
3038 do {
3039 ret = stream->ops->wait_unlocked(stream);
3040 if (ret)
3041 return ret;
3042
3043 mutex_lock(&perf->lock);
3044 ret = stream->ops->read(stream, buf, count, &offset);
3045 mutex_unlock(&perf->lock);
3046 } while (!offset && !ret);
3047 } else {
3048 mutex_lock(&perf->lock);
3049 ret = stream->ops->read(stream, buf, count, &offset);
3050 mutex_unlock(&perf->lock);
3051 }
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064 if (ret != -ENOSPC)
3065 stream->pollin = false;
3066
3067
3068 return offset ?: (ret ?: -EAGAIN);
3069}
3070
3071static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
3072{
3073 struct i915_perf_stream *stream =
3074 container_of(hrtimer, typeof(*stream), poll_check_timer);
3075
3076 if (oa_buffer_check_unlocked(stream)) {
3077 stream->pollin = true;
3078 wake_up(&stream->poll_wq);
3079 }
3080
3081 hrtimer_forward_now(hrtimer,
3082 ns_to_ktime(stream->poll_oa_period));
3083
3084 return HRTIMER_RESTART;
3085}
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
3103 struct file *file,
3104 poll_table *wait)
3105{
3106 __poll_t events = 0;
3107
3108 stream->ops->poll_wait(stream, file, wait);
3109
3110
3111
3112
3113
3114
3115
3116 if (stream->pollin)
3117 events |= EPOLLIN;
3118
3119 return events;
3120}
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
3136{
3137 struct i915_perf_stream *stream = file->private_data;
3138 struct i915_perf *perf = stream->perf;
3139 __poll_t ret;
3140
3141 mutex_lock(&perf->lock);
3142 ret = i915_perf_poll_locked(stream, file, wait);
3143 mutex_unlock(&perf->lock);
3144
3145 return ret;
3146}
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158static void i915_perf_enable_locked(struct i915_perf_stream *stream)
3159{
3160 if (stream->enabled)
3161 return;
3162
3163
3164 stream->enabled = true;
3165
3166 if (stream->ops->enable)
3167 stream->ops->enable(stream);
3168
3169 if (stream->hold_preemption)
3170 intel_context_set_nopreempt(stream->pinned_ctx);
3171}
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187static void i915_perf_disable_locked(struct i915_perf_stream *stream)
3188{
3189 if (!stream->enabled)
3190 return;
3191
3192
3193 stream->enabled = false;
3194
3195 if (stream->hold_preemption)
3196 intel_context_clear_nopreempt(stream->pinned_ctx);
3197
3198 if (stream->ops->disable)
3199 stream->ops->disable(stream);
3200}
3201
3202static long i915_perf_config_locked(struct i915_perf_stream *stream,
3203 unsigned long metrics_set)
3204{
3205 struct i915_oa_config *config;
3206 long ret = stream->oa_config->id;
3207
3208 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3209 if (!config)
3210 return -EINVAL;
3211
3212 if (config != stream->oa_config) {
3213 int err;
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224 err = emit_oa_config(stream, config, oa_context(stream), NULL);
3225 if (!err)
3226 config = xchg(&stream->oa_config, config);
3227 else
3228 ret = err;
3229 }
3230
3231 i915_oa_config_put(config);
3232
3233 return ret;
3234}
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
3249 unsigned int cmd,
3250 unsigned long arg)
3251{
3252 switch (cmd) {
3253 case I915_PERF_IOCTL_ENABLE:
3254 i915_perf_enable_locked(stream);
3255 return 0;
3256 case I915_PERF_IOCTL_DISABLE:
3257 i915_perf_disable_locked(stream);
3258 return 0;
3259 case I915_PERF_IOCTL_CONFIG:
3260 return i915_perf_config_locked(stream, arg);
3261 }
3262
3263 return -EINVAL;
3264}
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277static long i915_perf_ioctl(struct file *file,
3278 unsigned int cmd,
3279 unsigned long arg)
3280{
3281 struct i915_perf_stream *stream = file->private_data;
3282 struct i915_perf *perf = stream->perf;
3283 long ret;
3284
3285 mutex_lock(&perf->lock);
3286 ret = i915_perf_ioctl_locked(stream, cmd, arg);
3287 mutex_unlock(&perf->lock);
3288
3289 return ret;
3290}
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
3303{
3304 if (stream->enabled)
3305 i915_perf_disable_locked(stream);
3306
3307 if (stream->ops->destroy)
3308 stream->ops->destroy(stream);
3309
3310 if (stream->ctx)
3311 i915_gem_context_put(stream->ctx);
3312
3313 kfree(stream);
3314}
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327static int i915_perf_release(struct inode *inode, struct file *file)
3328{
3329 struct i915_perf_stream *stream = file->private_data;
3330 struct i915_perf *perf = stream->perf;
3331
3332 mutex_lock(&perf->lock);
3333 i915_perf_destroy_locked(stream);
3334 mutex_unlock(&perf->lock);
3335
3336
3337 drm_dev_put(&perf->i915->drm);
3338
3339 return 0;
3340}
3341
3342
3343static const struct file_operations fops = {
3344 .owner = THIS_MODULE,
3345 .llseek = no_llseek,
3346 .release = i915_perf_release,
3347 .poll = i915_perf_poll,
3348 .read = i915_perf_read,
3349 .unlocked_ioctl = i915_perf_ioctl,
3350
3351
3352
3353 .compat_ioctl = i915_perf_ioctl,
3354};
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381static int
3382i915_perf_open_ioctl_locked(struct i915_perf *perf,
3383 struct drm_i915_perf_open_param *param,
3384 struct perf_open_properties *props,
3385 struct drm_file *file)
3386{
3387 struct i915_gem_context *specific_ctx = NULL;
3388 struct i915_perf_stream *stream = NULL;
3389 unsigned long f_flags = 0;
3390 bool privileged_op = true;
3391 int stream_fd;
3392 int ret;
3393
3394 if (props->single_context) {
3395 u32 ctx_handle = props->ctx_handle;
3396 struct drm_i915_file_private *file_priv = file->driver_priv;
3397
3398 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
3399 if (!specific_ctx) {
3400 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
3401 ctx_handle);
3402 ret = -ENOENT;
3403 goto err;
3404 }
3405 }
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426 if (IS_HASWELL(perf->i915) && specific_ctx)
3427 privileged_op = false;
3428 else if (IS_GEN(perf->i915, 12) && specific_ctx &&
3429 (props->sample_flags & SAMPLE_OA_REPORT) == 0)
3430 privileged_op = false;
3431
3432 if (props->hold_preemption) {
3433 if (!props->single_context) {
3434 DRM_DEBUG("preemption disable with no context\n");
3435 ret = -EINVAL;
3436 goto err;
3437 }
3438 privileged_op = true;
3439 }
3440
3441
3442
3443
3444 if (props->has_sseu)
3445 privileged_op = true;
3446 else
3447 get_default_sseu_config(&props->sseu, props->engine);
3448
3449
3450
3451
3452
3453
3454 if (privileged_op &&
3455 i915_perf_stream_paranoid && !perfmon_capable()) {
3456 DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
3457 ret = -EACCES;
3458 goto err_ctx;
3459 }
3460
3461 stream = kzalloc(sizeof(*stream), GFP_KERNEL);
3462 if (!stream) {
3463 ret = -ENOMEM;
3464 goto err_ctx;
3465 }
3466
3467 stream->perf = perf;
3468 stream->ctx = specific_ctx;
3469 stream->poll_oa_period = props->poll_oa_period;
3470
3471 ret = i915_oa_stream_init(stream, param, props);
3472 if (ret)
3473 goto err_alloc;
3474
3475
3476
3477
3478
3479 if (WARN_ON(stream->sample_flags != props->sample_flags)) {
3480 ret = -ENODEV;
3481 goto err_flags;
3482 }
3483
3484 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
3485 f_flags |= O_CLOEXEC;
3486 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
3487 f_flags |= O_NONBLOCK;
3488
3489 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
3490 if (stream_fd < 0) {
3491 ret = stream_fd;
3492 goto err_flags;
3493 }
3494
3495 if (!(param->flags & I915_PERF_FLAG_DISABLED))
3496 i915_perf_enable_locked(stream);
3497
3498
3499
3500
3501 drm_dev_get(&perf->i915->drm);
3502
3503 return stream_fd;
3504
3505err_flags:
3506 if (stream->ops->destroy)
3507 stream->ops->destroy(stream);
3508err_alloc:
3509 kfree(stream);
3510err_ctx:
3511 if (specific_ctx)
3512 i915_gem_context_put(specific_ctx);
3513err:
3514 return ret;
3515}
3516
3517static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
3518{
3519 return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent);
3520}
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537static int read_properties_unlocked(struct i915_perf *perf,
3538 u64 __user *uprops,
3539 u32 n_props,
3540 struct perf_open_properties *props)
3541{
3542 u64 __user *uprop = uprops;
3543 u32 i;
3544 int ret;
3545
3546 memset(props, 0, sizeof(struct perf_open_properties));
3547 props->poll_oa_period = DEFAULT_POLL_PERIOD_NS;
3548
3549 if (!n_props) {
3550 DRM_DEBUG("No i915 perf properties given\n");
3551 return -EINVAL;
3552 }
3553
3554
3555 props->engine = intel_engine_lookup_user(perf->i915,
3556 I915_ENGINE_CLASS_RENDER,
3557 0);
3558 if (!props->engine) {
3559 DRM_DEBUG("No RENDER-capable engines\n");
3560 return -EINVAL;
3561 }
3562
3563
3564
3565
3566
3567
3568
3569 if (n_props >= DRM_I915_PERF_PROP_MAX) {
3570 DRM_DEBUG("More i915 perf properties specified than exist\n");
3571 return -EINVAL;
3572 }
3573
3574 for (i = 0; i < n_props; i++) {
3575 u64 oa_period, oa_freq_hz;
3576 u64 id, value;
3577
3578 ret = get_user(id, uprop);
3579 if (ret)
3580 return ret;
3581
3582 ret = get_user(value, uprop + 1);
3583 if (ret)
3584 return ret;
3585
3586 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
3587 DRM_DEBUG("Unknown i915 perf property ID\n");
3588 return -EINVAL;
3589 }
3590
3591 switch ((enum drm_i915_perf_property_id)id) {
3592 case DRM_I915_PERF_PROP_CTX_HANDLE:
3593 props->single_context = 1;
3594 props->ctx_handle = value;
3595 break;
3596 case DRM_I915_PERF_PROP_SAMPLE_OA:
3597 if (value)
3598 props->sample_flags |= SAMPLE_OA_REPORT;
3599 break;
3600 case DRM_I915_PERF_PROP_OA_METRICS_SET:
3601 if (value == 0) {
3602 DRM_DEBUG("Unknown OA metric set ID\n");
3603 return -EINVAL;
3604 }
3605 props->metrics_set = value;
3606 break;
3607 case DRM_I915_PERF_PROP_OA_FORMAT:
3608 if (value == 0 || value >= I915_OA_FORMAT_MAX) {
3609 DRM_DEBUG("Out-of-range OA report format %llu\n",
3610 value);
3611 return -EINVAL;
3612 }
3613 if (!perf->oa_formats[value].size) {
3614 DRM_DEBUG("Unsupported OA report format %llu\n",
3615 value);
3616 return -EINVAL;
3617 }
3618 props->oa_format = value;
3619 break;
3620 case DRM_I915_PERF_PROP_OA_EXPONENT:
3621 if (value > OA_EXPONENT_MAX) {
3622 DRM_DEBUG("OA timer exponent too high (> %u)\n",
3623 OA_EXPONENT_MAX);
3624 return -EINVAL;
3625 }
3626
3627
3628
3629
3630
3631
3632
3633 BUILD_BUG_ON(sizeof(oa_period) != 8);
3634 oa_period = oa_exponent_to_ns(perf, value);
3635
3636
3637
3638
3639
3640
3641
3642 if (oa_period <= NSEC_PER_SEC) {
3643 u64 tmp = NSEC_PER_SEC;
3644 do_div(tmp, oa_period);
3645 oa_freq_hz = tmp;
3646 } else
3647 oa_freq_hz = 0;
3648
3649 if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
3650 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",
3651 i915_oa_max_sample_rate);
3652 return -EACCES;
3653 }
3654
3655 props->oa_periodic = true;
3656 props->oa_period_exponent = value;
3657 break;
3658 case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
3659 props->hold_preemption = !!value;
3660 break;
3661 case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
3662 struct drm_i915_gem_context_param_sseu user_sseu;
3663
3664 if (copy_from_user(&user_sseu,
3665 u64_to_user_ptr(value),
3666 sizeof(user_sseu))) {
3667 DRM_DEBUG("Unable to copy global sseu parameter\n");
3668 return -EFAULT;
3669 }
3670
3671 ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
3672 if (ret) {
3673 DRM_DEBUG("Invalid SSEU configuration\n");
3674 return ret;
3675 }
3676 props->has_sseu = true;
3677 break;
3678 }
3679 case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
3680 if (value < 100000 ) {
3681 DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",
3682 value);
3683 return -EINVAL;
3684 }
3685 props->poll_oa_period = value;
3686 break;
3687 case DRM_I915_PERF_PROP_MAX:
3688 MISSING_CASE(id);
3689 return -EINVAL;
3690 }
3691
3692 uprop += 2;
3693 }
3694
3695 return 0;
3696}
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722int i915_perf_open_ioctl(struct drm_device *dev, void *data,
3723 struct drm_file *file)
3724{
3725 struct i915_perf *perf = &to_i915(dev)->perf;
3726 struct drm_i915_perf_open_param *param = data;
3727 struct perf_open_properties props;
3728 u32 known_open_flags;
3729 int ret;
3730
3731 if (!perf->i915) {
3732 DRM_DEBUG("i915 perf interface not available for this system\n");
3733 return -ENOTSUPP;
3734 }
3735
3736 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
3737 I915_PERF_FLAG_FD_NONBLOCK |
3738 I915_PERF_FLAG_DISABLED;
3739 if (param->flags & ~known_open_flags) {
3740 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
3741 return -EINVAL;
3742 }
3743
3744 ret = read_properties_unlocked(perf,
3745 u64_to_user_ptr(param->properties_ptr),
3746 param->num_properties,
3747 &props);
3748 if (ret)
3749 return ret;
3750
3751 mutex_lock(&perf->lock);
3752 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
3753 mutex_unlock(&perf->lock);
3754
3755 return ret;
3756}
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766void i915_perf_register(struct drm_i915_private *i915)
3767{
3768 struct i915_perf *perf = &i915->perf;
3769
3770 if (!perf->i915)
3771 return;
3772
3773
3774
3775
3776
3777 mutex_lock(&perf->lock);
3778
3779 perf->metrics_kobj =
3780 kobject_create_and_add("metrics",
3781 &i915->drm.primary->kdev->kobj);
3782
3783 mutex_unlock(&perf->lock);
3784}
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795void i915_perf_unregister(struct drm_i915_private *i915)
3796{
3797 struct i915_perf *perf = &i915->perf;
3798
3799 if (!perf->metrics_kobj)
3800 return;
3801
3802 kobject_put(perf->metrics_kobj);
3803 perf->metrics_kobj = NULL;
3804}
3805
3806static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
3807{
3808 static const i915_reg_t flex_eu_regs[] = {
3809 EU_PERF_CNTL0,
3810 EU_PERF_CNTL1,
3811 EU_PERF_CNTL2,
3812 EU_PERF_CNTL3,
3813 EU_PERF_CNTL4,
3814 EU_PERF_CNTL5,
3815 EU_PERF_CNTL6,
3816 };
3817 int i;
3818
3819 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
3820 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
3821 return true;
3822 }
3823 return false;
3824}
3825
/*
 * Range/equality helpers for validating register offsets.
 * Note: @addr is evaluated twice by the range macros.
 */

/* True when start <= addr <= end, all given as raw offsets. */
#define ADDR_IN_RANGE(addr, start, end) \
	((addr) >= (start) && \
	 (addr) <= (end))

/* Same inclusive range test with the bounds given as i915_reg_t registers. */
#define REG_IN_RANGE(addr, start, end) \
	ADDR_IN_RANGE(addr, \
		      i915_mmio_reg_offset(start), \
		      i915_mmio_reg_offset(end))

/* True when addr is exactly the offset of register mmio. */
#define REG_EQUAL(addr, mmio) \
	((addr) == i915_mmio_reg_offset(mmio))
3836
3837static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3838{
3839 return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) ||
3840 REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) ||
3841 REG_IN_RANGE(addr, OACEC0_0, OACEC7_1);
3842}
3843
3844static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3845{
3846 return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) ||
3847 REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) ||
3848 REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) ||
3849 REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI);
3850}
3851
3852static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3853{
3854 return gen7_is_valid_mux_addr(perf, addr) ||
3855 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
3856 REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8));
3857}
3858
3859static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3860{
3861 return gen8_is_valid_mux_addr(perf, addr) ||
3862 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
3863 REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI);
3864}
3865
3866static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3867{
3868 return gen7_is_valid_mux_addr(perf, addr) ||
3869 ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) ||
3870 REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) ||
3871 REG_EQUAL(addr, HSW_MBVID2_MISR0);
3872}
3873
3874static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3875{
3876 return gen7_is_valid_mux_addr(perf, addr) ||
3877 ADDR_IN_RANGE(addr, 0x182300, 0x1823A4);
3878}
3879
3880static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3881{
3882 return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) ||
3883 REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) ||
3884 REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) ||
3885 REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) ||
3886 REG_EQUAL(addr, GEN12_OAA_DBG_REG) ||
3887 REG_EQUAL(addr, GEN12_OAG_OA_PESS) ||
3888 REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF);
3889}
3890
3891static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3892{
3893 return REG_EQUAL(addr, NOA_WRITE) ||
3894 REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) ||
3895 REG_EQUAL(addr, GDT_CHICKEN_BITS) ||
3896 REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) ||
3897 REG_EQUAL(addr, RPM_CONFIG0) ||
3898 REG_EQUAL(addr, RPM_CONFIG1) ||
3899 REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8));
3900}
3901
3902static u32 mask_reg_value(u32 reg, u32 val)
3903{
3904
3905
3906
3907
3908 if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2))
3909 val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);
3910
3911
3912
3913
3914
3915 if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT))
3916 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);
3917
3918 return val;
3919}
3920
3921static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
3922 bool (*is_valid)(struct i915_perf *perf, u32 addr),
3923 u32 __user *regs,
3924 u32 n_regs)
3925{
3926 struct i915_oa_reg *oa_regs;
3927 int err;
3928 u32 i;
3929
3930 if (!n_regs)
3931 return NULL;
3932
3933
3934 GEM_BUG_ON(!is_valid);
3935 if (!is_valid)
3936 return ERR_PTR(-EINVAL);
3937
3938 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
3939 if (!oa_regs)
3940 return ERR_PTR(-ENOMEM);
3941
3942 for (i = 0; i < n_regs; i++) {
3943 u32 addr, value;
3944
3945 err = get_user(addr, regs);
3946 if (err)
3947 goto addr_err;
3948
3949 if (!is_valid(perf, addr)) {
3950 DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
3951 err = -EINVAL;
3952 goto addr_err;
3953 }
3954
3955 err = get_user(value, regs + 1);
3956 if (err)
3957 goto addr_err;
3958
3959 oa_regs[i].addr = _MMIO(addr);
3960 oa_regs[i].value = mask_reg_value(addr, value);
3961
3962 regs += 2;
3963 }
3964
3965 return oa_regs;
3966
3967addr_err:
3968 kfree(oa_regs);
3969 return ERR_PTR(err);
3970}
3971
3972static ssize_t show_dynamic_id(struct device *dev,
3973 struct device_attribute *attr,
3974 char *buf)
3975{
3976 struct i915_oa_config *oa_config =
3977 container_of(attr, typeof(*oa_config), sysfs_metric_id);
3978
3979 return sprintf(buf, "%d\n", oa_config->id);
3980}
3981
3982static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
3983 struct i915_oa_config *oa_config)
3984{
3985 sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
3986 oa_config->sysfs_metric_id.attr.name = "id";
3987 oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
3988 oa_config->sysfs_metric_id.show = show_dynamic_id;
3989 oa_config->sysfs_metric_id.store = NULL;
3990
3991 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
3992 oa_config->attrs[1] = NULL;
3993
3994 oa_config->sysfs_metric.name = oa_config->uuid;
3995 oa_config->sysfs_metric.attrs = oa_config->attrs;
3996
3997 return sysfs_create_group(perf->metrics_kobj,
3998 &oa_config->sysfs_metric);
3999}
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
4015 struct drm_file *file)
4016{
4017 struct i915_perf *perf = &to_i915(dev)->perf;
4018 struct drm_i915_perf_oa_config *args = data;
4019 struct i915_oa_config *oa_config, *tmp;
4020 struct i915_oa_reg *regs;
4021 int err, id;
4022
4023 if (!perf->i915) {
4024 DRM_DEBUG("i915 perf interface not available for this system\n");
4025 return -ENOTSUPP;
4026 }
4027
4028 if (!perf->metrics_kobj) {
4029 DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
4030 return -EINVAL;
4031 }
4032
4033 if (i915_perf_stream_paranoid && !perfmon_capable()) {
4034 DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
4035 return -EACCES;
4036 }
4037
4038 if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
4039 (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
4040 (!args->flex_regs_ptr || !args->n_flex_regs)) {
4041 DRM_DEBUG("No OA registers given\n");
4042 return -EINVAL;
4043 }
4044
4045 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
4046 if (!oa_config) {
4047 DRM_DEBUG("Failed to allocate memory for the OA config\n");
4048 return -ENOMEM;
4049 }
4050
4051 oa_config->perf = perf;
4052 kref_init(&oa_config->ref);
4053
4054 if (!uuid_is_valid(args->uuid)) {
4055 DRM_DEBUG("Invalid uuid format for OA config\n");
4056 err = -EINVAL;
4057 goto reg_err;
4058 }
4059
4060
4061
4062
4063 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));
4064
4065 oa_config->mux_regs_len = args->n_mux_regs;
4066 regs = alloc_oa_regs(perf,
4067 perf->ops.is_valid_mux_reg,
4068 u64_to_user_ptr(args->mux_regs_ptr),
4069 args->n_mux_regs);
4070
4071 if (IS_ERR(regs)) {
4072 DRM_DEBUG("Failed to create OA config for mux_regs\n");
4073 err = PTR_ERR(regs);
4074 goto reg_err;
4075 }
4076 oa_config->mux_regs = regs;
4077
4078 oa_config->b_counter_regs_len = args->n_boolean_regs;
4079 regs = alloc_oa_regs(perf,
4080 perf->ops.is_valid_b_counter_reg,
4081 u64_to_user_ptr(args->boolean_regs_ptr),
4082 args->n_boolean_regs);
4083
4084 if (IS_ERR(regs)) {
4085 DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
4086 err = PTR_ERR(regs);
4087 goto reg_err;
4088 }
4089 oa_config->b_counter_regs = regs;
4090
4091 if (INTEL_GEN(perf->i915) < 8) {
4092 if (args->n_flex_regs != 0) {
4093 err = -EINVAL;
4094 goto reg_err;
4095 }
4096 } else {
4097 oa_config->flex_regs_len = args->n_flex_regs;
4098 regs = alloc_oa_regs(perf,
4099 perf->ops.is_valid_flex_reg,
4100 u64_to_user_ptr(args->flex_regs_ptr),
4101 args->n_flex_regs);
4102
4103 if (IS_ERR(regs)) {
4104 DRM_DEBUG("Failed to create OA config for flex_regs\n");
4105 err = PTR_ERR(regs);
4106 goto reg_err;
4107 }
4108 oa_config->flex_regs = regs;
4109 }
4110
4111 err = mutex_lock_interruptible(&perf->metrics_lock);
4112 if (err)
4113 goto reg_err;
4114
4115
4116
4117
4118 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
4119 if (!strcmp(tmp->uuid, oa_config->uuid)) {
4120 DRM_DEBUG("OA config already exists with this uuid\n");
4121 err = -EADDRINUSE;
4122 goto sysfs_err;
4123 }
4124 }
4125
4126 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4127 if (err) {
4128 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
4129 goto sysfs_err;
4130 }
4131
4132
4133 oa_config->id = idr_alloc(&perf->metrics_idr,
4134 oa_config, 2,
4135 0, GFP_KERNEL);
4136 if (oa_config->id < 0) {
4137 DRM_DEBUG("Failed to create sysfs entry for OA config\n");
4138 err = oa_config->id;
4139 goto sysfs_err;
4140 }
4141
4142 mutex_unlock(&perf->metrics_lock);
4143
4144 DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
4145
4146 return oa_config->id;
4147
4148sysfs_err:
4149 mutex_unlock(&perf->metrics_lock);
4150reg_err:
4151 i915_oa_config_put(oa_config);
4152 DRM_DEBUG("Failed to add new OA config\n");
4153 return err;
4154}
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
4168 struct drm_file *file)
4169{
4170 struct i915_perf *perf = &to_i915(dev)->perf;
4171 u64 *arg = data;
4172 struct i915_oa_config *oa_config;
4173 int ret;
4174
4175 if (!perf->i915) {
4176 DRM_DEBUG("i915 perf interface not available for this system\n");
4177 return -ENOTSUPP;
4178 }
4179
4180 if (i915_perf_stream_paranoid && !perfmon_capable()) {
4181 DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
4182 return -EACCES;
4183 }
4184
4185 ret = mutex_lock_interruptible(&perf->metrics_lock);
4186 if (ret)
4187 return ret;
4188
4189 oa_config = idr_find(&perf->metrics_idr, *arg);
4190 if (!oa_config) {
4191 DRM_DEBUG("Failed to remove unknown OA config\n");
4192 ret = -ENOENT;
4193 goto err_unlock;
4194 }
4195
4196 GEM_BUG_ON(*arg != oa_config->id);
4197
4198 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4199
4200 idr_remove(&perf->metrics_idr, *arg);
4201
4202 mutex_unlock(&perf->metrics_lock);
4203
4204 DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
4205
4206 i915_oa_config_put(oa_config);
4207
4208 return 0;
4209
4210err_unlock:
4211 mutex_unlock(&perf->metrics_lock);
4212 return ret;
4213}
4214
4215static struct ctl_table oa_table[] = {
4216 {
4217 .procname = "perf_stream_paranoid",
4218 .data = &i915_perf_stream_paranoid,
4219 .maxlen = sizeof(i915_perf_stream_paranoid),
4220 .mode = 0644,
4221 .proc_handler = proc_dointvec_minmax,
4222 .extra1 = SYSCTL_ZERO,
4223 .extra2 = SYSCTL_ONE,
4224 },
4225 {
4226 .procname = "oa_max_sample_rate",
4227 .data = &i915_oa_max_sample_rate,
4228 .maxlen = sizeof(i915_oa_max_sample_rate),
4229 .mode = 0644,
4230 .proc_handler = proc_dointvec_minmax,
4231 .extra1 = SYSCTL_ZERO,
4232 .extra2 = &oa_sample_rate_hard_limit,
4233 },
4234 {}
4235};
4236
4237static struct ctl_table i915_root[] = {
4238 {
4239 .procname = "i915",
4240 .maxlen = 0,
4241 .mode = 0555,
4242 .child = oa_table,
4243 },
4244 {}
4245};
4246
4247static struct ctl_table dev_root[] = {
4248 {
4249 .procname = "dev",
4250 .maxlen = 0,
4251 .mode = 0555,
4252 .child = i915_root,
4253 },
4254 {}
4255};
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266void i915_perf_init(struct drm_i915_private *i915)
4267{
4268 struct i915_perf *perf = &i915->perf;
4269
4270
4271
4272 if (IS_HASWELL(i915)) {
4273 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4274 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4275 perf->ops.is_valid_flex_reg = NULL;
4276 perf->ops.enable_metric_set = hsw_enable_metric_set;
4277 perf->ops.disable_metric_set = hsw_disable_metric_set;
4278 perf->ops.oa_enable = gen7_oa_enable;
4279 perf->ops.oa_disable = gen7_oa_disable;
4280 perf->ops.read = gen7_oa_read;
4281 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
4282
4283 perf->oa_formats = hsw_oa_formats;
4284 } else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
4285
4286
4287
4288
4289
4290
4291 perf->ops.read = gen8_oa_read;
4292
4293 if (IS_GEN_RANGE(i915, 8, 9)) {
4294 perf->oa_formats = gen8_plus_oa_formats;
4295
4296 perf->ops.is_valid_b_counter_reg =
4297 gen7_is_valid_b_counter_addr;
4298 perf->ops.is_valid_mux_reg =
4299 gen8_is_valid_mux_addr;
4300 perf->ops.is_valid_flex_reg =
4301 gen8_is_valid_flex_addr;
4302
4303 if (IS_CHERRYVIEW(i915)) {
4304 perf->ops.is_valid_mux_reg =
4305 chv_is_valid_mux_addr;
4306 }
4307
4308 perf->ops.oa_enable = gen8_oa_enable;
4309 perf->ops.oa_disable = gen8_oa_disable;
4310 perf->ops.enable_metric_set = gen8_enable_metric_set;
4311 perf->ops.disable_metric_set = gen8_disable_metric_set;
4312 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4313
4314 if (IS_GEN(i915, 8)) {
4315 perf->ctx_oactxctrl_offset = 0x120;
4316 perf->ctx_flexeu0_offset = 0x2ce;
4317
4318 perf->gen8_valid_ctx_bit = BIT(25);
4319 } else {
4320 perf->ctx_oactxctrl_offset = 0x128;
4321 perf->ctx_flexeu0_offset = 0x3de;
4322
4323 perf->gen8_valid_ctx_bit = BIT(16);
4324 }
4325 } else if (IS_GEN_RANGE(i915, 10, 11)) {
4326 perf->oa_formats = gen8_plus_oa_formats;
4327
4328 perf->ops.is_valid_b_counter_reg =
4329 gen7_is_valid_b_counter_addr;
4330 perf->ops.is_valid_mux_reg =
4331 gen10_is_valid_mux_addr;
4332 perf->ops.is_valid_flex_reg =
4333 gen8_is_valid_flex_addr;
4334
4335 perf->ops.oa_enable = gen8_oa_enable;
4336 perf->ops.oa_disable = gen8_oa_disable;
4337 perf->ops.enable_metric_set = gen8_enable_metric_set;
4338 perf->ops.disable_metric_set = gen10_disable_metric_set;
4339 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4340
4341 if (IS_GEN(i915, 10)) {
4342 perf->ctx_oactxctrl_offset = 0x128;
4343 perf->ctx_flexeu0_offset = 0x3de;
4344 } else {
4345 perf->ctx_oactxctrl_offset = 0x124;
4346 perf->ctx_flexeu0_offset = 0x78e;
4347 }
4348 perf->gen8_valid_ctx_bit = BIT(16);
4349 } else if (IS_GEN(i915, 12)) {
4350 perf->oa_formats = gen12_oa_formats;
4351
4352 perf->ops.is_valid_b_counter_reg =
4353 gen12_is_valid_b_counter_addr;
4354 perf->ops.is_valid_mux_reg =
4355 gen12_is_valid_mux_addr;
4356 perf->ops.is_valid_flex_reg =
4357 gen8_is_valid_flex_addr;
4358
4359 perf->ops.oa_enable = gen12_oa_enable;
4360 perf->ops.oa_disable = gen12_oa_disable;
4361 perf->ops.enable_metric_set = gen12_enable_metric_set;
4362 perf->ops.disable_metric_set = gen12_disable_metric_set;
4363 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
4364
4365 perf->ctx_flexeu0_offset = 0;
4366 perf->ctx_oactxctrl_offset = 0x144;
4367 }
4368 }
4369
4370 if (perf->ops.enable_metric_set) {
4371 mutex_init(&perf->lock);
4372
4373 oa_sample_rate_hard_limit =
4374 RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2;
4375
4376 mutex_init(&perf->metrics_lock);
4377 idr_init(&perf->metrics_idr);
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
4390
4391
4392
4393
4394 ratelimit_set_flags(&perf->spurious_report_rs,
4395 RATELIMIT_MSG_ON_RELEASE);
4396
4397 ratelimit_state_init(&perf->tail_pointer_race,
4398 5 * HZ, 10);
4399 ratelimit_set_flags(&perf->tail_pointer_race,
4400 RATELIMIT_MSG_ON_RELEASE);
4401
4402 atomic64_set(&perf->noa_programming_delay,
4403 500 * 1000 );
4404
4405 perf->i915 = i915;
4406 }
4407}
4408
/*
 * idr_for_each() callback: drop one reference on the OA config stored at
 * this id. @id and @data are unused. Returns 0 so that iteration continues.
 */
static int destroy_config(int id, void *p, void *data)
{
	struct i915_oa_config *oa_config = p;

	i915_oa_config_put(oa_config);

	return 0;
}
4414
4415void i915_perf_sysctl_register(void)
4416{
4417 sysctl_header = register_sysctl_table(dev_root);
4418}
4419
4420void i915_perf_sysctl_unregister(void)
4421{
4422 unregister_sysctl_table(sysctl_header);
4423}
4424
4425
4426
4427
4428
4429void i915_perf_fini(struct drm_i915_private *i915)
4430{
4431 struct i915_perf *perf = &i915->perf;
4432
4433 if (!perf->i915)
4434 return;
4435
4436 idr_for_each(&perf->metrics_idr, destroy_config, perf);
4437 idr_destroy(&perf->metrics_idr);
4438
4439 memset(&perf->ops, 0, sizeof(perf->ops));
4440 perf->i915 = NULL;
4441}
4442
4443
4444
4445
4446
4447
/**
 * i915_perf_ioctl_version - report the revision of the perf ioctl interface
 *
 * Returns: the constant revision number of this implementation, currently 5.
 */
int i915_perf_ioctl_version(void)
{
	const int revision = 5;

	return revision;
}
4472
4473#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4474#include "selftests/i915_perf.c"
4475#endif
4476