1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include "amdgpu.h"
24#include "df_v3_6.h"
25
26#include "df/df_3_6_default.h"
27#include "df/df_3_6_offset.h"
28#include "df/df_3_6_sh_mask.h"
29
/*
 * NOTE(review): these two constants are not referenced by the code visible
 * in this file; presumably the SMN stride between per-instance register
 * copies and the number of DF instances - confirm before relying on them.
 */
#define DF_3_6_SMN_REG_INST_DIST	0x8
#define DF_3_6_INST_CNT			8

/* Number of perfmon counter slots walked by the bookkeeping helpers below. */
#define DF_V3_6_MAX_COUNTERS		4

/*
 * Field extractors for a DF perfmon config word:
 *   bits  7:0  event
 *   bits 15:8  instance
 *   bits 23:16 unit mask
 *
 * The argument is parenthesized so that expression arguments such as
 * (a | b) are shifted/masked as a whole rather than by operator precedence.
 */
#define DF_V3_6_GET_EVENT(x)		((x) & 0xFFUL)
#define DF_V3_6_GET_INSTANCE(x)		(((x) >> 8) & 0xFFUL)
#define DF_V3_6_GET_UNITMASK(x)		(((x) >> 16) & 0xFFUL)

/* 48-bit all-ones value; counts at or above it are discarded when read. */
#define DF_V3_6_PERFMON_OVERFLOW	0xFFFFFFFFFFFFULL
41
42static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
43 16, 32, 0, 0, 0, 2, 4, 8};
44
45static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
46 uint32_t ficaa_val)
47{
48 unsigned long flags, address, data;
49 uint32_t ficadl_val, ficadh_val;
50
51 address = adev->nbio.funcs->get_pcie_index_offset(adev);
52 data = adev->nbio.funcs->get_pcie_data_offset(adev);
53
54 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
55 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
56 WREG32(data, ficaa_val);
57
58 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
59 ficadl_val = RREG32(data);
60
61 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
62 ficadh_val = RREG32(data);
63
64 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
65
66 return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);
67}
68
69static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
70 uint32_t ficadl_val, uint32_t ficadh_val)
71{
72 unsigned long flags, address, data;
73
74 address = adev->nbio.funcs->get_pcie_index_offset(adev);
75 data = adev->nbio.funcs->get_pcie_data_offset(adev);
76
77 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
78 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
79 WREG32(data, ficaa_val);
80
81 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3);
82 WREG32(data, ficadl_val);
83
84 WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
85 WREG32(data, ficadh_val);
86
87 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
88}
89
90
91
92
93
94
95
96static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
97 uint32_t lo_addr, uint32_t *lo_val,
98 uint32_t hi_addr, uint32_t *hi_val)
99{
100 unsigned long flags, address, data;
101
102 address = adev->nbio.funcs->get_pcie_index_offset(adev);
103 data = adev->nbio.funcs->get_pcie_data_offset(adev);
104
105 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
106 WREG32(address, lo_addr);
107 *lo_val = RREG32(data);
108 WREG32(address, hi_addr);
109 *hi_val = RREG32(data);
110 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
111}
112
113
114
115
116
117
118
119static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
120 uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val)
121{
122 unsigned long flags, address, data;
123
124 address = adev->nbio.funcs->get_pcie_index_offset(adev);
125 data = adev->nbio.funcs->get_pcie_data_offset(adev);
126
127 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
128 WREG32(address, lo_addr);
129 WREG32(data, lo_val);
130 WREG32(address, hi_addr);
131 WREG32(data, hi_val);
132 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
133}
134
135
136static int df_v3_6_perfmon_arm_with_status(struct amdgpu_device *adev,
137 uint32_t lo_addr, uint32_t lo_val,
138 uint32_t hi_addr, uint32_t hi_val)
139{
140 unsigned long flags, address, data;
141 uint32_t lo_val_rb, hi_val_rb;
142
143 address = adev->nbio.funcs->get_pcie_index_offset(adev);
144 data = adev->nbio.funcs->get_pcie_data_offset(adev);
145
146 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
147 WREG32(address, lo_addr);
148 WREG32(data, lo_val);
149 WREG32(address, hi_addr);
150 WREG32(data, hi_val);
151
152 WREG32(address, lo_addr);
153 lo_val_rb = RREG32(data);
154 WREG32(address, hi_addr);
155 hi_val_rb = RREG32(data);
156 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
157
158 if (!(lo_val == lo_val_rb && hi_val == hi_val_rb))
159 return -EBUSY;
160
161 return 0;
162}
163
164
165
166
167
168
169#define ARM_RETRY_USEC_TIMEOUT 1000
170#define ARM_RETRY_USEC_INTERVAL 100
171static int df_v3_6_perfmon_arm_with_retry(struct amdgpu_device *adev,
172 uint32_t lo_addr, uint32_t lo_val,
173 uint32_t hi_addr, uint32_t hi_val)
174{
175 int countdown = ARM_RETRY_USEC_TIMEOUT;
176
177 while (countdown) {
178
179 if (!df_v3_6_perfmon_arm_with_status(adev, lo_addr, lo_val,
180 hi_addr, hi_val))
181 break;
182
183 countdown -= ARM_RETRY_USEC_INTERVAL;
184 udelay(ARM_RETRY_USEC_INTERVAL);
185 }
186
187 return countdown > 0 ? 0 : -ETIME;
188}
189
190
191static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev,
192 struct device_attribute *attr,
193 char *buf)
194{
195 struct amdgpu_device *adev;
196 struct drm_device *ddev;
197 int i, count;
198
199 ddev = dev_get_drvdata(dev);
200 adev = drm_to_adev(ddev);
201 count = 0;
202
203 for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
204 if (adev->df_perfmon_config_assign_mask[i] == 0)
205 count++;
206 }
207
208 return sysfs_emit(buf, "%i\n", count);
209}
210
211
212static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
213
214static void df_v3_6_query_hashes(struct amdgpu_device *adev)
215{
216 u32 tmp;
217
218 adev->df.hash_status.hash_64k = false;
219 adev->df.hash_status.hash_2m = false;
220 adev->df.hash_status.hash_1g = false;
221
222
223 if ((adev->asic_type == CHIP_ARCTURUS &&
224 adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
225 (adev->asic_type == CHIP_ALDEBARAN &&
226 adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
227 tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
228 adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
229 DF_CS_UMC_AON0_DfGlobalCtrl,
230 GlbHashIntlvCtl64K);
231 adev->df.hash_status.hash_2m = REG_GET_FIELD(tmp,
232 DF_CS_UMC_AON0_DfGlobalCtrl,
233 GlbHashIntlvCtl2M);
234 adev->df.hash_status.hash_1g = REG_GET_FIELD(tmp,
235 DF_CS_UMC_AON0_DfGlobalCtrl,
236 GlbHashIntlvCtl1G);
237 }
238}
239
240
241static void df_v3_6_sw_init(struct amdgpu_device *adev)
242{
243 int i, ret;
244
245 ret = device_create_file(adev->dev, &dev_attr_df_cntr_avail);
246 if (ret)
247 DRM_ERROR("failed to create file for available df counters\n");
248
249 for (i = 0; i < AMDGPU_MAX_DF_PERFMONS; i++)
250 adev->df_perfmon_config_assign_mask[i] = 0;
251
252 df_v3_6_query_hashes(adev);
253}
254
255static void df_v3_6_sw_fini(struct amdgpu_device *adev)
256{
257
258 device_remove_file(adev->dev, &dev_attr_df_cntr_avail);
259
260}
261
262static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
263 bool enable)
264{
265 u32 tmp;
266
267 if (enable) {
268 tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
269 tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
270 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
271 } else
272 WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
273 mmFabricConfigAccessControl_DEFAULT);
274}
275
276static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
277{
278 u32 tmp;
279
280 if (adev->asic_type == CHIP_ALDEBARAN) {
281 tmp = RREG32_SOC15(DF, 0, mmDF_GCM_AON0_DramMegaBaseAddress0);
282 tmp &=
283 ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
284 } else {
285 tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
286 tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
287 }
288 tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
289
290 return tmp;
291}
292
293static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
294{
295 int fb_channel_number;
296
297 fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
298 if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
299 fb_channel_number = 0;
300
301 return df_v3_6_channel_number[fb_channel_number];
302}
303
304static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
305 bool enable)
306{
307 u32 tmp;
308
309 if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
310
311 adev->df.funcs->enable_broadcast_mode(adev, true);
312
313 if (enable) {
314 tmp = RREG32_SOC15(DF, 0,
315 mmDF_PIE_AON0_DfGlobalClkGater);
316 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
317 tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
318 WREG32_SOC15(DF, 0,
319 mmDF_PIE_AON0_DfGlobalClkGater, tmp);
320 } else {
321 tmp = RREG32_SOC15(DF, 0,
322 mmDF_PIE_AON0_DfGlobalClkGater);
323 tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
324 tmp |= DF_V3_6_MGCG_DISABLE;
325 WREG32_SOC15(DF, 0,
326 mmDF_PIE_AON0_DfGlobalClkGater, tmp);
327 }
328
329
330 adev->df.funcs->enable_broadcast_mode(adev, false);
331 }
332}
333
334static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
335 u32 *flags)
336{
337 u32 tmp;
338
339
340 tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
341 if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
342 *flags |= AMD_CG_SUPPORT_DF_MGCG;
343}
344
345
346static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev,
347 uint64_t config,
348 int counter_idx)
349{
350
351 return ((config & 0x0FFFFFFUL) ==
352 adev->df_perfmon_config_assign_mask[counter_idx]);
353
354}
355
356
357static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
358 uint64_t config,
359 int counter_idx,
360 int is_ctrl,
361 uint32_t *lo_base_addr,
362 uint32_t *hi_base_addr)
363{
364 if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
365 return;
366
367 switch (counter_idx) {
368
369 case 0:
370 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4;
371 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi4 : smnPerfMonCtrHi4;
372 break;
373 case 1:
374 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo5 : smnPerfMonCtrLo5;
375 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi5 : smnPerfMonCtrHi5;
376 break;
377 case 2:
378 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo6 : smnPerfMonCtrLo6;
379 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi6 : smnPerfMonCtrHi6;
380 break;
381 case 3:
382 *lo_base_addr = is_ctrl ? smnPerfMonCtlLo7 : smnPerfMonCtrLo7;
383 *hi_base_addr = is_ctrl ? smnPerfMonCtlHi7 : smnPerfMonCtrHi7;
384 break;
385
386 }
387
388}
389
390
391static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
392 uint64_t config,
393 int counter_idx,
394 uint32_t *lo_base_addr,
395 uint32_t *hi_base_addr)
396{
397 df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr,
398 hi_base_addr);
399}
400
401
402static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
403 uint64_t config,
404 int counter_idx,
405 uint32_t *lo_base_addr,
406 uint32_t *hi_base_addr,
407 uint32_t *lo_val,
408 uint32_t *hi_val,
409 bool is_enable)
410{
411
412 uint32_t eventsel, instance, unitmask;
413 uint32_t instance_10, instance_5432, instance_76;
414
415 df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr,
416 hi_base_addr);
417
418 if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
419 DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi: %x",
420 *lo_base_addr, *hi_base_addr);
421 return -ENXIO;
422 }
423
424 eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
425 unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
426 instance = DF_V3_6_GET_INSTANCE(config);
427
428 instance_10 = instance & 0x3;
429 instance_5432 = (instance >> 2) & 0xf;
430 instance_76 = (instance >> 6) & 0x3;
431
432 *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
433 *lo_val = is_enable ? *lo_val | (1 << 22) : *lo_val & ~(1 << 22);
434 *hi_val = (instance_76 << 29) | instance_5432;
435
436 DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
437 config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val);
438
439 return 0;
440}
441
442
443static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
444 uint64_t config)
445{
446 int i;
447
448 for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
449 if (adev->df_perfmon_config_assign_mask[i] == 0U) {
450 adev->df_perfmon_config_assign_mask[i] =
451 config & 0x0FFFFFFUL;
452 return i;
453 }
454 }
455
456 return -ENOSPC;
457}
458
459#define DEFERRED_ARM_MASK (1 << 31)
460static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
461 int counter_idx, uint64_t config,
462 bool is_deferred)
463{
464
465 if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
466 return -EINVAL;
467
468 if (is_deferred)
469 adev->df_perfmon_config_assign_mask[counter_idx] |=
470 DEFERRED_ARM_MASK;
471 else
472 adev->df_perfmon_config_assign_mask[counter_idx] &=
473 ~DEFERRED_ARM_MASK;
474
475 return 0;
476}
477
478static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
479 int counter_idx,
480 uint64_t config)
481{
482 return (df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
483 (adev->df_perfmon_config_assign_mask[counter_idx]
484 & DEFERRED_ARM_MASK));
485
486}
487
488
489static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
490 uint64_t config,
491 int counter_idx)
492{
493 if (df_v3_6_pmc_has_counter(adev, config, counter_idx))
494 adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;
495}
496
497
498static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
499 uint64_t config,
500 int counter_idx)
501{
502 uint32_t lo_base_addr = 0, hi_base_addr = 0;
503
504 df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr,
505 &hi_base_addr);
506
507 if ((lo_base_addr == 0) || (hi_base_addr == 0))
508 return;
509
510 df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
511}
512
513
514static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
515 int counter_idx, int is_add)
516{
517 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
518 int err = 0, ret = 0;
519
520 switch (adev->asic_type) {
521 case CHIP_VEGA20:
522 case CHIP_ARCTURUS:
523 if (is_add)
524 return df_v3_6_pmc_add_cntr(adev, config);
525
526 ret = df_v3_6_pmc_get_ctrl_settings(adev,
527 config,
528 counter_idx,
529 &lo_base_addr,
530 &hi_base_addr,
531 &lo_val,
532 &hi_val,
533 true);
534
535 if (ret)
536 return ret;
537
538 err = df_v3_6_perfmon_arm_with_retry(adev,
539 lo_base_addr,
540 lo_val,
541 hi_base_addr,
542 hi_val);
543
544 if (err)
545 ret = df_v3_6_pmc_set_deferred(adev, config,
546 counter_idx, true);
547
548 break;
549 default:
550 break;
551 }
552
553 return ret;
554}
555
556static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
557 int counter_idx, int is_remove)
558{
559 uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
560 int ret = 0;
561
562 switch (adev->asic_type) {
563 case CHIP_VEGA20:
564 case CHIP_ARCTURUS:
565 ret = df_v3_6_pmc_get_ctrl_settings(adev,
566 config,
567 counter_idx,
568 &lo_base_addr,
569 &hi_base_addr,
570 &lo_val,
571 &hi_val,
572 false);
573
574 if (ret)
575 return ret;
576
577 df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
578 hi_base_addr, hi_val);
579
580 if (is_remove) {
581 df_v3_6_reset_perfmon_cntr(adev, config, counter_idx);
582 df_v3_6_pmc_release_cntr(adev, config, counter_idx);
583 }
584
585 break;
586 default:
587 break;
588 }
589
590 return ret;
591}
592
593static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
594 uint64_t config,
595 int counter_idx,
596 uint64_t *count)
597{
598 uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
599 *count = 0;
600
601 switch (adev->asic_type) {
602 case CHIP_VEGA20:
603 case CHIP_ARCTURUS:
604 df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
605 &lo_base_addr, &hi_base_addr);
606
607 if ((lo_base_addr == 0) || (hi_base_addr == 0))
608 return;
609
610
611 if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {
612 int rearm_err = df_v3_6_perfmon_arm_with_status(adev,
613 lo_base_addr, lo_val,
614 hi_base_addr, hi_val);
615
616 if (rearm_err)
617 return;
618
619 df_v3_6_pmc_set_deferred(adev, config, counter_idx,
620 false);
621 }
622
623 df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
624 hi_base_addr, &hi_val);
625
626 *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
627
628 if (*count >= DF_V3_6_PERFMON_OVERFLOW)
629 *count = 0;
630
631 DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
632 config, lo_base_addr, hi_base_addr, lo_val, hi_val);
633
634 break;
635 default:
636 break;
637 }
638}
639
640const struct amdgpu_df_funcs df_v3_6_funcs = {
641 .sw_init = df_v3_6_sw_init,
642 .sw_fini = df_v3_6_sw_fini,
643 .enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
644 .get_fb_channel_number = df_v3_6_get_fb_channel_number,
645 .get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
646 .update_medium_grain_clock_gating =
647 df_v3_6_update_medium_grain_clock_gating,
648 .get_clockgating_state = df_v3_6_get_clockgating_state,
649 .pmc_start = df_v3_6_pmc_start,
650 .pmc_stop = df_v3_6_pmc_stop,
651 .pmc_get_count = df_v3_6_pmc_get_count,
652 .get_fica = df_v3_6_get_fica,
653 .set_fica = df_v3_6_set_fica,
654};
655