#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS 2000
#define AMDGPU_MAX_RETRY_LIMIT 2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).  The file pcie_replay_count is used for this
 * and returns the count read from the ASIC.
 */
139static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
140 struct device_attribute *attr, char *buf)
141{
142 struct drm_device *ddev = dev_get_drvdata(dev);
143 struct amdgpu_device *adev = drm_to_adev(ddev);
144 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
145
146 return sysfs_emit(buf, "%llu\n", cnt);
147}
148
149static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
150 amdgpu_device_get_pcie_replay_count, NULL);
151
152static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);

/**
 * DOC: product_name
 *
 * The amdgpu driver provides a sysfs API for reporting the product name
 * for the device.  The file product_name is used for this and returns
 * the product name read from the FRU.
 * NOTE: This is only available for certain server cards
 */
164static ssize_t amdgpu_device_get_product_name(struct device *dev,
165 struct device_attribute *attr, char *buf)
166{
167 struct drm_device *ddev = dev_get_drvdata(dev);
168 struct amdgpu_device *adev = drm_to_adev(ddev);
169
170 return sysfs_emit(buf, "%s\n", adev->product_name);
171}
172
173static DEVICE_ATTR(product_name, S_IRUGO,
174 amdgpu_device_get_product_name, NULL);

/**
 * DOC: product_number
 *
 * The amdgpu driver provides a sysfs API for reporting the part number
 * for the device.  The file product_number is used for this and returns
 * the part number read from the FRU.
 * NOTE: This is only available for certain server cards
 */
186static ssize_t amdgpu_device_get_product_number(struct device *dev,
187 struct device_attribute *attr, char *buf)
188{
189 struct drm_device *ddev = dev_get_drvdata(dev);
190 struct amdgpu_device *adev = drm_to_adev(ddev);
191
192 return sysfs_emit(buf, "%s\n", adev->product_number);
193}
194
195static DEVICE_ATTR(product_number, S_IRUGO,
196 amdgpu_device_get_product_number, NULL);

/**
 * DOC: serial_number
 *
 * The amdgpu driver provides a sysfs API for reporting the serial number
 * for the device.  The file serial_number is used for this and returns
 * the serial number stored in adev->serial.
 * NOTE: This is only available for certain server cards
 */
208static ssize_t amdgpu_device_get_serial_number(struct device *dev,
209 struct device_attribute *attr, char *buf)
210{
211 struct drm_device *ddev = dev_get_drvdata(dev);
212 struct amdgpu_device *adev = drm_to_adev(ddev);
213
214 return sysfs_emit(buf, "%s\n", adev->serial);
215}
216
217static DEVICE_ATTR(serial_number, S_IRUGO,
218 amdgpu_device_get_serial_number, NULL);

/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise return false.
 */
228bool amdgpu_device_supports_px(struct drm_device *dev)
229{
230 struct amdgpu_device *adev = drm_to_adev(dev);
231
232 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
233 return true;
234 return false;
235}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise return false.
 */
245bool amdgpu_device_supports_boco(struct drm_device *dev)
246{
247 struct amdgpu_device *adev = drm_to_adev(dev);
248
249 if (adev->has_pr3 ||
250 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
251 return true;
252 return false;
253}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device supports BACO,
 * otherwise return false.
 */
263bool amdgpu_device_supports_baco(struct drm_device *dev)
264{
265 struct amdgpu_device *adev = drm_to_adev(dev);
266
267 return amdgpu_asic_supports_baco(adev);
268}

/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
279bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
280{
281 return (amdgpu_device_supports_boco(dev) &&
282 amdgpu_acpi_is_power_shift_control_supported());
283}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must > @size
 * @write: true - write to vram, otherwise - read from vram
 */
298void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
299 void *buf, size_t size, bool write)
300{
301 unsigned long flags;
302 uint32_t hi = ~0, tmp = 0;
303 uint32_t *data = buf;
304 uint64_t last;
305 int idx;
306
307 if (!drm_dev_enter(adev_to_drm(adev), &idx))
308 return;
309
310 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
311
312 spin_lock_irqsave(&adev->mmio_idx_lock, flags);
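	/*
	 * MM_INDEX takes the low 31 bits of the VRAM byte offset, with bit
	 * 31 set to enable extended addressing through MM_INDEX_HI; the HI
	 * register only needs rewriting when the upper bits change.  Each
	 * MM_DATA access then moves one dword at that offset.
	 */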
313 for (last = pos + size; pos < last; pos += 4) {
314 tmp = pos >> 31;
315
316 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
317 if (tmp != hi) {
318 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
319 hi = tmp;
320 }
321 if (write)
322 WREG32_NO_KIQ(mmMM_DATA, *data++);
323 else
324 *data++ = RREG32_NO_KIQ(mmMM_DATA);
325 }
326
327 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
328 drm_dev_exit(idx);
329}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
342size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
343 void *buf, size_t size, bool write)
344{
345#ifdef CONFIG_64BIT
346 void __iomem *addr;
347 size_t count = 0;
348 uint64_t last;
349
350 if (!adev->mman.aper_base_kaddr)
351 return 0;
352
353 last = min(pos + size, adev->gmc.visible_vram_size);
354 if (last > pos) {
355 addr = adev->mman.aper_base_kaddr + pos;
356 count = last - pos;
357
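		/*
		 * Keep the host view coherent: flush the HDP cache after a
		 * write so the data reaches VRAM, and invalidate it before
		 * a read so stale data is not returned.
		 */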
358 if (write) {
359 memcpy_toio(addr, buf, count);
360 mb();
361 amdgpu_device_flush_hdp(adev, NULL);
362 } else {
363 amdgpu_device_invalidate_hdp(adev, NULL);
364 mb();
365 memcpy_fromio(buf, addr, count);
366 }
367
368 }
369
370 return count;
371#else
372 return 0;
373#endif
374}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must > @size
 * @write: true - write to vram, otherwise - read from vram
 */
385void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
386 void *buf, size_t size, bool write)
387{
388 size_t count;

	/* try to use the VRAM aperture to access the buffer first */
391 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
392 size -= count;
393 if (size) {
		/* access the rest through MM_INDEX/MM_DATA */
395 pos += count;
396 buf += count;
397 amdgpu_device_mm_access(adev, pos, buf, size, write);
398 }
399}

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
406bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
407{
408 if (adev->no_hw_access)
409 return true;
410
411#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment.
	 * In task context a register access is legal either while the
	 * reset_domain read semaphore is held, or when no reset is in
	 * flight and the semaphore could be taken here.  The
	 * trylock/assert pair below checks exactly that: either the
	 * trylock succeeds (no reset in progress) or the caller must
	 * already hold the semaphore.  Interrupt context is skipped as
	 * it runs asynchronously to the reset path.
	 */
423 if (in_task()) {
424 if (down_read_trylock(&adev->reset_domain->sem))
425 up_read(&adev->reset_domain->sem);
426 else
427 lockdep_assert_held(&adev->reset_domain->sem);
428 }
429#endif
430 return false;
431}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
442uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
443 uint32_t reg, uint32_t acc_flags)
444{
445 uint32_t ret;
446
447 if (amdgpu_device_skip_hw_access(adev))
448 return 0;
449
450 if ((reg * 4) < adev->rmmio_size) {
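		/*
		 * For an SR-IOV VF at runtime, go through the KIQ ring so the
		 * host serializes register access; the reset semaphore trylock
		 * makes sure KIQ is not used while a reset is in flight.
		 * Otherwise fall back to a direct MMIO read.
		 */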
451 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
452 amdgpu_sriov_runtime(adev) &&
453 down_read_trylock(&adev->reset_domain->sem)) {
454 ret = amdgpu_kiq_rreg(adev, reg);
455 up_read(&adev->reset_domain->sem);
456 } else {
457 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
458 }
459 } else {
460 ret = adev->pcie_rreg(adev, reg * 4);
461 }
462
463 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
464
465 return ret;
466}

/*
 * MMIO register read with bytes helper functions
 * @offset:bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register with byte offset
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
482uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
483{
484 if (amdgpu_device_skip_hw_access(adev))
485 return 0;
486
487 if (offset < adev->rmmio_size)
488 return (readb(adev->rmmio + offset));
489 BUG();
490}

/*
 * MMIO register write with bytes helper functions
 * @offset:bytes offset from MMIO start
 * @value: the value want to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register with byte offset
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
507void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
508{
509 if (amdgpu_device_skip_hw_access(adev))
510 return;
511
512 if (offset < adev->rmmio_size)
513 writeb(value, adev->rmmio + offset);
514 else
515 BUG();
516}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
528void amdgpu_device_wreg(struct amdgpu_device *adev,
529 uint32_t reg, uint32_t v,
530 uint32_t acc_flags)
531{
532 if (amdgpu_device_skip_hw_access(adev))
533 return;
534
535 if ((reg * 4) < adev->rmmio_size) {
536 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
537 amdgpu_sriov_runtime(adev) &&
538 down_read_trylock(&adev->reset_domain->sem)) {
539 amdgpu_kiq_wreg(adev, reg, v);
540 up_read(&adev->reset_domain->sem);
541 } else {
542 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
543 }
544 } else {
545 adev->pcie_wreg(adev, reg * 4, v);
546 }
547
548 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
549}

/**
 * amdgpu_mm_wreg_mmio_rlc - write a register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 *
 * This function is invoked only for the debugfs register access.
 */
560void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
561 uint32_t reg, uint32_t v)
562{
563 if (amdgpu_device_skip_hw_access(adev))
564 return;
565
566 if (amdgpu_sriov_fullaccess(adev) &&
567 adev->gfx.rlc.funcs &&
568 adev->gfx.rlc.funcs->is_rlcg_access_range) {
569 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
570 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
571 } else if ((reg * 4) >= adev->rmmio_size) {
572 adev->pcie_wreg(adev, reg * 4, v);
573 } else {
574 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
575 }
576}

/**
 * amdgpu_mm_rdoorbell - read a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index.
 */
587u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
588{
589 if (amdgpu_device_skip_hw_access(adev))
590 return 0;
591
592 if (index < adev->doorbell.num_doorbells) {
593 return readl(adev->doorbell.ptr + index);
594 } else {
595 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
596 return 0;
597 }
598}

/**
 * amdgpu_mm_wdoorbell - write a doorbell dword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index.
 */
610void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
611{
612 if (amdgpu_device_skip_hw_access(adev))
613 return;
614
615 if (index < adev->doorbell.num_doorbells) {
616 writel(v, adev->doorbell.ptr + index);
617 } else {
618 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
619 }
620}

/**
 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index.
 */
631u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
632{
633 if (amdgpu_device_skip_hw_access(adev))
634 return 0;
635
636 if (index < adev->doorbell.num_doorbells) {
637 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
638 } else {
639 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
640 return 0;
641 }
642}

/**
 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
 *
 * @adev: amdgpu_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index.
 */
654void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
655{
656 if (amdgpu_device_skip_hw_access(adev))
657 return;
658
659 if (index < adev->doorbell.num_doorbells) {
660 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
661 } else {
662 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
663 }
664}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
676u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
677 u32 pcie_index, u32 pcie_data,
678 u32 reg_addr)
679{
680 unsigned long flags;
681 u32 r;
682 void __iomem *pcie_index_offset;
683 void __iomem *pcie_data_offset;
684
685 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
686 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
687 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
688
689 writel(reg_addr, pcie_index_offset);
690 readl(pcie_index_offset);
691 r = readl(pcie_data_offset);
692 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
693
694 return r;
695}

/**
 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
707u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
708 u32 pcie_index, u32 pcie_data,
709 u32 reg_addr)
710{
711 unsigned long flags;
712 u64 r;
713 void __iomem *pcie_index_offset;
714 void __iomem *pcie_data_offset;
715
716 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
717 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
718 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
721 writel(reg_addr, pcie_index_offset);
722 readl(pcie_index_offset);
723 r = readl(pcie_data_offset);
	/* read high 32 bits */
725 writel(reg_addr + 4, pcie_index_offset);
726 readl(pcie_index_offset);
727 r |= ((u64)readl(pcie_data_offset) << 32);
728 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
729
730 return r;
731}

/**
 * amdgpu_device_indirect_wreg - write an indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
743void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
744 u32 pcie_index, u32 pcie_data,
745 u32 reg_addr, u32 reg_data)
746{
747 unsigned long flags;
748 void __iomem *pcie_index_offset;
749 void __iomem *pcie_data_offset;
750
751 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
752 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
753 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
754
755 writel(reg_addr, pcie_index_offset);
756 readl(pcie_index_offset);
757 writel(reg_data, pcie_data_offset);
758 readl(pcie_data_offset);
759 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
760}

/**
 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
 *
 * @adev: amdgpu_device pointer
 * @pcie_index: mmio register offset
 * @pcie_data: mmio register offset
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
772void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
773 u32 pcie_index, u32 pcie_data,
774 u32 reg_addr, u64 reg_data)
775{
776 unsigned long flags;
777 void __iomem *pcie_index_offset;
778 void __iomem *pcie_data_offset;
779
780 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
781 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
782 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
785 writel(reg_addr, pcie_index_offset);
786 readl(pcie_index_offset);
787 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
788 readl(pcie_data_offset);
	/* write high 32 bits */
790 writel(reg_addr + 4, pcie_index_offset);
791 readl(pcie_index_offset);
792 writel((u32)(reg_data >> 32), pcie_data_offset);
793 readl(pcie_data_offset);
794 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
795}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
807static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
808{
809 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
810 BUG();
811 return 0;
812}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
824static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
825{
826 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
827 reg, v);
828 BUG();
829}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
841static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
842{
843 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
844 BUG();
845 return 0;
846}

/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
858static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
859{
860 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
861 reg, v);
862 BUG();
863}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
876static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
877 uint32_t block, uint32_t reg)
878{
879 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
880 reg, block);
881 BUG();
882 return 0;
883}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 */
896static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
897 uint32_t block,
898 uint32_t reg, uint32_t v)
899{
900 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
901 reg, block, v);
902 BUG();
903}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then initializes the asic
 * via the atombios or atomfirmware tables.
 */
912static int amdgpu_device_asic_init(struct amdgpu_device *adev)
913{
914 amdgpu_asic_pre_asic_init(adev);
915
916 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
917 return amdgpu_atomfirmware_asic_init(adev, true);
918 else
919 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
920}

/**
 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
930static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
931{
932 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
933 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
934 &adev->vram_scratch.robj,
935 &adev->vram_scratch.gpu_addr,
936 (void **)&adev->vram_scratch.ptr);
937}

/**
 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
946static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
947{
948 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
949}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
961void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
962 const u32 *registers,
963 const u32 array_size)
964{
965 u32 tmp, reg, and_mask, or_mask;
966 int i;
967
968 if (array_size % 3)
969 return;
970
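	/* the register list is a sequence of (offset, and_mask, or_mask) triplets */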
971 for (i = 0; i < array_size; i +=3) {
972 reg = registers[i + 0];
973 and_mask = registers[i + 1];
974 or_mask = registers[i + 2];
975
976 if (and_mask == 0xffffffff) {
977 tmp = or_mask;
978 } else {
979 tmp = RREG32(reg);
980 tmp &= ~and_mask;
981 if (adev->family >= AMDGPU_FAMILY_AI)
982 tmp |= (or_mask & and_mask);
983 else
984 tmp |= or_mask;
985 }
986 WREG32(reg, tmp);
987 }
988}

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
998void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
999{
1000 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1001}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
1010int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1011{
1012 return pci_reset_function(adev->pdev);
1013}

/*
 * GPU doorbell aperture helpers function.
 */

/**
 * amdgpu_device_doorbell_init - Init doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Init doorbell driver information.
 * Returns 0 on success, error on failure.
 */
1026static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1027{

	/* No doorbell on SI hardware generation */
1030 if (adev->asic_type < CHIP_BONAIRE) {
1031 adev->doorbell.base = 0;
1032 adev->doorbell.size = 0;
1033 adev->doorbell.num_doorbells = 0;
1034 adev->doorbell.ptr = NULL;
1035 return 0;
1036 }
1037
1038 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1039 return -EINVAL;
1040
1041 amdgpu_asic_init_doorbell_index(adev);

	/* doorbell bar mapping */
1044 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1045 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1046
1047 if (adev->enable_mes) {
1048 adev->doorbell.num_doorbells =
1049 adev->doorbell.size / sizeof(u32);
1050 } else {
1051 adev->doorbell.num_doorbells =
1052 min_t(u32, adev->doorbell.size / sizeof(u32),
1053 adev->doorbell_index.max_assignment+1);
1054 if (adev->doorbell.num_doorbells == 0)
1055 return -EINVAL;

		/*
		 * VEGA10 and newer map an extra 0x400 doorbell entries
		 * beyond the highest assigned doorbell index, since some
		 * engines use doorbells past that range.
		 */
1063 if (adev->asic_type >= CHIP_VEGA10)
1064 adev->doorbell.num_doorbells += 0x400;
1065 }
1066
1067 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1068 adev->doorbell.num_doorbells *
1069 sizeof(u32));
1070 if (adev->doorbell.ptr == NULL)
1071 return -ENOMEM;
1072
1073 return 0;
1074}

/**
 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down doorbell driver information.
 */
1083static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1084{
1085 iounmap(adev->doorbell.ptr);
1086 adev->doorbell.ptr = NULL;
1087}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */

/**
 * amdgpu_device_wb_fini - Disable Writeback and free memory
 *
 * @adev: amdgpu_device pointer
 *
 * Disables Writeback and frees the Writeback memory (all asics).
 * Used at driver shutdown.
 */
1105static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1106{
1107 if (adev->wb.wb_obj) {
1108 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1109 &adev->wb.gpu_addr,
1110 (void **)&adev->wb.wb);
1111 adev->wb.wb_obj = NULL;
1112 }
1113}

/**
 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
 *
 * @adev: amdgpu_device pointer
 *
 * Initializes writeback and allocates writeback memory (all asics).
 * Used at driver startup.
 * Returns 0 on success or an -error on failure.
 */
1124static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1125{
1126 int r;
1127
1128 if (adev->wb.wb_obj == NULL) {
1129
1130 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1131 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1132 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1133 (void **)&adev->wb.wb);
1134 if (r) {
1135 dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1136 return r;
1137 }
1138
1139 adev->wb.num_wb = AMDGPU_MAX_WB;
1140 memset(&adev->wb.used, 0, sizeof(adev->wb.used));

		/* clear wb memory */
1143 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1144 }
1145
1146 return 0;
1147}

/**
 * amdgpu_device_wb_get - Allocate a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Allocate a wb slot for use by the driver (all asics).
 * Returns 0 on success or -EINVAL on failure.
 */
1158int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1159{
1160 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1161
1162 if (offset < adev->wb.num_wb) {
1163 __set_bit(offset, adev->wb.used);
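		/* each writeback slot is 8 dwords wide, return a dword offset */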
1164 *wb = offset << 3;
1165 return 0;
1166 } else {
1167 return -EINVAL;
1168 }
1169}

/**
 * amdgpu_device_wb_free - Free a wb entry
 *
 * @adev: amdgpu_device pointer
 * @wb: wb index
 *
 * Free a wb slot allocated for use by the driver (all asics)
 */
1179void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1180{
1181 wb >>= 3;
1182 if (wb < adev->wb.num_wb)
1183 __clear_bit(wb, adev->wb.used);
1184}

/**
 * amdgpu_device_resize_fb_bar - try to resize FB BAR
 *
 * @adev: amdgpu_device pointer
 *
 * Try to resize the FB BAR so that all of VRAM is CPU accessible.
 * Returns 0 on success (even if resizing was not possible), or a
 * negative error code if the BARs could not be brought back up.
 */
1195int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1196{
1197 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1198 struct pci_bus *root;
1199 struct resource *res;
1200 unsigned i;
1201 u16 cmd;
1202 int r;
1203
1204
1205 if (amdgpu_sriov_vf(adev))
1206 return 0;
1207
1208
1209 if (adev->gmc.real_vram_size &&
1210 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1211 return 0;
1212
1213
1214 root = adev->pdev->bus;
1215 while (root->parent)
1216 root = root->parent;
1217
1218 pci_bus_for_each_resource(root, res, i) {
1219 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1220 res->start > 0x100000000ull)
1221 break;
1222 }
1223
1224
1225 if (!res)
1226 return 0;
1227
1228
1229 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1230 rbar_size);
1231
1232
1233 pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1234 pci_write_config_word(adev->pdev, PCI_COMMAND,
1235 cmd & ~PCI_COMMAND_MEMORY);
1236
1237
1238 amdgpu_device_doorbell_fini(adev);
1239 if (adev->asic_type >= CHIP_BONAIRE)
1240 pci_release_resource(adev->pdev, 2);
1241
1242 pci_release_resource(adev->pdev, 0);
1243
1244 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1245 if (r == -ENOSPC)
1246 DRM_INFO("Not enough PCI address space for a large BAR.");
1247 else if (r && r != -ENOTSUPP)
1248 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1249
1250 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1251
1252
1253
1254
1255 r = amdgpu_device_doorbell_init(adev);
1256 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1257 return -ENODEV;
1258
1259 pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1260
1261 return 0;
1262}

/**
 * amdgpu_device_need_post - check if the hw need post or not
 *
 * @adev: amdgpu_device pointer
 *
 * Check if the asic needs to be posted (all asics), e.g. at driver
 * startup or after a hw reset.
 * Returns true if posting is needed, false if not.
 */
1276bool amdgpu_device_need_post(struct amdgpu_device *adev)
1277{
1278 uint32_t reg;
1279
1280 if (amdgpu_sriov_vf(adev))
1281 return false;
1282
1283 if (amdgpu_passthrough(adev)) {
		/*
		 * For FIJI in a whole-GPU passthrough case: after a VM
		 * reboot the SMC firmware may be too old to run without a
		 * full post, so force a post if the firmware is missing or
		 * older than version 0x00160e00.
		 */
1289 if (adev->asic_type == CHIP_FIJI) {
1290 int err;
1291 uint32_t fw_ver;
1292 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1293
1294 if (err)
1295 return true;
1296
1297 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1298 if (fw_ver < 0x00160e00)
1299 return true;
1300 }
1301 }

	/* Don't post if we need to reset the whole hive on init */
1304 if (adev->gmc.xgmi.pending_reset)
1305 return false;
1306
1307 if (adev->has_hw_reset) {
1308 adev->has_hw_reset = false;
1309 return true;
1310 }
1311
1312
1313 if (adev->asic_type >= CHIP_BONAIRE)
1314 return amdgpu_atombios_scratch_need_asic_init(adev);

	/* check MEM_SIZE for older asics */
1317 reg = amdgpu_asic_get_config_memsize(adev);
1318
1319 if ((reg != 0) && (reg != 0xffffffff))
1320 return false;
1321
1322 return true;
1323}

/**
 * amdgpu_device_should_use_aspm - check if the device should program ASPM
 *
 * @adev: amdgpu_device pointer
 *
 * Confirm whether the module parameter and pcie bridge agree that ASPM should
 * be enabled.
 */
1335bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1336{
1337 switch (amdgpu_aspm) {
1338 case -1:
1339 break;
1340 case 0:
1341 return false;
1342 case 1:
1343 return true;
1344 default:
1345 return false;
1346 }
1347 return pcie_aspm_enabled(adev->pdev);
1348}

/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Enable/disable vga decode (all asics).
 * Returns VGA resource flags.
 */
1360static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1361 bool state)
1362{
1363 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1364 amdgpu_asic_set_vga_state(adev, state);
1365 if (state)
1366 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1367 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1368 else
1369 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1370}

/**
 * amdgpu_device_check_block_size - validate the vm block size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm block size specified via module parameter.
 * The vm block size defines the number of GPU VM address bits handled by
 * one page-table block; pages are 4KB (12 bits of offset), so a minimum
 * of 9 bits is required.
 */
1382static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1383{

	/* the default (-1) lets the driver pick the block size */
1387 if (amdgpu_vm_block_size == -1)
1388 return;
1389
1390 if (amdgpu_vm_block_size < 9) {
1391 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1392 amdgpu_vm_block_size);
1393 amdgpu_vm_block_size = -1;
1394 }
1395}

/**
 * amdgpu_device_check_vm_size - validate the vm size
 *
 * @adev: amdgpu_device pointer
 *
 * Validates the vm size in GB specified via module parameter.
 * The VM size is the size of the GPU virtual memory space in GB.
 */
1405static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1406{
1407
1408 if (amdgpu_vm_size == -1)
1409 return;
1410
1411 if (amdgpu_vm_size < 1) {
1412 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1413 amdgpu_vm_size);
1414 amdgpu_vm_size = -1;
1415 }
1416}
1417
1418static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1419{
1420 struct sysinfo si;
1421 bool is_os_64 = (sizeof(void *) == 8);
1422 uint64_t total_memory;
1423 uint64_t dram_size_seven_GB = 0x1B8000000;
1424 uint64_t dram_size_three_GB = 0xB8000000;
1425
1426 if (amdgpu_smu_memory_pool_size == 0)
1427 return;
1428
1429 if (!is_os_64) {
1430 DRM_WARN("Not 64-bit OS, feature not supported\n");
1431 goto def_value;
1432 }
1433 si_meminfo(&si);
1434 total_memory = (uint64_t)si.totalram * si.mem_unit;
1435
1436 if ((amdgpu_smu_memory_pool_size == 1) ||
1437 (amdgpu_smu_memory_pool_size == 2)) {
1438 if (total_memory < dram_size_three_GB)
1439 goto def_value1;
1440 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1441 (amdgpu_smu_memory_pool_size == 8)) {
1442 if (total_memory < dram_size_seven_GB)
1443 goto def_value1;
1444 } else {
1445 DRM_WARN("Smu memory pool size not supported\n");
1446 goto def_value;
1447 }
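	/* the pool size parameter is in units of 256MB (1 << 28 bytes) */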
1448 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1449
1450 return;
1451
1452def_value1:
	DRM_WARN("Not enough system memory\n");
1454def_value:
1455 adev->pm.smu_prv_buffer_size = 0;
1456}
1457
1458static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1459{
1460 if (!(adev->flags & AMD_IS_APU) ||
1461 adev->asic_type < CHIP_RAVEN)
1462 return 0;
1463
1464 switch (adev->asic_type) {
1465 case CHIP_RAVEN:
1466 if (adev->pdev->device == 0x15dd)
1467 adev->apu_flags |= AMD_APU_IS_RAVEN;
1468 if (adev->pdev->device == 0x15d8)
1469 adev->apu_flags |= AMD_APU_IS_PICASSO;
1470 break;
1471 case CHIP_RENOIR:
1472 if ((adev->pdev->device == 0x1636) ||
1473 (adev->pdev->device == 0x164c))
1474 adev->apu_flags |= AMD_APU_IS_RENOIR;
1475 else
1476 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1477 break;
1478 case CHIP_VANGOGH:
1479 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1480 break;
1481 case CHIP_YELLOW_CARP:
1482 break;
1483 case CHIP_CYAN_SKILLFISH:
1484 if ((adev->pdev->device == 0x13FE) ||
1485 (adev->pdev->device == 0x143F))
1486 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1487 break;
1488 default:
1489 break;
1490 }
1491
1492 return 0;
1493}
1494
/**
 * amdgpu_device_check_arguments - validate module params
 *
 * @adev: amdgpu_device pointer
 *
 * Validates certain module parameters and updates
 * the associated values used by the driver (all asics).
 */
1503static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1504{
1505 if (amdgpu_sched_jobs < 4) {
1506 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1507 amdgpu_sched_jobs);
1508 amdgpu_sched_jobs = 4;
1509 } else if (!is_power_of_2(amdgpu_sched_jobs)){
1510 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1511 amdgpu_sched_jobs);
1512 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1513 }
1514
1515 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1516
1517 dev_warn(adev->dev, "gart size (%d) too small\n",
1518 amdgpu_gart_size);
1519 amdgpu_gart_size = -1;
1520 }
1521
1522 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1523
1524 dev_warn(adev->dev, "gtt size (%d) too small\n",
1525 amdgpu_gtt_size);
1526 amdgpu_gtt_size = -1;
1527 }
1528
1529
1530 if (amdgpu_vm_fragment_size != -1 &&
1531 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1532 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1533 amdgpu_vm_fragment_size = -1;
1534 }
1535
1536 if (amdgpu_sched_hw_submission < 2) {
1537 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1538 amdgpu_sched_hw_submission);
1539 amdgpu_sched_hw_submission = 2;
1540 } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1541 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1542 amdgpu_sched_hw_submission);
1543 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1544 }
1545
1546 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1547 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1548 amdgpu_reset_method = -1;
1549 }
1550
1551 amdgpu_device_check_smu_prv_buffer_size(adev);
1552
1553 amdgpu_device_check_vm_size(adev);
1554
1555 amdgpu_device_check_block_size(adev);
1556
1557 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1558
1559 return 0;
1560}
1561
/**
 * amdgpu_switcheroo_set_state - set switcheroo state
 *
 * @pdev: pci dev pointer
 * @state: vga_switcheroo state
 *
 * Callback for the switcheroo driver.  Suspends or resumes
 * the asics before or after it is powered up using ACPI methods.
 */
1571static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1572 enum vga_switcheroo_state state)
1573{
1574 struct drm_device *dev = pci_get_drvdata(pdev);
1575 int r;
1576
1577 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1578 return;
1579
1580 if (state == VGA_SWITCHEROO_ON) {
1581 pr_info("switched on\n");
		/* don't suspend or resume card normally */
1583 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1584
1585 pci_set_power_state(pdev, PCI_D0);
1586 amdgpu_device_load_pci_state(pdev);
1587 r = pci_enable_device(pdev);
1588 if (r)
1589 DRM_WARN("pci_enable_device failed (%d)\n", r);
1590 amdgpu_device_resume(dev, true);
1591
1592 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1593 } else {
1594 pr_info("switched off\n");
1595 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1596 amdgpu_device_suspend(dev, true);
1597 amdgpu_device_cache_pci_state(pdev);
1598
1599 pci_disable_device(pdev);
1600 pci_set_power_state(pdev, PCI_D3cold);
1601 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1602 }
1603}
1604
/**
 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
 *
 * @pdev: pci dev pointer
 *
 * Callback for the switcheroo driver.  Checks whether the switcheroo
 * state can be changed.
 * Returns true if the state can be changed, false if not.
 */
1614static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1615{
1616 struct drm_device *dev = pci_get_drvdata(pdev);

	/*
	 * The switch can only happen while no one has the device open.
	 * The open_count read here is intentionally done without locking;
	 * taking a lock would invert with the driver load path and the
	 * check is inherently racy anyway.
	 */
1623 return atomic_read(&dev->open_count) == 0;
1624}
1625
1626static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1627 .set_gpu_state = amdgpu_switcheroo_set_state,
1628 .reprobe = NULL,
1629 .can_switch = amdgpu_switcheroo_can_switch,
1630};
1631
/**
 * amdgpu_device_ip_set_clockgating_state - set the CG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: clockgating state (gate or ungate)
 *
 * Sets the requested clockgating state for all instances of
 * the hardware IP block.
 * Returns the error code from the last instance.
 */
1643int amdgpu_device_ip_set_clockgating_state(void *dev,
1644 enum amd_ip_block_type block_type,
1645 enum amd_clockgating_state state)
1646{
1647 struct amdgpu_device *adev = dev;
1648 int i, r = 0;
1649
1650 for (i = 0; i < adev->num_ip_blocks; i++) {
1651 if (!adev->ip_blocks[i].status.valid)
1652 continue;
1653 if (adev->ip_blocks[i].version->type != block_type)
1654 continue;
1655 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1656 continue;
1657 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1658 (void *)adev, state);
1659 if (r)
1660 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1661 adev->ip_blocks[i].version->funcs->name, r);
1662 }
1663 return r;
1664}
1665
/**
 * amdgpu_device_ip_set_powergating_state - set the PG state
 *
 * @dev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 * @state: powergating state (gate or ungate)
 *
 * Sets the requested powergating state for all instances of
 * the hardware IP block.
 * Returns the error code from the last instance.
 */
1677int amdgpu_device_ip_set_powergating_state(void *dev,
1678 enum amd_ip_block_type block_type,
1679 enum amd_powergating_state state)
1680{
1681 struct amdgpu_device *adev = dev;
1682 int i, r = 0;
1683
1684 for (i = 0; i < adev->num_ip_blocks; i++) {
1685 if (!adev->ip_blocks[i].status.valid)
1686 continue;
1687 if (adev->ip_blocks[i].version->type != block_type)
1688 continue;
1689 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1690 continue;
1691 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1692 (void *)adev, state);
1693 if (r)
1694 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1695 adev->ip_blocks[i].version->funcs->name, r);
1696 }
1697 return r;
1698}
1699
/**
 * amdgpu_device_ip_get_clockgating_state - get the CG state
 *
 * @adev: amdgpu_device pointer
 * @flags: clockgating feature flags
 *
 * Walks the list of IP blocks and gets the clockgating flags from
 * each IP that supports it.
 */
1711void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1712 u64 *flags)
1713{
1714 int i;
1715
1716 for (i = 0; i < adev->num_ip_blocks; i++) {
1717 if (!adev->ip_blocks[i].status.valid)
1718 continue;
1719 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1720 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1721 }
1722}
1723
/**
 * amdgpu_device_ip_wait_for_idle - wait for idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Waits for the requested hardware IP to be idle.
 * Returns 0 for success or a negative error code on failure.
 */
1733int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1734 enum amd_ip_block_type block_type)
1735{
1736 int i, r;
1737
1738 for (i = 0; i < adev->num_ip_blocks; i++) {
1739 if (!adev->ip_blocks[i].status.valid)
1740 continue;
1741 if (adev->ip_blocks[i].version->type == block_type) {
1742 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1743 if (r)
1744 return r;
1745 break;
1746 }
1747 }
1748 return 0;
1749
1750}
1751
/**
 * amdgpu_device_ip_is_idle - is the hardware IP idle
 *
 * @adev: amdgpu_device pointer
 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Check if the hardware IP is idle or not.
 * Returns true if it is idle, false if not.
 */
1761bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1762 enum amd_ip_block_type block_type)
1763{
1764 int i;
1765
1766 for (i = 0; i < adev->num_ip_blocks; i++) {
1767 if (!adev->ip_blocks[i].status.valid)
1768 continue;
1769 if (adev->ip_blocks[i].version->type == block_type)
1770 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1771 }
1772 return true;
1773
1774}
1775
/**
 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
 *
 * @adev: amdgpu_device pointer
 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
 *
 * Returns a pointer to the hardware IP block structure
 * if it exists for the asic, otherwise NULL.
 */
1785struct amdgpu_ip_block *
1786amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1787 enum amd_ip_block_type type)
1788{
1789 int i;
1790
1791 for (i = 0; i < adev->num_ip_blocks; i++)
1792 if (adev->ip_blocks[i].version->type == type)
1793 return &adev->ip_blocks[i];
1794
1795 return NULL;
1796}
1797
/**
 * amdgpu_device_ip_block_version_cmp - check the IP block version
 *
 * @adev: amdgpu_device pointer
 * @type: enum amd_ip_block_type
 * @major: major version
 * @minor: minor version
 *
 * Returns 0 if the IP block version is equal or greater than the
 * requested version, 1 if it is smaller or the block does not exist.
 */
1809int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1810 enum amd_ip_block_type type,
1811 u32 major, u32 minor)
1812{
1813 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1814
1815 if (ip_block && ((ip_block->version->major > major) ||
1816 ((ip_block->version->major == major) &&
1817 (ip_block->version->minor >= minor))))
1818 return 0;
1819
1820 return 1;
1821}
1822
/**
 * amdgpu_device_ip_block_add - add an IP block to the driver
 *
 * @adev: amdgpu_device pointer
 * @ip_block_version: pointer to the IP to add
 *
 * Adds the IP block driver information to the collection of IPs
 * on the asic.
 */
1832int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1833 const struct amdgpu_ip_block_version *ip_block_version)
1834{
1835 if (!ip_block_version)
1836 return -EINVAL;
1837
1838 switch (ip_block_version->type) {
1839 case AMD_IP_BLOCK_TYPE_VCN:
1840 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1841 return 0;
1842 break;
1843 case AMD_IP_BLOCK_TYPE_JPEG:
1844 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1845 return 0;
1846 break;
1847 default:
1848 break;
1849 }
1850
1851 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1852 ip_block_version->funcs->name);
1853
1854 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1855
1856 return 0;
1857}
1858
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides a virtual
 * display for headless boards or virtualized environments; the parameter
 * also selects how many crtcs to expose (1-6).
 */
1871static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1872{
1873 adev->enable_virtual_display = false;
1874
1875 if (amdgpu_virtual_display) {
1876 const char *pci_address_name = pci_name(adev->pdev);
1877 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1878
1879 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1880 pciaddstr_tmp = pciaddstr;
1881 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1882 pciaddname = strsep(&pciaddname_tmp, ",");
1883 if (!strcmp("all", pciaddname)
1884 || !strcmp(pci_address_name, pciaddname)) {
1885 long num_crtc;
1886 int res = -1;
1887
1888 adev->enable_virtual_display = true;
1889
1890 if (pciaddname_tmp)
1891 res = kstrtol(pciaddname_tmp, 10,
1892 &num_crtc);
1893
1894 if (!res) {
1895 if (num_crtc < 1)
1896 num_crtc = 1;
1897 if (num_crtc > 6)
1898 num_crtc = 6;
1899 adev->mode_info.num_crtc = num_crtc;
1900 } else {
1901 adev->mode_info.num_crtc = 1;
1902 }
1903 break;
1904 }
1905 }
1906
1907 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1908 amdgpu_virtual_display, pci_address_name,
1909 adev->enable_virtual_display, adev->mode_info.num_crtc);
1910
1911 kfree(pciaddstr);
1912 }
1913}
1914
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success, -EINVAL on failure.
 */
1925static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1926{
1927 const char *chip_name;
1928 char fw_name[40];
1929 int err;
1930 const struct gpu_info_firmware_header_v1_0 *hdr;
1931
1932 adev->firmware.gpu_info_fw = NULL;
1933
1934 if (adev->mman.discovery_bin) {
		/*
		 * When IP discovery is available, the gpu_info firmware is
		 * only needed on Navi12, which still requires it for the
		 * SOC bounding box.
		 */
1940 if (adev->asic_type != CHIP_NAVI12)
1941 return 0;
1942 }
1943
1944 switch (adev->asic_type) {
1945#ifdef CONFIG_DRM_AMDGPU_SI
1946 case CHIP_VERDE:
1947 case CHIP_TAHITI:
1948 case CHIP_PITCAIRN:
1949 case CHIP_OLAND:
1950 case CHIP_HAINAN:
1951#endif
1952#ifdef CONFIG_DRM_AMDGPU_CIK
1953 case CHIP_BONAIRE:
1954 case CHIP_HAWAII:
1955 case CHIP_KAVERI:
1956 case CHIP_KABINI:
1957 case CHIP_MULLINS:
1958#endif
1959 case CHIP_TOPAZ:
1960 case CHIP_TONGA:
1961 case CHIP_FIJI:
1962 case CHIP_POLARIS10:
1963 case CHIP_POLARIS11:
1964 case CHIP_POLARIS12:
1965 case CHIP_VEGAM:
1966 case CHIP_CARRIZO:
1967 case CHIP_STONEY:
1968 case CHIP_VEGA20:
1969 case CHIP_ALDEBARAN:
1970 case CHIP_SIENNA_CICHLID:
1971 case CHIP_NAVY_FLOUNDER:
1972 case CHIP_DIMGREY_CAVEFISH:
1973 case CHIP_BEIGE_GOBY:
1974 default:
1975 return 0;
1976 case CHIP_VEGA10:
1977 chip_name = "vega10";
1978 break;
1979 case CHIP_VEGA12:
1980 chip_name = "vega12";
1981 break;
1982 case CHIP_RAVEN:
1983 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1984 chip_name = "raven2";
1985 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1986 chip_name = "picasso";
1987 else
1988 chip_name = "raven";
1989 break;
1990 case CHIP_ARCTURUS:
1991 chip_name = "arcturus";
1992 break;
1993 case CHIP_NAVI12:
1994 chip_name = "navi12";
1995 break;
1996 }
1997
1998 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
1999 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
2000 if (err) {
2001 dev_err(adev->dev,
2002 "Failed to load gpu_info firmware \"%s\"\n",
2003 fw_name);
2004 goto out;
2005 }
2006 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
2007 if (err) {
2008 dev_err(adev->dev,
2009 "Failed to validate gpu_info firmware \"%s\"\n",
2010 fw_name);
2011 goto out;
2012 }
2013
2014 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2015 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2016
2017 switch (hdr->version_major) {
2018 case 1:
2019 {
2020 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2021 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2022 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2023
2024
2025
2026
2027 if (adev->asic_type == CHIP_NAVI12)
2028 goto parse_soc_bounding_box;
2029
2030 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2031 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2032 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2033 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2034 adev->gfx.config.max_texture_channel_caches =
2035 le32_to_cpu(gpu_info_fw->gc_num_tccs);
2036 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2037 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2038 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2039 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2040 adev->gfx.config.double_offchip_lds_buf =
2041 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2042 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2043 adev->gfx.cu_info.max_waves_per_simd =
2044 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2045 adev->gfx.cu_info.max_scratch_slots_per_cu =
2046 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2047 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2048 if (hdr->version_minor >= 1) {
2049 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2050 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2051 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2052 adev->gfx.config.num_sc_per_sh =
2053 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2054 adev->gfx.config.num_packer_per_sc =
2055 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2056 }
2057
2058parse_soc_bounding_box:
2059
2060
2061
2062
2063 if (hdr->version_minor == 2) {
2064 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2065 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2066 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2068 }
2069 break;
2070 }
2071 default:
2072 dev_err(adev->dev,
2073 "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2074 err = -EINVAL;
2075 goto out;
2076 }
2077out:
2078 return err;
2079}
2080
/**
 * amdgpu_device_ip_early_init - run early init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Early initialization pass for hardware IPs.  The hardware IPs that make
 * up each asic are discovered and each IP's early_init callback is run.
 * This is the first stage in initializing the asic.
 * Returns 0 on success, negative error code on failure.
 */
2091static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2092{
2093 struct drm_device *dev = adev_to_drm(adev);
2094 struct pci_dev *parent;
2095 int i, r;
2096
2097 amdgpu_device_enable_virtual_display(adev);
2098
2099 if (amdgpu_sriov_vf(adev)) {
2100 r = amdgpu_virt_request_full_gpu(adev, true);
2101 if (r)
2102 return r;
2103 }
2104
2105 switch (adev->asic_type) {
2106#ifdef CONFIG_DRM_AMDGPU_SI
2107 case CHIP_VERDE:
2108 case CHIP_TAHITI:
2109 case CHIP_PITCAIRN:
2110 case CHIP_OLAND:
2111 case CHIP_HAINAN:
2112 adev->family = AMDGPU_FAMILY_SI;
2113 r = si_set_ip_blocks(adev);
2114 if (r)
2115 return r;
2116 break;
2117#endif
2118#ifdef CONFIG_DRM_AMDGPU_CIK
2119 case CHIP_BONAIRE:
2120 case CHIP_HAWAII:
2121 case CHIP_KAVERI:
2122 case CHIP_KABINI:
2123 case CHIP_MULLINS:
2124 if (adev->flags & AMD_IS_APU)
2125 adev->family = AMDGPU_FAMILY_KV;
2126 else
2127 adev->family = AMDGPU_FAMILY_CI;
2128
2129 r = cik_set_ip_blocks(adev);
2130 if (r)
2131 return r;
2132 break;
2133#endif
2134 case CHIP_TOPAZ:
2135 case CHIP_TONGA:
2136 case CHIP_FIJI:
2137 case CHIP_POLARIS10:
2138 case CHIP_POLARIS11:
2139 case CHIP_POLARIS12:
2140 case CHIP_VEGAM:
2141 case CHIP_CARRIZO:
2142 case CHIP_STONEY:
2143 if (adev->flags & AMD_IS_APU)
2144 adev->family = AMDGPU_FAMILY_CZ;
2145 else
2146 adev->family = AMDGPU_FAMILY_VI;
2147
2148 r = vi_set_ip_blocks(adev);
2149 if (r)
2150 return r;
2151 break;
2152 default:
2153 r = amdgpu_discovery_set_ip_blocks(adev);
2154 if (r)
2155 return r;
2156 break;
2157 }
2158
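	/*
	 * Treat the device as PX capable when ATPX is present, it is a
	 * dGPU (not an APU) and it is not attached over Thunderbolt.
	 */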
2159 if (amdgpu_has_atpx() &&
2160 (amdgpu_is_atpx_hybrid() ||
2161 amdgpu_has_atpx_dgpu_power_cntl()) &&
2162 ((adev->flags & AMD_IS_APU) == 0) &&
2163 !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2164 adev->flags |= AMD_IS_PX;
2165
2166 if (!(adev->flags & AMD_IS_APU)) {
2167 parent = pci_upstream_bridge(adev->pdev);
2168 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2169 }
2170
2171 amdgpu_amdkfd_device_probe(adev);
2172
2173 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2174 if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2175 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2176 if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2177 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2178
2179 for (i = 0; i < adev->num_ip_blocks; i++) {
2180 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2181 DRM_ERROR("disabled ip block: %d <%s>\n",
2182 i, adev->ip_blocks[i].version->funcs->name);
2183 adev->ip_blocks[i].status.valid = false;
2184 } else {
2185 if (adev->ip_blocks[i].version->funcs->early_init) {
2186 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2187 if (r == -ENOENT) {
2188 adev->ip_blocks[i].status.valid = false;
2189 } else if (r) {
2190 DRM_ERROR("early_init of IP block <%s> failed %d\n",
2191 adev->ip_blocks[i].version->funcs->name, r);
2192 return r;
2193 } else {
2194 adev->ip_blocks[i].status.valid = true;
2195 }
2196 } else {
2197 adev->ip_blocks[i].status.valid = true;
2198 }
2199 }
2200
2201 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2202 r = amdgpu_device_parse_gpu_info_fw(adev);
2203 if (r)
2204 return r;
2205
2206
2207 if (!amdgpu_get_bios(adev))
2208 return -EINVAL;
2209
2210 r = amdgpu_atombios_init(adev);
2211 if (r) {
2212 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2213 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2214 return r;
2215 }
2216
2217
2218 if (amdgpu_sriov_vf(adev))
2219 amdgpu_virt_init_data_exchange(adev);
2220
2221 }
2222 }
2223
2224 adev->cg_flags &= amdgpu_cg_mask;
2225 adev->pg_flags &= amdgpu_pg_mask;
2226
2227 return 0;
2228}
2229
2230static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2231{
2232 int i, r;
2233
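	/*
	 * Phase 1 only brings up the blocks that must be running before
	 * firmware loading: the common block, the IH block and, under
	 * SR-IOV, the PSP block.
	 */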
2234 for (i = 0; i < adev->num_ip_blocks; i++) {
2235 if (!adev->ip_blocks[i].status.sw)
2236 continue;
2237 if (adev->ip_blocks[i].status.hw)
2238 continue;
2239 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2240 (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2241 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2242 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2243 if (r) {
2244 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2245 adev->ip_blocks[i].version->funcs->name, r);
2246 return r;
2247 }
2248 adev->ip_blocks[i].status.hw = true;
2249 }
2250 }
2251
2252 return 0;
2253}
2254
2255static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2256{
2257 int i, r;
2258
2259 for (i = 0; i < adev->num_ip_blocks; i++) {
2260 if (!adev->ip_blocks[i].status.sw)
2261 continue;
2262 if (adev->ip_blocks[i].status.hw)
2263 continue;
2264 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2265 if (r) {
2266 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2267 adev->ip_blocks[i].version->funcs->name, r);
2268 return r;
2269 }
2270 adev->ip_blocks[i].status.hw = true;
2271 }
2272
2273 return 0;
2274}
2275
2276static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2277{
2278 int r = 0;
2279 int i;
2280 uint32_t smu_version;
2281
2282 if (adev->asic_type >= CHIP_VEGA10) {
2283 for (i = 0; i < adev->num_ip_blocks; i++) {
2284 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2285 continue;
2286
2287 if (!adev->ip_blocks[i].status.sw)
2288 continue;

			/* no need to do the fw loading again if already done */
2291 if (adev->ip_blocks[i].status.hw == true)
2292 break;
2293
2294 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2295 r = adev->ip_blocks[i].version->funcs->resume(adev);
2296 if (r) {
2297 DRM_ERROR("resume of IP block <%s> failed %d\n",
2298 adev->ip_blocks[i].version->funcs->name, r);
2299 return r;
2300 }
2301 } else {
2302 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2303 if (r) {
2304 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2305 adev->ip_blocks[i].version->funcs->name, r);
2306 return r;
2307 }
2308 }
2309
2310 adev->ip_blocks[i].status.hw = true;
2311 break;
2312 }
2313 }
2314
2315 if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2316 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2317
2318 return r;
2319}
2320
2321static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2322{
2323 long timeout;
2324 int r, i;
2325
2326 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2327 struct amdgpu_ring *ring = adev->rings[i];
2328
2329
2330 if (!ring || ring->no_scheduler)
2331 continue;
2332
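		/* pick the per-queue-type job timeout set via module parameters */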
2333 switch (ring->funcs->type) {
2334 case AMDGPU_RING_TYPE_GFX:
2335 timeout = adev->gfx_timeout;
2336 break;
2337 case AMDGPU_RING_TYPE_COMPUTE:
2338 timeout = adev->compute_timeout;
2339 break;
2340 case AMDGPU_RING_TYPE_SDMA:
2341 timeout = adev->sdma_timeout;
2342 break;
2343 default:
2344 timeout = adev->video_timeout;
2345 break;
2346 }
2347
2348 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2349 ring->num_hw_submission, amdgpu_job_hang_limit,
2350 timeout, adev->reset_domain->wq,
2351 ring->sched_score, ring->name,
2352 adev->dev);
2353 if (r) {
2354 DRM_ERROR("Failed to create scheduler on ring %s.\n",
2355 ring->name);
2356 return r;
2357 }
2358 }
2359
2360 return 0;
2361}
2362
/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
 * are run.  sw_init initializes the software state associated with each IP
 * and hw_init initializes the hardware associated with each IP.
 * Returns 0 on success, negative error code on failure.
 */
2375static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2376{
2377 int i, r;
2378
2379 r = amdgpu_ras_init(adev);
2380 if (r)
2381 return r;
2382
2383 for (i = 0; i < adev->num_ip_blocks; i++) {
2384 if (!adev->ip_blocks[i].status.valid)
2385 continue;
2386 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2387 if (r) {
2388 DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2389 adev->ip_blocks[i].version->funcs->name, r);
2390 goto init_failed;
2391 }
2392 adev->ip_blocks[i].status.sw = true;

		/* need to do gmc hw init early so we can allocate gpu mem */
2395 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2396
2397 if (amdgpu_sriov_vf(adev))
2398 amdgpu_virt_exchange_data(adev);
2399
2400 r = amdgpu_device_vram_scratch_init(adev);
2401 if (r) {
2402 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
2403 goto init_failed;
2404 }
2405 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2406 if (r) {
2407 DRM_ERROR("hw_init %d failed %d\n", i, r);
2408 goto init_failed;
2409 }
2410 r = amdgpu_device_wb_init(adev);
2411 if (r) {
2412 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2413 goto init_failed;
2414 }
2415 adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
2418 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
2419 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2420 AMDGPU_GEM_DOMAIN_VRAM,
2421 AMDGPU_CSA_SIZE);
2422 if (r) {
2423 DRM_ERROR("allocate CSA failed %d\n", r);
2424 goto init_failed;
2425 }
2426 }
2427 }
2428 }
2429
2430 if (amdgpu_sriov_vf(adev))
2431 amdgpu_virt_init_data_exchange(adev);
2432
2433 r = amdgpu_ib_pool_init(adev);
2434 if (r) {
2435 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2436 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2437 goto init_failed;
2438 }
2439
2440 r = amdgpu_ucode_create_bo(adev);
2441 if (r)
2442 goto init_failed;
2443
2444 r = amdgpu_device_ip_hw_init_phase1(adev);
2445 if (r)
2446 goto init_failed;
2447
2448 r = amdgpu_device_fw_loading(adev);
2449 if (r)
2450 goto init_failed;
2451
2452 r = amdgpu_device_ip_hw_init_phase2(adev);
2453 if (r)
2454 goto init_failed;
2455

	/*
	 * RAS recovery initialization reserves the pages that were
	 * previously retired because of ECC errors (the bad page list is
	 * loaded from EEPROM).  It has to run after the phase 2 hw init
	 * above so the IP blocks it relies on are already up, and before
	 * the schedulers below start accepting work.
	 */
2471 r = amdgpu_ras_recovery_init(adev);
2472 if (r)
2473 goto init_failed;
2474

	/*
	 * If the device is part of an XGMI hive, register it with the
	 * hive and switch over to the hive's shared reset domain.
	 */
2478 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2479 if (amdgpu_xgmi_add_device(adev) == 0) {
2480 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2481
2482 if (!hive->reset_domain ||
2483 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2484 r = -ENOENT;
2485 goto init_failed;
2486 }
2487
2488
2489 amdgpu_reset_put_reset_domain(adev->reset_domain);
2490 adev->reset_domain = hive->reset_domain;
2491 }
2492 }
2493
2494 r = amdgpu_device_init_schedulers(adev);
2495 if (r)
2496 goto init_failed;
2497
	/* Don't init kfd if the whole hive needs to be reset during init */
2499 if (!adev->gmc.xgmi.pending_reset)
2500 amdgpu_amdkfd_device_init(adev);
2501
2502 amdgpu_fru_get_product_info(adev);
2503
2504init_failed:
2505 if (amdgpu_sriov_vf(adev))
2506 amdgpu_virt_release_full_gpu(adev, true);
2507
2508 return r;
2509}
2510
/**
 * amdgpu_device_fill_reset_magic - snapshot the GART 'magic' bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Saves the first AMDGPU_RESET_MAGIC_NUM bytes of the GART table so that
 * after a GPU reset they can be compared to detect whether VRAM contents
 * were lost.
 */
2520static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2521{
2522 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2523}
2524
/**
 * amdgpu_device_check_vram_lost - check if vram is valid
 *
 * @adev: amdgpu_device pointer
 *
 * Compares the saved reset magic against the current GART contents to
 * determine whether VRAM was lost, and also assumes VRAM lost for reset
 * methods that always lose it.
 * Returns true if vram is lost, false if not.
 */
2535static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2536{
2537 if (memcmp(adev->gart.ptr, adev->reset_magic,
2538 AMDGPU_RESET_MAGIC_NUM))
2539 return true;
2540
2541 if (!amdgpu_in_reset(adev))
2542 return false;
2543
	/*
	 * For all ASICs with baco/mode1 reset, the VRAM is
	 * always assumed to be lost.
	 */
2548 switch (amdgpu_asic_reset_method(adev)) {
2549 case AMD_RESET_METHOD_BACO:
2550 case AMD_RESET_METHOD_MODE1:
2551 return true;
2552 default:
2553 return false;
2554 }
2555}
2556
/**
 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
 *
 * @adev: amdgpu_device pointer
 * @state: clockgating state (gate or ungate)
 *
 * Walks the list of IP blocks and sets the requested clockgating state on
 * every block that supports it (UVD, VCE, VCN and JPEG are handled
 * separately and skipped here).
 * Returns 0 on success, negative error code on failure.
 */
2570int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2571 enum amd_clockgating_state state)
2572{
2573 int i, j, r;
2574
2575 if (amdgpu_emu_mode == 1)
2576 return 0;
2577
2578 for (j = 0; j < adev->num_ip_blocks; j++) {
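		/* gate in the IP block list order, ungate in reverse order */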
2579 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2580 if (!adev->ip_blocks[i].status.late_initialized)
2581 continue;
2582
2583 if (adev->in_s0ix &&
2584 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2585 continue;
2586
2587 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2588 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2589 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2590 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2591 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2592
2593 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2594 state);
2595 if (r) {
2596 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2597 adev->ip_blocks[i].version->funcs->name, r);
2598 return r;
2599 }
2600 }
2601 }
2602
2603 return 0;
2604}
2605
2606int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2607 enum amd_powergating_state state)
2608{
2609 int i, j, r;
2610
2611 if (amdgpu_emu_mode == 1)
2612 return 0;
2613
2614 for (j = 0; j < adev->num_ip_blocks; j++) {
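		/* gate in the IP block list order, ungate in reverse order */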
2615 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2616 if (!adev->ip_blocks[i].status.late_initialized)
2617 continue;
2618
2619 if (adev->in_s0ix &&
2620 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2621 continue;
2622
2623 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2624 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2625 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2626 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2627 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2628
2629 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2630 state);
2631 if (r) {
2632 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2633 adev->ip_blocks[i].version->funcs->name, r);
2634 return r;
2635 }
2636 }
2637 }
2638 return 0;
2639}
2640
2641static int amdgpu_device_enable_mgpu_fan_boost(void)
2642{
2643 struct amdgpu_gpu_instance *gpu_ins;
2644 struct amdgpu_device *adev;
2645 int i, ret = 0;
2646
2647 mutex_lock(&mgpu_info.mutex);
2648
	/*
	 * MGPU fan boost feature should be enabled
	 * only when there are two or more dGPUs in
	 * the system
	 */
2654 if (mgpu_info.num_dgpu < 2)
2655 goto out;
2656
2657 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2658 gpu_ins = &(mgpu_info.gpu_ins[i]);
2659 adev = gpu_ins->adev;
2660 if (!(adev->flags & AMD_IS_APU) &&
2661 !gpu_ins->mgpu_fan_enabled) {
2662 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2663 if (ret)
2664 break;
2665
2666 gpu_ins->mgpu_fan_enabled = 1;
2667 }
2668 }
2669
2670out:
2671 mutex_unlock(&mgpu_info.mutex);
2672
2673 return ret;
2674}
2675
/**
 * amdgpu_device_ip_late_init - run late init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Late initialization pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the late_init callbacks are run.
 * late_init covers any special initialization that an IP requires
 * after all of the IPs have been initialized or something that needs to
 * happen late in the init process.
 * Returns 0 on success, negative error code on failure.
 */
2688static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2689{
2690 struct amdgpu_gpu_instance *gpu_instance;
2691 int i = 0, r;
2692
2693 for (i = 0; i < adev->num_ip_blocks; i++) {
2694 if (!adev->ip_blocks[i].status.hw)
2695 continue;
2696 if (adev->ip_blocks[i].version->funcs->late_init) {
2697 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2698 if (r) {
2699 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2700 adev->ip_blocks[i].version->funcs->name, r);
2701 return r;
2702 }
2703 }
2704 adev->ip_blocks[i].status.late_initialized = true;
2705 }
2706
2707 r = amdgpu_ras_late_init(adev);
2708 if (r) {
2709 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2710 return r;
2711 }
2712
2713 amdgpu_ras_set_error_query_ready(adev, true);
2714
2715 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2716 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2717
2718 amdgpu_device_fill_reset_magic(adev);
2719
2720 r = amdgpu_device_enable_mgpu_fan_boost();
2721 if (r)
2722 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2723
	/* For passthrough configurations on Arcturus (XGMI) and Aldebaran, enable special SBR handling */
	if (amdgpu_passthrough(adev) &&
	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
	     adev->asic_type == CHIP_ALDEBARAN))
2727 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2728
2729 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2730 mutex_lock(&mgpu_info.mutex);
2731
		/*
		 * Reset the XGMI p-state to the minimum only once every device
		 * in the hive has finished init.  mgpu_info counts dGPUs as
		 * they initialize, so wait until that count matches the number
		 * of physical nodes reported for this hive; devices may probe
		 * in any order and may belong to different hives.
		 */
2745 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2746 for (i = 0; i < mgpu_info.num_gpu; i++) {
2747 gpu_instance = &(mgpu_info.gpu_ins[i]);
2748 if (gpu_instance->adev->flags & AMD_IS_APU)
2749 continue;
2750
2751 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2752 AMDGPU_XGMI_PSTATE_MIN);
2753 if (r) {
2754 DRM_ERROR("pstate setting failed (%d).\n", r);
2755 break;
2756 }
2757 }
2758 }
2759
2760 mutex_unlock(&mgpu_info.mutex);
2761 }
2762
2763 return 0;
2764}
2765
/**
 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
 *
 * @adev: amdgpu_device pointer
 *
 * For ASICs that need to disable the SMC first.
 */
2773static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2774{
2775 int i, r;
2776
2777 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2778 return;
2779
2780 for (i = 0; i < adev->num_ip_blocks; i++) {
2781 if (!adev->ip_blocks[i].status.hw)
2782 continue;
2783 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2784 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2785
2786 if (r) {
2787 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2788 adev->ip_blocks[i].version->funcs->name, r);
2789 }
2790 adev->ip_blocks[i].status.hw = false;
2791 break;
2792 }
2793 }
2794}
2795
2796static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2797{
2798 int i, r;
2799
2800 for (i = 0; i < adev->num_ip_blocks; i++) {
2801 if (!adev->ip_blocks[i].version->funcs->early_fini)
2802 continue;
2803
2804 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2805 if (r) {
2806 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2807 adev->ip_blocks[i].version->funcs->name, r);
2808 }
2809 }
2810
2811 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2812 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2813
2814 amdgpu_amdkfd_suspend(adev, false);
2815
2816
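	/* workaround: on GC 9.0.0 and earlier the SMC has to be disabled before the other blocks */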
2817 amdgpu_device_smu_fini_early(adev);
2818
2819 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2820 if (!adev->ip_blocks[i].status.hw)
2821 continue;
2822
2823 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2824
2825 if (r) {
2826 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2827 adev->ip_blocks[i].version->funcs->name, r);
2828 }
2829
2830 adev->ip_blocks[i].status.hw = false;
2831 }
2832
2833 if (amdgpu_sriov_vf(adev)) {
2834 if (amdgpu_virt_release_full_gpu(adev, false))
2835 DRM_ERROR("failed to release exclusive mode on fini\n");
2836 }
2837
2838 return 0;
2839}
2840
/**
 * amdgpu_device_ip_fini - run fini for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main teardown pass for hardware IPs.  The list of all the hardware
 * IPs that make up the asic is walked and the sw_fini and late_fini
 * callbacks are run.  sw_fini tears down any software state associated
 * with each IP and late_fini handles any remaining per-IP cleanup.
 * Returns 0 on success, negative error code on failure.
 */
2852static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2853{
2854 int i, r;
2855
2856 if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2857 amdgpu_virt_release_ras_err_handler_data(adev);
2858
2859 if (adev->gmc.xgmi.num_physical_nodes > 1)
2860 amdgpu_xgmi_remove_device(adev);
2861
2862 amdgpu_amdkfd_device_fini_sw(adev);
2863
2864 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2865 if (!adev->ip_blocks[i].status.sw)
2866 continue;
2867
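		/* the GMC block owns GART/VRAM setup, so free dependent allocations before its sw_fini runs */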
2868 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2869 amdgpu_ucode_free_bo(adev);
2870 amdgpu_free_static_csa(&adev->virt.csa_obj);
2871 amdgpu_device_wb_fini(adev);
2872 amdgpu_device_vram_scratch_fini(adev);
2873 amdgpu_ib_pool_fini(adev);
2874 }
2875
2876 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2877
2878 if (r) {
2879 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2880 adev->ip_blocks[i].version->funcs->name, r);
2881 }
2882 adev->ip_blocks[i].status.sw = false;
2883 adev->ip_blocks[i].status.valid = false;
2884 }
2885
2886 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2887 if (!adev->ip_blocks[i].status.late_initialized)
2888 continue;
2889 if (adev->ip_blocks[i].version->funcs->late_fini)
2890 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2891 adev->ip_blocks[i].status.late_initialized = false;
2892 }
2893
2894 amdgpu_ras_fini(adev);
2895
2896 return 0;
2897}
2898
/**
 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
 *
 * @work: work_struct.
 */
2904static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2905{
2906 struct amdgpu_device *adev =
2907 container_of(work, struct amdgpu_device, delayed_init_work.work);
2908 int r;
2909
2910 r = amdgpu_ib_ring_tests(adev);
2911 if (r)
2912 DRM_ERROR("ib ring test failed (%d).\n", r);
2913}
2914
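/* delayed work that lets the SMU enter GFXOFF once all requests to keep GFX powered have been dropped */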
2915static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2916{
2917 struct amdgpu_device *adev =
2918 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2919
2920 WARN_ON_ONCE(adev->gfx.gfx_off_state);
2921 WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2922
2923 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2924 adev->gfx.gfx_off_state = true;
2925}
2926
/**
 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * First suspend pass: clockgating and powergating are disabled and the
 * suspend callbacks are run for the display (DCE) blocks only.
 * Returns 0 on success, negative error code on failure.
 */
2938static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2939{
2940 int i, r;
2941
2942 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2943 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2944
2945 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2946 if (!adev->ip_blocks[i].status.valid)
2947 continue;
2948
		/* displays are handled separately */
2950 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2951 continue;
2952
2953
2954 r = adev->ip_blocks[i].version->funcs->suspend(adev);
2955
2956 if (r) {
2957 DRM_ERROR("suspend of IP block <%s> failed %d\n",
2958 adev->ip_blocks[i].version->funcs->name, r);
2959 return r;
2960 }
2961
2962 adev->ip_blocks[i].status.hw = false;
2963 }
2964
2965 return 0;
2966}
2967
/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Second suspend pass: the suspend callbacks are run for all hardware IPs
 * except the display blocks, which were already handled in phase 1.
 * Returns 0 on success, negative error code on failure.
 */
2979static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
2980{
2981 int i, r;
2982
2983 if (adev->in_s0ix)
2984 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
2985
2986 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2987 if (!adev->ip_blocks[i].status.valid)
2988 continue;
		/* displays are handled in phase1 */
2990 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
2991 continue;
		/* PSP loses its connection when err_event_athub occurs, so skip its suspend */
2993 if (amdgpu_ras_intr_triggered() &&
2994 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
2995 adev->ip_blocks[i].status.hw = false;
2996 continue;
2997 }
2998
		/* skip suspending blocks that were never brought up because a hive reset is pending */
3000 if (adev->gmc.xgmi.pending_reset &&
3001 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3002 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3003 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3004 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3005 adev->ip_blocks[i].status.hw = false;
3006 continue;
3007 }
3008
		/*
		 * Skip suspend of GFX and PSP for S0ix: GFX is already in
		 * gfxoff state and PSP is part of the always-on hardware,
		 * so neither needs to be suspended here.
		 */
3014 if (adev->in_s0ix &&
3015 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3016 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
3017 continue;
3018
3019
3020 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3021
3022 if (r) {
3023 DRM_ERROR("suspend of IP block <%s> failed %d\n",
3024 adev->ip_blocks[i].version->funcs->name, r);
3025 }
3026 adev->ip_blocks[i].status.hw = false;
3027
		if (!amdgpu_sriov_vf(adev)) {
3029 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3030 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3031 if (r) {
3032 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3033 adev->mp1_state, r);
3034 return r;
3035 }
3036 }
3037 }
3038 }
3039
3040 return 0;
3041}
3042
/**
 * amdgpu_device_ip_suspend - run suspend for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main suspend function for hardware IPs.  Runs suspend phase 1 (display)
 * followed by phase 2 (everything else), requesting and releasing full GPU
 * access around it when running as an SR-IOV VF.
 * Returns 0 on success, negative error code on failure.
 */
3054int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3055{
3056 int r;
3057
3058 if (amdgpu_sriov_vf(adev)) {
3059 amdgpu_virt_fini_data_exchange(adev);
3060 amdgpu_virt_request_full_gpu(adev, false);
3061 }
3062
3063 r = amdgpu_device_ip_suspend_phase1(adev);
3064 if (r)
3065 return r;
3066 r = amdgpu_device_ip_suspend_phase2(adev);
3067
3068 if (amdgpu_sriov_vf(adev))
3069 amdgpu_virt_release_full_gpu(adev, false);
3070
3071 return r;
3072}
3073
3074static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3075{
3076 int i, r;
3077
3078 static enum amd_ip_block_type ip_order[] = {
3079 AMD_IP_BLOCK_TYPE_GMC,
3080 AMD_IP_BLOCK_TYPE_COMMON,
3081 AMD_IP_BLOCK_TYPE_PSP,
3082 AMD_IP_BLOCK_TYPE_IH,
3083 };
3084
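	/* after a VF FLR, first bring back only the blocks in ip_order (GMC, COMMON, PSP, IH); the rest are re-initialized later */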
3085 for (i = 0; i < adev->num_ip_blocks; i++) {
3086 int j;
3087 struct amdgpu_ip_block *block;
3088
3089 block = &adev->ip_blocks[i];
3090 block->status.hw = false;
3091
3092 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3093
3094 if (block->version->type != ip_order[j] ||
3095 !block->status.valid)
3096 continue;
3097
3098 r = block->version->funcs->hw_init(adev);
			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3100 if (r)
3101 return r;
3102 block->status.hw = true;
3103 }
3104 }
3105
3106 return 0;
3107}
3108
3109static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3110{
3111 int i, r;
3112
3113 static enum amd_ip_block_type ip_order[] = {
3114 AMD_IP_BLOCK_TYPE_SMC,
3115 AMD_IP_BLOCK_TYPE_DCE,
3116 AMD_IP_BLOCK_TYPE_GFX,
3117 AMD_IP_BLOCK_TYPE_SDMA,
3118 AMD_IP_BLOCK_TYPE_UVD,
3119 AMD_IP_BLOCK_TYPE_VCE,
3120 AMD_IP_BLOCK_TYPE_VCN
3121 };
3122
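	/* bring the remaining blocks back in the fixed order above; SMC uses resume, all others use hw_init */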
3123 for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3124 int j;
3125 struct amdgpu_ip_block *block;
3126
3127 for (j = 0; j < adev->num_ip_blocks; j++) {
3128 block = &adev->ip_blocks[j];
3129
3130 if (block->version->type != ip_order[i] ||
3131 !block->status.valid ||
3132 block->status.hw)
3133 continue;
3134
3135 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3136 r = block->version->funcs->resume(adev);
3137 else
3138 r = block->version->funcs->hw_init(adev);
3139
			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r ? "failed" : "succeeded");
3141 if (r)
3142 return r;
3143 block->status.hw = true;
3144 }
3145 }
3146
3147 return 0;
3148}
3149
/**
 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs (phase 1)
 *
 * @adev: amdgpu_device pointer
 *
 * First resume pass: the resume callbacks are run for the COMMON, GMC and
 * IH blocks so that basic register access, memory controller and interrupt
 * handling are functional again.  Also used when recovering from a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
3162static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3163{
3164 int i, r;
3165
3166 for (i = 0; i < adev->num_ip_blocks; i++) {
3167 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3168 continue;
3169 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3170 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3171 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
3172
3173 r = adev->ip_blocks[i].version->funcs->resume(adev);
3174 if (r) {
3175 DRM_ERROR("resume of IP block <%s> failed %d\n",
3176 adev->ip_blocks[i].version->funcs->name, r);
3177 return r;
3178 }
3179 adev->ip_blocks[i].status.hw = true;
3180 }
3181 }
3182
3183 return 0;
3184}
3185
/**
 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Second resume pass: the resume callbacks are run for all hardware IPs
 * except COMMON, GMC, IH and PSP, which are handled in phase 1 or during
 * firmware loading.  Also used when recovering from a GPU reset.
 * Returns 0 on success, negative error code on failure.
 */
3199static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3200{
3201 int i, r;
3202
3203 for (i = 0; i < adev->num_ip_blocks; i++) {
3204 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3205 continue;
3206 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3207 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3208 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3209 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3210 continue;
3211 r = adev->ip_blocks[i].version->funcs->resume(adev);
3212 if (r) {
3213 DRM_ERROR("resume of IP block <%s> failed %d\n",
3214 adev->ip_blocks[i].version->funcs->name, r);
3215 return r;
3216 }
3217 adev->ip_blocks[i].status.hw = true;
3218 }
3219
3220 return 0;
3221}
3222
/**
 * amdgpu_device_ip_resume - run resume for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main resume function for hardware IPs.  Resume is split into two phases
 * with firmware loading in between, since the same phases are also used
 * when recovering from a GPU reset and extra steps are needed there.
 * Returns 0 on success, negative error code on failure.
 */
3235static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3236{
3237 int r;
3238
3239 r = amdgpu_amdkfd_resume_iommu(adev);
3240 if (r)
3241 return r;
3242
3243 r = amdgpu_device_ip_resume_phase1(adev);
3244 if (r)
3245 return r;
3246
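	/* firmware is (re)loaded between resume phase 1 (COMMON/GMC/IH) and phase 2 (everything else) */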
3247 r = amdgpu_device_fw_loading(adev);
3248 if (r)
3249 return r;
3250
3251 r = amdgpu_device_ip_resume_phase2(adev);
3252
3253 return r;
3254}
3255
/**
 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Query the VBIOS data tables to determine if the board supports SR-IOV.
 */
3263static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3264{
3265 if (amdgpu_sriov_vf(adev)) {
3266 if (adev->is_atom_fw) {
3267 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3268 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3269 } else {
3270 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3271 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3272 }
3273
3274 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3275 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3276 }
3277}
3278
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * Returns true if DC is supported, false if not.
 */
3287bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3288{
3289 switch (asic_type) {
3290#ifdef CONFIG_DRM_AMDGPU_SI
3291 case CHIP_HAINAN:
3292#endif
3293 case CHIP_TOPAZ:
		/* chips without display hardware */
3295 return false;
3296#if defined(CONFIG_DRM_AMD_DC)
3297 case CHIP_TAHITI:
3298 case CHIP_PITCAIRN:
3299 case CHIP_VERDE:
3300 case CHIP_OLAND:
		/*
		 * SI parts default to the non-DC path; DC is only used for
		 * them when explicitly requested with amdgpu.dc=1 and
		 * CONFIG_DRM_AMD_DC_SI is enabled.
		 */
3308#if defined(CONFIG_DRM_AMD_DC_SI)
3309 return amdgpu_dc > 0;
3310#else
3311 return false;
3312#endif
3313 case CHIP_BONAIRE:
3314 case CHIP_KAVERI:
3315 case CHIP_KABINI:
3316 case CHIP_MULLINS:
		/*
		 * Boards with these chips are known to need VGA/LVDS output
		 * support that DC does not provide, so default to the non-DC
		 * path and use DC only when explicitly requested with
		 * amdgpu.dc=1.
		 */
3324 return amdgpu_dc > 0;
3325 case CHIP_HAWAII:
3326 case CHIP_CARRIZO:
3327 case CHIP_STONEY:
3328 case CHIP_POLARIS10:
3329 case CHIP_POLARIS11:
3330 case CHIP_POLARIS12:
3331 case CHIP_VEGAM:
3332 case CHIP_TONGA:
3333 case CHIP_FIJI:
3334 case CHIP_VEGA10:
3335 case CHIP_VEGA12:
3336 case CHIP_VEGA20:
3337#if defined(CONFIG_DRM_AMD_DC_DCN)
3338 case CHIP_RAVEN:
3339 case CHIP_NAVI10:
3340 case CHIP_NAVI14:
3341 case CHIP_NAVI12:
3342 case CHIP_RENOIR:
3343 case CHIP_CYAN_SKILLFISH:
3344 case CHIP_SIENNA_CICHLID:
3345 case CHIP_NAVY_FLOUNDER:
3346 case CHIP_DIMGREY_CAVEFISH:
3347 case CHIP_BEIGE_GOBY:
3348 case CHIP_VANGOGH:
3349 case CHIP_YELLOW_CARP:
3350#endif
3351 default:
3352 return amdgpu_dc != 0;
3353#else
3354 default:
3355 if (amdgpu_dc > 0)
3356 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3357 "but isn't supported by ASIC, ignoring\n");
3358 return false;
3359#endif
3360 }
3361}
3362
/**
 * amdgpu_device_has_dc_support - check if dc is supported
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true for supported, false for not supported
 */
3370bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3371{
3372 if (amdgpu_sriov_vf(adev) ||
3373 adev->enable_virtual_display ||
3374 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3375 return false;
3376
3377 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3378}
3379
3380static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3381{
3382 struct amdgpu_device *adev =
3383 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3384 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3385
	/* it's a bug to not have a hive within this function */
3387 if (WARN_ON(!hive))
3388 return;
3389
	/*
	 * Use the task barrier to synchronize all xgmi reset works across the
	 * hive.  task_barrier_enter and task_barrier_exit block until all the
	 * threads running the xgmi reset works reach those points;
	 * task_barrier_full does both blocks.
	 */
3396 if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3397
3398 task_barrier_enter(&hive->tb);
3399 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3400
3401 if (adev->asic_reset_res)
3402 goto fail;
3403
3404 task_barrier_exit(&hive->tb);
3405 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3406
3407 if (adev->asic_reset_res)
3408 goto fail;
3409
3410 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3411 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3412 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3413 } else {
3414
3415 task_barrier_full(&hive->tb);
3416 adev->asic_reset_res = amdgpu_asic_reset(adev);
3417 }
3418
3419fail:
3420 if (adev->asic_reset_res)
3421 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3422 adev->asic_reset_res, adev_to_drm(adev)->unique);
3423 amdgpu_put_xgmi_hive(hive);
3424}
3425
3426static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3427{
3428 char *input = amdgpu_lockup_timeout;
3429 char *timeout_setting = NULL;
3430 int index = 0;
3431 long timeout;
3432 int ret = 0;
3433
	/*
	 * By default the timeout for non-compute jobs is 10000 ms and
	 * 60000 ms for compute jobs.  Under SR-IOV the compute timeout is
	 * 60000 ms only when one VF owns all compute resources, otherwise
	 * 10000 ms.
	 */
3440 adev->gfx_timeout = msecs_to_jiffies(10000);
3441 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3442 if (amdgpu_sriov_vf(adev))
3443 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3444 msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3445 else
3446 adev->compute_timeout = msecs_to_jiffies(60000);
3447
3448 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3449 while ((timeout_setting = strsep(&input, ",")) &&
3450 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3451 ret = kstrtol(timeout_setting, 0, &timeout);
3452 if (ret)
3453 return ret;
3454
3455 if (timeout == 0) {
3456 index++;
3457 continue;
3458 } else if (timeout < 0) {
3459 timeout = MAX_SCHEDULE_TIMEOUT;
3460 dev_warn(adev->dev, "lockup timeout disabled");
3461 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3462 } else {
3463 timeout = msecs_to_jiffies(timeout);
3464 }
3465
3466 switch (index++) {
3467 case 0:
3468 adev->gfx_timeout = timeout;
3469 break;
3470 case 1:
3471 adev->compute_timeout = timeout;
3472 break;
3473 case 2:
3474 adev->sdma_timeout = timeout;
3475 break;
3476 case 3:
3477 adev->video_timeout = timeout;
3478 break;
3479 default:
3480 break;
3481 }
3482 }
3483
		/*
		 * Only one value was specified: apply it to all non-compute
		 * queues (and to compute too under SR-IOV or passthrough).
		 */
3487 if (index == 1) {
3488 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3489 if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3490 adev->compute_timeout = adev->gfx_timeout;
3491 }
3492 }
3493
3494 return ret;
3495}
3496
/**
 * amdgpu_device_check_iommu_direct_map - check if RAM is direct mapped to GPU
 *
 * @adev: amdgpu_device pointer
 *
 * RAM is direct mapped to the GPU if the IOMMU is absent or runs in
 * identity (pass-through) mode.
 */
3504static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3505{
3506 struct iommu_domain *domain;
3507
3508 domain = iommu_get_domain_for_dev(adev->dev);
3509 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3510 adev->ram_is_direct_mapped = true;
3511}
3512
3513static const struct attribute *amdgpu_dev_attributes[] = {
3514 &dev_attr_product_name.attr,
3515 &dev_attr_product_number.attr,
3516 &dev_attr_serial_number.attr,
3517 &dev_attr_pcie_replay_count.attr,
3518 NULL
3519};
3520
/**
 * amdgpu_device_init - initialize the driver
 *
 * @adev: amdgpu_device pointer
 * @flags: driver flags
 *
 * Initializes the driver info and hw (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver startup.
 */
3531int amdgpu_device_init(struct amdgpu_device *adev,
3532 uint32_t flags)
3533{
3534 struct drm_device *ddev = adev_to_drm(adev);
3535 struct pci_dev *pdev = adev->pdev;
3536 int r, i;
3537 bool px = false;
3538 u32 max_MBps;
3539
3540 adev->shutdown = false;
3541 adev->flags = flags;
3542
3543 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3544 adev->asic_type = amdgpu_force_asic_type;
3545 else
3546 adev->asic_type = flags & AMD_ASIC_MASK;
3547
3548 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3549 if (amdgpu_emu_mode == 1)
3550 adev->usec_timeout *= 10;
3551 adev->gmc.gart_size = 512 * 1024 * 1024;
3552 adev->accel_working = false;
3553 adev->num_rings = 0;
3554 adev->mman.buffer_funcs = NULL;
3555 adev->mman.buffer_funcs_ring = NULL;
3556 adev->vm_manager.vm_pte_funcs = NULL;
3557 adev->vm_manager.vm_pte_num_scheds = 0;
3558 adev->gmc.gmc_funcs = NULL;
3559 adev->harvest_ip_mask = 0x0;
3560 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3561 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3562
3563 adev->smc_rreg = &amdgpu_invalid_rreg;
3564 adev->smc_wreg = &amdgpu_invalid_wreg;
3565 adev->pcie_rreg = &amdgpu_invalid_rreg;
3566 adev->pcie_wreg = &amdgpu_invalid_wreg;
3567 adev->pciep_rreg = &amdgpu_invalid_rreg;
3568 adev->pciep_wreg = &amdgpu_invalid_wreg;
3569 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3570 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3571 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3572 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3573 adev->didt_rreg = &amdgpu_invalid_rreg;
3574 adev->didt_wreg = &amdgpu_invalid_wreg;
3575 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3576 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3577 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3578 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3579
3580 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3581 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3582 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3583
3584
3585
3586 mutex_init(&adev->firmware.mutex);
3587 mutex_init(&adev->pm.mutex);
3588 mutex_init(&adev->gfx.gpu_clock_mutex);
3589 mutex_init(&adev->srbm_mutex);
3590 mutex_init(&adev->gfx.pipe_reserve_mutex);
3591 mutex_init(&adev->gfx.gfx_off_mutex);
3592 mutex_init(&adev->grbm_idx_mutex);
3593 mutex_init(&adev->mn_lock);
3594 mutex_init(&adev->virt.vf_errors.lock);
3595 hash_init(adev->mn_hash);
3596 mutex_init(&adev->psp.mutex);
3597 mutex_init(&adev->notifier_lock);
3598 mutex_init(&adev->pm.stable_pstate_ctx_lock);
3599 mutex_init(&adev->benchmark_mutex);
3600
3601 amdgpu_device_init_apu_flags(adev);
3602
3603 r = amdgpu_device_check_arguments(adev);
3604 if (r)
3605 return r;
3606
3607 spin_lock_init(&adev->mmio_idx_lock);
3608 spin_lock_init(&adev->smc_idx_lock);
3609 spin_lock_init(&adev->pcie_idx_lock);
3610 spin_lock_init(&adev->uvd_ctx_idx_lock);
3611 spin_lock_init(&adev->didt_idx_lock);
3612 spin_lock_init(&adev->gc_cac_idx_lock);
3613 spin_lock_init(&adev->se_cac_idx_lock);
3614 spin_lock_init(&adev->audio_endpt_idx_lock);
3615 spin_lock_init(&adev->mm_stats.lock);
3616
3617 INIT_LIST_HEAD(&adev->shadow_list);
3618 mutex_init(&adev->shadow_list_lock);
3619
3620 INIT_LIST_HEAD(&adev->reset_list);
3621
3622 INIT_LIST_HEAD(&adev->ras_list);
3623
3624 INIT_DELAYED_WORK(&adev->delayed_init_work,
3625 amdgpu_device_delayed_init_work_handler);
3626 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3627 amdgpu_device_delay_enable_gfx_off);
3628
3629 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3630
3631 adev->gfx.gfx_off_req_count = 1;
3632 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3633
3634 atomic_set(&adev->throttling_logging_enabled, 1);
3635
	/*
	 * If throttling continues, logging is performed at most once per
	 * minute to avoid flooding the log.  "-1" is subtracted because the
	 * thermal throttling interrupt comes every second, so the effective
	 * interval is 59 s (rate-limited printing) + 1 s (waiting for the
	 * throttling interrupt) = 60 s.
	 */
3642 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3643 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3644
	/* MMIO register mapping: BAR 5 on BONAIRE and newer, BAR 2 on older ASICs */
3647 if (adev->asic_type >= CHIP_BONAIRE) {
3648 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3649 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3650 } else {
3651 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3652 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3653 }
3654
3655 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3656 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3657
3658 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3659 if (adev->rmmio == NULL) {
3660 return -ENOMEM;
3661 }
3662 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3663 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3664
3665 amdgpu_device_get_pcie_info(adev);
3666
3667 if (amdgpu_mcbp)
3668 DRM_INFO("MCBP is enabled\n");
3669
3670 if (adev->asic_type >= CHIP_NAVI10) {
3671 if (amdgpu_mes || amdgpu_mes_kiq)
3672 adev->enable_mes = true;
3673
3674 if (amdgpu_mes_kiq)
3675 adev->enable_mes_kiq = true;
3676 }
3677
	/*
	 * The reset domain must exist early, before any XGMI hive is
	 * discovered, since early init code and register access helpers
	 * already rely on it.
	 */
3683 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3684 if (!adev->reset_domain)
3685 return -ENOMEM;
3686
3687
3688 amdgpu_detect_virtualization(adev);
3689
3690 r = amdgpu_device_get_job_timeout_settings(adev);
3691 if (r) {
3692 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3693 return r;
3694 }
3695
3696
3697 r = amdgpu_device_ip_early_init(adev);
3698 if (r)
3699 return r;
3700
3701
3702 amdgpu_gmc_tmz_set(adev);
3703
3704 amdgpu_gmc_noretry_set(adev);
3705
3706 if (adev->gmc.xgmi.supported) {
3707 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3708 if (r)
3709 return r;
3710 }
3711
3712
3713 if (amdgpu_sriov_vf(adev))
3714 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3715 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3716 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3717 else
3718 adev->have_atomics_support =
3719 !pci_enable_atomic_ops_to_root(adev->pdev,
3720 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3721 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3722 if (!adev->have_atomics_support)
3723 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3724
3725
3726 amdgpu_device_doorbell_init(adev);
3727
3728 if (amdgpu_emu_mode == 1) {
3729
3730 emu_soc_asic_init(adev);
3731 goto fence_driver_init;
3732 }
3733
3734 amdgpu_reset_init(adev);
3735
3736
3737 amdgpu_device_detect_sriov_bios(adev);
3738
	/*
	 * Check if the ASIC needs to be reset on init, e.g. when the driver
	 * was not cleanly unloaded previously.
	 */
3742 if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3743 if (adev->gmc.xgmi.num_physical_nodes) {
3744 dev_info(adev->dev, "Pending hive reset.\n");
3745 adev->gmc.xgmi.pending_reset = true;
3746
3747 for (i = 0; i < adev->num_ip_blocks; i++) {
3748 if (!adev->ip_blocks[i].status.valid)
3749 continue;
3750 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3751 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3752 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3753 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3754 DRM_DEBUG("IP %s disabled for hw_init.\n",
3755 adev->ip_blocks[i].version->funcs->name);
3756 adev->ip_blocks[i].status.hw = true;
3757 }
3758 }
3759 } else {
3760 r = amdgpu_asic_reset(adev);
3761 if (r) {
3762 dev_err(adev->dev, "asic reset on init failed\n");
3763 goto failed;
3764 }
3765 }
3766 }
3767
3768 pci_enable_pcie_error_reporting(adev->pdev);
3769
3770
3771 if (amdgpu_device_need_post(adev)) {
3772 if (!adev->bios) {
3773 dev_err(adev->dev, "no vBIOS found\n");
3774 r = -EINVAL;
3775 goto failed;
3776 }
3777 DRM_INFO("GPU posting now...\n");
3778 r = amdgpu_device_asic_init(adev);
3779 if (r) {
3780 dev_err(adev->dev, "gpu post error!\n");
3781 goto failed;
3782 }
3783 }
3784
3785 if (adev->is_atom_fw) {
3786
3787 r = amdgpu_atomfirmware_get_clock_info(adev);
3788 if (r) {
3789 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3790 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3791 goto failed;
3792 }
3793 } else {
3794
3795 r = amdgpu_atombios_get_clock_info(adev);
3796 if (r) {
3797 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3798 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3799 goto failed;
3800 }
3801
3802 if (!amdgpu_device_has_dc_support(adev))
3803 amdgpu_atombios_i2c_init(adev);
3804 }
3805
3806fence_driver_init:
3807
3808 r = amdgpu_fence_driver_sw_init(adev);
3809 if (r) {
3810 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3811 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3812 goto failed;
3813 }
3814
3815
3816 drm_mode_config_init(adev_to_drm(adev));
3817
3818 r = amdgpu_device_ip_init(adev);
3819 if (r) {
3820
3821 if (amdgpu_sriov_vf(adev) &&
3822 !amdgpu_sriov_runtime(adev) &&
3823 amdgpu_virt_mmio_blocked(adev) &&
3824 !amdgpu_virt_wait_reset(adev)) {
3825 dev_err(adev->dev, "VF exclusive mode timeout\n");
3826
3827 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3828 adev->virt.ops = NULL;
3829 r = -EAGAIN;
3830 goto release_ras_con;
3831 }
3832 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3833 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3834 goto release_ras_con;
3835 }
3836
3837 amdgpu_fence_driver_hw_init(adev);
3838
3839 dev_info(adev->dev,
3840 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3841 adev->gfx.config.max_shader_engines,
3842 adev->gfx.config.max_sh_per_se,
3843 adev->gfx.config.max_cu_per_sh,
3844 adev->gfx.cu_info.number);
3845
3846 adev->accel_working = true;
3847
3848 amdgpu_vm_check_compute_bug(adev);
3849
3850
3851 if (amdgpu_moverate >= 0)
3852 max_MBps = amdgpu_moverate;
3853 else
3854 max_MBps = 8;
3855
3856 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3857
3858 r = amdgpu_pm_sysfs_init(adev);
3859 if (r) {
3860 adev->pm_sysfs_en = false;
3861 DRM_ERROR("registering pm debugfs failed (%d).\n", r);
3862 } else
3863 adev->pm_sysfs_en = true;
3864
3865 r = amdgpu_ucode_sysfs_init(adev);
3866 if (r) {
3867 adev->ucode_sysfs_en = false;
3868 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3869 } else
3870 adev->ucode_sysfs_en = true;
3871
3872 r = amdgpu_psp_sysfs_init(adev);
3873 if (r) {
3874 adev->psp_sysfs_en = false;
3875 if (!amdgpu_sriov_vf(adev))
3876 DRM_ERROR("Creating psp sysfs failed\n");
3877 } else
3878 adev->psp_sysfs_en = true;
3879
	/*
	 * Register the gpu instance before amdgpu_device_enable_mgpu_fan_boost,
	 * otherwise the mgpu fan boost feature would be skipped because the
	 * gpu instance count would be too low.
	 */
3885 amdgpu_register_gpu_instance(adev);
3886
	/* enable clockgating, etc. after ib tests, etc. since some blocks require
	 * explicit gating rather than handling it automatically.
	 */
3890 if (!adev->gmc.xgmi.pending_reset) {
3891 r = amdgpu_device_ip_late_init(adev);
3892 if (r) {
3893 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3894 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3895 goto release_ras_con;
3896 }
3897
3898 amdgpu_ras_resume(adev);
3899 queue_delayed_work(system_wq, &adev->delayed_init_work,
3900 msecs_to_jiffies(AMDGPU_RESUME_MS));
3901 }
3902
3903 if (amdgpu_sriov_vf(adev))
3904 flush_delayed_work(&adev->delayed_init_work);
3905
3906 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3907 if (r)
3908 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3909
	if (IS_ENABLED(CONFIG_PERF_EVENTS)) {
		r = amdgpu_pmu_init(adev);
		if (r)
			dev_err(adev->dev, "amdgpu_pmu_init failed\n");
	}
3914
3915
3916 if (amdgpu_device_cache_pci_state(adev->pdev))
3917 pci_restore_state(pdev);
3918
	/*
	 * If there is more than one VGA card, the VGA arbiter uses the
	 * registered callback to disable amdgpu's VGA resources.  Registration
	 * fails for cards that are not VGA class devices; just ignore it.
	 */
3922 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3923 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3924
3925 if (amdgpu_device_supports_px(ddev)) {
3926 px = true;
3927 vga_switcheroo_register_client(adev->pdev,
3928 &amdgpu_switcheroo_ops, px);
3929 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3930 }
3931
3932 if (adev->gmc.xgmi.pending_reset)
3933 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3934 msecs_to_jiffies(AMDGPU_RESUME_MS));
3935
3936 amdgpu_device_check_iommu_direct_map(adev);
3937
3938 return 0;
3939
3940release_ras_con:
3941 amdgpu_release_ras_context(adev);
3942
3943failed:
3944 amdgpu_vf_error_trans_all(adev);
3945
3946 return r;
3947}
3948
3949static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
3950{
3951
3952
3953 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
3954
3955
3956 amdgpu_device_doorbell_fini(adev);
3957
3958 iounmap(adev->rmmio);
3959 adev->rmmio = NULL;
3960 if (adev->mman.aper_base_kaddr)
3961 iounmap(adev->mman.aper_base_kaddr);
3962 adev->mman.aper_base_kaddr = NULL;
3963
3964
3965 if (!adev->gmc.xgmi.connected_to_cpu) {
3966 arch_phys_wc_del(adev->gmc.vram_mtrr);
3967 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
3968 }
3969}
3970
/**
 * amdgpu_device_fini_hw - tear down hardware state
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the driver's hardware state (all asics).
 * Called at driver shutdown.
 */
3979void amdgpu_device_fini_hw(struct amdgpu_device *adev)
3980{
3981 dev_info(adev->dev, "amdgpu: finishing device.\n");
3982 flush_delayed_work(&adev->delayed_init_work);
3983 adev->shutdown = true;
3984
	/*
	 * Make sure the IB test has finished before entering exclusive mode,
	 * to avoid preempting the IB test.
	 */
3988 if (amdgpu_sriov_vf(adev)) {
3989 amdgpu_virt_request_full_gpu(adev, false);
3990 amdgpu_virt_fini_data_exchange(adev);
3991 }
3992
3993
3994 amdgpu_irq_disable_all(adev);
3995 if (adev->mode_info.mode_config_initialized){
3996 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
3997 drm_helper_force_disable_all(adev_to_drm(adev));
3998 else
3999 drm_atomic_helper_shutdown(adev_to_drm(adev));
4000 }
4001 amdgpu_fence_driver_hw_fini(adev);
4002
4003 if (adev->mman.initialized) {
4004 flush_delayed_work(&adev->mman.bdev.wq);
4005 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
4006 }
4007
4008 if (adev->pm_sysfs_en)
4009 amdgpu_pm_sysfs_fini(adev);
4010 if (adev->ucode_sysfs_en)
4011 amdgpu_ucode_sysfs_fini(adev);
4012 if (adev->psp_sysfs_en)
4013 amdgpu_psp_sysfs_fini(adev);
4014 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4015
4016
4017 amdgpu_ras_pre_fini(adev);
4018
4019 amdgpu_device_ip_fini_early(adev);
4020
4021 amdgpu_irq_fini_hw(adev);
4022
4023 if (adev->mman.initialized)
4024 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4025
4026 amdgpu_gart_dummy_page_fini(adev);
4027
4028 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4029 amdgpu_device_unmap_mmio(adev);
4030
4031}
4032
4033void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4034{
4035 int idx;
4036
4037 amdgpu_fence_driver_sw_fini(adev);
4038 amdgpu_device_ip_fini(adev);
4039 release_firmware(adev->firmware.gpu_info_fw);
4040 adev->firmware.gpu_info_fw = NULL;
4041 adev->accel_working = false;
4042
4043 amdgpu_reset_fini(adev);
4044
4045
4046 if (!amdgpu_device_has_dc_support(adev))
4047 amdgpu_i2c_fini(adev);
4048
4049 if (amdgpu_emu_mode != 1)
4050 amdgpu_atombios_fini(adev);
4051
4052 kfree(adev->bios);
4053 adev->bios = NULL;
4054 if (amdgpu_device_supports_px(adev_to_drm(adev))) {
4055 vga_switcheroo_unregister_client(adev->pdev);
4056 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4057 }
4058 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4059 vga_client_unregister(adev->pdev);
4060
4061 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4062
4063 iounmap(adev->rmmio);
4064 adev->rmmio = NULL;
4065 amdgpu_device_doorbell_fini(adev);
4066 drm_dev_exit(idx);
4067 }
4068
4069 if (IS_ENABLED(CONFIG_PERF_EVENTS))
4070 amdgpu_pmu_fini(adev);
4071 if (adev->mman.discovery_bin)
4072 amdgpu_discovery_fini(adev);
4073
4074 amdgpu_reset_put_reset_domain(adev->reset_domain);
4075 adev->reset_domain = NULL;
4076
4077 kfree(adev->pci_state);
4078
4079}
4080
/**
 * amdgpu_device_evict_resources - evict device resources
 *
 * @adev: amdgpu_device pointer
 *
 * Evicts all ttm device resources (VRAM buffer objects, GART table) out of
 * VRAM before device suspend.
 */
4090static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
4091{
	/* no need to evict VRAM on APUs for suspend-to-RAM or S0ix */
4093 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4094 return;
4095
4096 if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
4097 DRM_WARN("evicting device resources failed\n");
4098
4099}
4100
/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm dev pointer
 * @fbcon: notify the fbdev of suspend
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
4114int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4115{
4116 struct amdgpu_device *adev = drm_to_adev(dev);
4117
4118 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4119 return 0;
4120
4121 adev->in_suspend = true;
4122
4123 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4124 DRM_WARN("smart shift update failed\n");
4125
4126 drm_kms_helper_poll_disable(dev);
4127
4128 if (fbcon)
4129 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4130
4131 cancel_delayed_work_sync(&adev->delayed_init_work);
4132
4133 amdgpu_ras_suspend(adev);
4134
4135 amdgpu_device_ip_suspend_phase1(adev);
4136
4137 if (!adev->in_s0ix)
4138 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4139
4140 amdgpu_device_evict_resources(adev);
4141
4142 amdgpu_fence_driver_hw_fini(adev);
4143
4144 amdgpu_device_ip_suspend_phase2(adev);
4145
4146 return 0;
4147}
4148
/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @fbcon: notify the fbdev of resume
 *
 * Brings the hw back to an operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
4159int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4160{
4161 struct amdgpu_device *adev = drm_to_adev(dev);
4162 int r = 0;
4163
4164 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4165 return 0;
4166
4167 if (adev->in_s0ix)
4168 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4169
4170
4171 if (amdgpu_device_need_post(adev)) {
4172 r = amdgpu_device_asic_init(adev);
4173 if (r)
4174 dev_err(adev->dev, "amdgpu asic init failed\n");
4175 }
4176
4177 r = amdgpu_device_ip_resume(adev);
4178 if (r) {
4179 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4180 return r;
4181 }
4182 amdgpu_fence_driver_hw_init(adev);
4183
4184 r = amdgpu_device_ip_late_init(adev);
4185 if (r)
4186 return r;
4187
4188 queue_delayed_work(system_wq, &adev->delayed_init_work,
4189 msecs_to_jiffies(AMDGPU_RESUME_MS));
4190
4191 if (!adev->in_s0ix) {
4192 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4193 if (r)
4194 return r;
4195 }
4196
4197
4198 flush_delayed_work(&adev->delayed_init_work);
4199
4200 if (fbcon)
4201 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4202
4203 drm_kms_helper_poll_enable(dev);
4204
4205 amdgpu_ras_resume(adev);
4206
	/*
	 * Most of the connector probing functions try to acquire runtime pm
	 * refs to ensure that the GPU is powered on when connector polling is
	 * performed.  Since we're calling this from a runtime PM callback,
	 * trying to acquire rpm refs will cause us to deadlock.
	 *
	 * Since we're guaranteed to be holding the rpm lock, it's safe to
	 * temporarily disable the rpm helpers so this doesn't deadlock us.
	 */
4216#ifdef CONFIG_PM
4217 dev->dev->power.disable_depth++;
4218#endif
4219 if (!amdgpu_device_has_dc_support(adev))
4220 drm_helper_hpd_irq_event(dev);
4221 else
4222 drm_kms_helper_hotplug_event(dev);
4223#ifdef CONFIG_PM
4224 dev->dev->power.disable_depth--;
4225#endif
4226 adev->in_suspend = false;
4227
4228 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4229 DRM_WARN("smart shift update failed\n");
4230
4231 return 0;
4232}
4233
/**
 * amdgpu_device_ip_check_soft_reset - did any IP block hang?
 *
 * @adev: amdgpu_device pointer
 *
 * Walks the hardware IPs and runs their check_soft_reset callbacks to see
 * whether any block is hung.
 * Returns true if any IP block reports a hang (or a full reset is required),
 * false otherwise.
 */
4244static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4245{
4246 int i;
4247 bool asic_hang = false;
4248
4249 if (amdgpu_sriov_vf(adev))
4250 return true;
4251
4252 if (amdgpu_asic_need_full_reset(adev))
4253 return true;
4254
4255 for (i = 0; i < adev->num_ip_blocks; i++) {
4256 if (!adev->ip_blocks[i].status.valid)
4257 continue;
4258 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4259 adev->ip_blocks[i].status.hang =
4260 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4261 if (adev->ip_blocks[i].status.hang) {
4262 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4263 asic_hang = true;
4264 }
4265 }
4266 return asic_hang;
4267}
4268
/**
 * amdgpu_device_ip_pre_soft_reset - prepare hung IPs for soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the pre_soft_reset callbacks for all IP blocks that reported a hang.
 * Returns 0 on success, negative error code on failure.
 */
4280static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4281{
4282 int i, r = 0;
4283
4284 for (i = 0; i < adev->num_ip_blocks; i++) {
4285 if (!adev->ip_blocks[i].status.valid)
4286 continue;
4287 if (adev->ip_blocks[i].status.hang &&
4288 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4289 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4290 if (r)
4291 return r;
4292 }
4293 }
4294
4295 return 0;
4296}
4297
/**
 * amdgpu_device_ip_need_full_reset - check if a full ASIC reset is required
 *
 * @adev: amdgpu_device pointer
 *
 * Some blocks (GMC, SMC, ACP, DCE, PSP) cannot be soft reset; if any of them
 * is hung, a full reset is required.
 * Returns true if a full reset is needed, false otherwise.
 */
4307static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4308{
4309 int i;
4310
4311 if (amdgpu_asic_need_full_reset(adev))
4312 return true;
4313
4314 for (i = 0; i < adev->num_ip_blocks; i++) {
4315 if (!adev->ip_blocks[i].status.valid)
4316 continue;
4317 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4318 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4319 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4320 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4321 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4322 if (adev->ip_blocks[i].status.hang) {
4323 dev_info(adev->dev, "Some block need full reset!\n");
4324 return true;
4325 }
4326 }
4327 }
4328 return false;
4329}
4330
/**
 * amdgpu_device_ip_soft_reset - soft reset hung IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the soft_reset callbacks for all IP blocks that reported a hang.
 * Returns 0 on success, negative error code on failure.
 */
4342static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4343{
4344 int i, r = 0;
4345
4346 for (i = 0; i < adev->num_ip_blocks; i++) {
4347 if (!adev->ip_blocks[i].status.valid)
4348 continue;
4349 if (adev->ip_blocks[i].status.hang &&
4350 adev->ip_blocks[i].version->funcs->soft_reset) {
4351 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4352 if (r)
4353 return r;
4354 }
4355 }
4356
4357 return 0;
4358}
4359
/**
 * amdgpu_device_ip_post_soft_reset - clean up after a soft reset
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the post_soft_reset callbacks for all IP blocks that reported a hang.
 * Returns 0 on success, negative error code on failure.
 */
4371static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4372{
4373 int i, r = 0;
4374
4375 for (i = 0; i < adev->num_ip_blocks; i++) {
4376 if (!adev->ip_blocks[i].status.valid)
4377 continue;
4378 if (adev->ip_blocks[i].status.hang &&
4379 adev->ip_blocks[i].version->funcs->post_soft_reset)
4380 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4381 if (r)
4382 return r;
4383 }
4384
4385 return 0;
4386}
4387
/**
 * amdgpu_device_recover_vram - recover VRAM contents from shadow BOs
 *
 * @adev: amdgpu_device pointer
 *
 * Restores the contents of VRAM buffers (e.g. GPUVM page tables) from their
 * shadow copies in GTT after a reset where VRAM contents may have been lost.
 * Returns 0 on success, negative error code on failure.
 */
4400static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4401{
4402 struct dma_fence *fence = NULL, *next = NULL;
4403 struct amdgpu_bo *shadow;
4404 struct amdgpu_bo_vm *vmbo;
4405 long r = 1, tmo;
4406
4407 if (amdgpu_sriov_runtime(adev))
4408 tmo = msecs_to_jiffies(8000);
4409 else
4410 tmo = msecs_to_jiffies(100);
4411
4412 dev_info(adev->dev, "recover vram bo from shadow start\n");
4413 mutex_lock(&adev->shadow_list_lock);
4414 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4415 shadow = &vmbo->bo;
4416
4417 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4418 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4419 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4420 continue;
4421
4422 r = amdgpu_bo_restore_shadow(shadow, &next);
4423 if (r)
4424 break;
4425
4426 if (fence) {
4427 tmo = dma_fence_wait_timeout(fence, false, tmo);
4428 dma_fence_put(fence);
4429 fence = next;
4430 if (tmo == 0) {
4431 r = -ETIMEDOUT;
4432 break;
4433 } else if (tmo < 0) {
4434 r = tmo;
4435 break;
4436 }
4437 } else {
4438 fence = next;
4439 }
4440 }
4441 mutex_unlock(&adev->shadow_list_lock);
4442
4443 if (fence)
4444 tmo = dma_fence_wait_timeout(fence, false, tmo);
4445 dma_fence_put(fence);
4446
4447 if (r < 0 || tmo <= 0) {
4448 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4449 return -EIO;
4450 }
4451
4452 dev_info(adev->dev, "recover vram bo from shadow done\n");
4453 return 0;
4454}
4455
/**
 * amdgpu_device_reset_sriov - reset the ASIC for an SR-IOV VF
 *
 * @adev: amdgpu_device pointer
 * @from_hypervisor: whether the reset request originated from the hypervisor
 *
 * Perform a VF FLR or request a reset from the host and reinitialize the ASIC.
 * Returns 0 on success, negative error code on failure.
 */
4466static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4467 bool from_hypervisor)
4468{
4469 int r;
4470 struct amdgpu_hive_info *hive = NULL;
4471 int retry_limit = 0;
4472
4473retry:
	amdgpu_amdkfd_pre_reset(adev);
4477
4478 if (from_hypervisor)
4479 r = amdgpu_virt_request_full_gpu(adev, true);
4480 else
4481 r = amdgpu_virt_reset_gpu(adev);
4482 if (r)
4483 return r;
4484
4485
4486 r = amdgpu_device_ip_reinit_early_sriov(adev);
4487 if (r)
4488 goto error;
4489
4490 amdgpu_virt_init_data_exchange(adev);
4491
4492 r = amdgpu_device_fw_loading(adev);
4493 if (r)
4494 return r;
4495
4496
4497 r = amdgpu_device_ip_reinit_late_sriov(adev);
4498 if (r)
4499 goto error;
4500
4501 hive = amdgpu_get_xgmi_hive(adev);
4502
4503 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4504 r = amdgpu_xgmi_update_topology(hive, adev);
4505
4506 if (hive)
4507 amdgpu_put_xgmi_hive(hive);
4508
4509 if (!r) {
4510 amdgpu_irq_gpu_reset_resume_helper(adev);
4511 r = amdgpu_ib_ring_tests(adev);
4512
4513 amdgpu_amdkfd_post_reset(adev);
4514 }
4515
4516error:
4517 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4518 amdgpu_inc_vram_lost(adev);
4519 r = amdgpu_device_recover_vram(adev);
4520 }
4521 amdgpu_virt_release_full_gpu(adev, true);
4522
4523 if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4524 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4525 retry_limit++;
4526 goto retry;
4527 } else
4528 DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4529 }
4530
4531 return r;
4532}
4533
/**
 * amdgpu_device_has_job_running - check if there is any unfinished job
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true if any ring still has a job in its pending list.
 */
4541bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4542{
4543 int i;
4544 struct drm_sched_job *job;
4545
4546 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4547 struct amdgpu_ring *ring = adev->rings[i];
4548
4549 if (!ring || !ring->sched.thread)
4550 continue;
4551
4552 spin_lock(&ring->sched.job_list_lock);
4553 job = list_first_entry_or_null(&ring->sched.pending_list,
4554 struct drm_sched_job, list);
4555 spin_unlock(&ring->sched.job_list_lock);
4556 if (job)
4557 return true;
4558 }
4559 return false;
4560}
4561
/**
 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
 *
 * @adev: amdgpu_device pointer
 *
 * Check amdgpu_gpu_recovery and SR-IOV status to decide whether to attempt
 * recovery of a hung GPU.
 */
4570bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4571{
4572 if (!amdgpu_device_ip_check_soft_reset(adev)) {
4573 dev_info(adev->dev, "Timeout, but no hardware hang detected.\n");
4574 return false;
4575 }
4576
4577 if (amdgpu_gpu_recovery == 0)
4578 goto disabled;
4579
4580 if (amdgpu_sriov_vf(adev))
4581 return true;
4582
4583 if (amdgpu_gpu_recovery == -1) {
4584 switch (adev->asic_type) {
4585#ifdef CONFIG_DRM_AMDGPU_SI
4586 case CHIP_VERDE:
4587 case CHIP_TAHITI:
4588 case CHIP_PITCAIRN:
4589 case CHIP_OLAND:
4590 case CHIP_HAINAN:
4591#endif
4592#ifdef CONFIG_DRM_AMDGPU_CIK
4593 case CHIP_KAVERI:
4594 case CHIP_KABINI:
4595 case CHIP_MULLINS:
4596#endif
4597 case CHIP_CARRIZO:
4598 case CHIP_STONEY:
4599 case CHIP_CYAN_SKILLFISH:
4600 goto disabled;
4601 default:
4602 break;
4603 }
4604 }
4605
4606 return true;
4607
4608disabled:
4609 dev_info(adev->dev, "GPU recovery disabled.\n");
4610 return false;
4611}
4612
4613int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4614{
4615 u32 i;
4616 int ret = 0;
4617
4618 amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4619
4620 dev_info(adev->dev, "GPU mode1 reset\n");
4621
4622
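	/* disable bus mastering before triggering the reset */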
4623 pci_clear_master(adev->pdev);
4624
4625 amdgpu_device_cache_pci_state(adev->pdev);
4626
4627 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4628 dev_info(adev->dev, "GPU smu mode1 reset\n");
4629 ret = amdgpu_dpm_mode1_reset(adev);
4630 } else {
4631 dev_info(adev->dev, "GPU psp mode1 reset\n");
4632 ret = psp_gpu_reset(adev);
4633 }
4634
4635 if (ret)
4636 dev_err(adev->dev, "GPU mode1 reset failed\n");
4637
4638 amdgpu_device_load_pci_state(adev->pdev);
4639
4640
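	/* wait for the ASIC to come back; memsize reads as 0xffffffff until register access works again */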
4641 for (i = 0; i < adev->usec_timeout; i++) {
4642 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4643
4644 if (memsize != 0xffffffff)
4645 break;
4646 udelay(1);
4647 }
4648
4649 amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4650 return ret;
4651}
4652
4653int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4654 struct amdgpu_reset_context *reset_context)
4655{
4656 int i, r = 0;
4657 struct amdgpu_job *job = NULL;
4658 bool need_full_reset =
4659 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4660
4661 if (reset_context->reset_req_dev == adev)
4662 job = reset_context->job;
4663
4664 if (amdgpu_sriov_vf(adev)) {
4665
4666 amdgpu_virt_fini_data_exchange(adev);
4667 }
4668
4669
4670 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4671 struct amdgpu_ring *ring = adev->rings[i];
4672
4673 if (!ring || !ring->sched.thread)
4674 continue;
4675
4676
4677
4678 amdgpu_fence_driver_clear_job_fences(ring);
4679
4680
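		/* after the job fences are cleared, the remaining hw fences are meaningless; force-signal them */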
4681 amdgpu_fence_driver_force_completion(ring);
4682 }
4683
4684 if (job && job->vm)
4685 drm_sched_increase_karma(&job->base);
4686
4687 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4688
4689 if (r == -ENOSYS)
4690 r = 0;
4691 else
4692 return r;
4693
	/* on bare metal, try a soft reset first and only suspend the IPs when a full reset is needed */
4695 if (!amdgpu_sriov_vf(adev)) {
4696
4697 if (!need_full_reset)
4698 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4699
4700 if (!need_full_reset) {
4701 amdgpu_device_ip_pre_soft_reset(adev);
4702 r = amdgpu_device_ip_soft_reset(adev);
4703 amdgpu_device_ip_post_soft_reset(adev);
4704 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4705 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4706 need_full_reset = true;
4707 }
4708 }
4709
4710 if (need_full_reset)
4711 r = amdgpu_device_ip_suspend(adev);
4712 if (need_full_reset)
4713 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4714 else
4715 clear_bit(AMDGPU_NEED_FULL_RESET,
4716 &reset_context->flags);
4717 }
4718
4719 return r;
4720}
4721
4722static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4723{
4724 uint32_t reg_value;
4725 int i;
4726
4727 lockdep_assert_held(&adev->reset_domain->sem);
4728 dump_stack();
4729
4730 for (i = 0; i < adev->num_regs; i++) {
4731 reg_value = RREG32(adev->reset_dump_reg_list[i]);
4732 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
4733 }
4734
4735 return 0;
4736}
4737
4738int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4739 struct amdgpu_reset_context *reset_context)
4740{
4741 struct amdgpu_device *tmp_adev = NULL;
4742 bool need_full_reset, skip_hw_reset, vram_lost = false;
4743 int r = 0;
4744
4745
4746 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4747 reset_list);
4748 amdgpu_reset_reg_dumps(tmp_adev);
4749 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4750
4751 if (r == -ENOSYS)
4752 r = 0;
4753 else
4754 return r;
4755
4756
4757 need_full_reset =
4758 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4759 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4760
	/*
	 * The ASIC reset has to be done on all XGMI hive nodes ASAP to allow
	 * proper link negotiation in firmware (within about 1 second).
	 */
4765 if (!skip_hw_reset && need_full_reset) {
4766 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4767
4768 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4769 tmp_adev->gmc.xgmi.pending_reset = false;
4770 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4771 r = -EALREADY;
4772 } else
4773 r = amdgpu_asic_reset(tmp_adev);
4774
4775 if (r) {
4776 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4777 r, adev_to_drm(tmp_adev)->unique);
4778 break;
4779 }
4780 }
4781
4782
4783 if (!r) {
4784 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4785 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4786 flush_work(&tmp_adev->xgmi_reset_work);
4787 r = tmp_adev->asic_reset_res;
4788 if (r)
4789 break;
4790 }
4791 }
4792 }
4793 }
4794
4795 if (!r && amdgpu_ras_intr_triggered()) {
4796 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4797 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4798 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4799 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4800 }
4801
4802 amdgpu_ras_intr_cleared();
4803 }
4804
4805 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4806 if (need_full_reset) {
4807
4808 r = amdgpu_device_asic_init(tmp_adev);
4809 if (r) {
4810 dev_warn(tmp_adev->dev, "asic atom init failed!");
4811 } else {
4812 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4813 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4814 if (r)
4815 goto out;
4816
4817 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4818 if (r)
4819 goto out;
4820
4821 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4822 if (vram_lost) {
4823 DRM_INFO("VRAM is lost due to GPU reset!\n");
4824 amdgpu_inc_vram_lost(tmp_adev);
4825 }
4826
4827 r = amdgpu_device_fw_loading(tmp_adev);
4828 if (r)
4829 return r;
4830
4831 r = amdgpu_device_ip_resume_phase2(tmp_adev);
4832 if (r)
4833 goto out;
4834
4835 if (vram_lost)
4836 amdgpu_device_fill_reset_magic(tmp_adev);
4837
4838
4839
4840
4841
4842 amdgpu_register_gpu_instance(tmp_adev);
4843
4844 if (!reset_context->hive &&
4845 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4846 amdgpu_xgmi_add_device(tmp_adev);
4847
4848 r = amdgpu_device_ip_late_init(tmp_adev);
4849 if (r)
4850 goto out;
4851
4852 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
4853
				/*
				 * Once the number of pages retired by ECC
				 * reaches the threshold, the GPU is in a bad
				 * state and RAS recovery would be scheduled
				 * next.  Break recovery here and ask the user
				 * to retire the GPU or raise
				 * amdgpu_bad_page_threshold to tolerate more
				 * bad pages.
				 */
4864 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
4865
4866 amdgpu_ras_resume(tmp_adev);
4867 } else {
4868 r = -EINVAL;
4869 goto out;
4870 }
4871
4872
4873 if (reset_context->hive &&
4874 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
4875 r = amdgpu_xgmi_update_topology(
4876 reset_context->hive, tmp_adev);
4877 }
4878 }
4879
4880out:
4881 if (!r) {
4882 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
4883 r = amdgpu_ib_ring_tests(tmp_adev);
4884 if (r) {
4885 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
4886 need_full_reset = true;
4887 r = -EAGAIN;
4888 goto end;
4889 }
4890 }
4891
4892 if (!r)
4893 r = amdgpu_device_recover_vram(tmp_adev);
4894 else
4895 tmp_adev->asic_reset_res = r;
4896 }
4897
4898end:
4899 if (need_full_reset)
4900 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4901 else
4902 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4903 return r;
4904}
4905
4906static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
4907{
4908
4909 switch (amdgpu_asic_reset_method(adev)) {
4910 case AMD_RESET_METHOD_MODE1:
4911 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
4912 break;
4913 case AMD_RESET_METHOD_MODE2:
4914 adev->mp1_state = PP_MP1_STATE_RESET;
4915 break;
4916 default:
4917 adev->mp1_state = PP_MP1_STATE_NONE;
4918 break;
4919 }
4920}
4921
4922static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
4923{
4924 amdgpu_vf_error_trans_all(adev);
4925 adev->mp1_state = PP_MP1_STATE_NONE;
4926}
4927
4928static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
4929{
4930 struct pci_dev *p = NULL;
4931
4932 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4933 adev->pdev->bus->number, 1);
4934 if (p) {
4935 pm_runtime_enable(&(p->dev));
4936 pm_runtime_resume(&(p->dev));
4937 }
4938}
4939
4940static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
4941{
4942 enum amd_reset_method reset_method;
4943 struct pci_dev *p = NULL;
4944 u64 expires;
4945
4946
4947
4948
4949
4950 reset_method = amdgpu_asic_reset_method(adev);
4951 if ((reset_method != AMD_RESET_METHOD_BACO) &&
4952 (reset_method != AMD_RESET_METHOD_MODE1))
4953 return -EINVAL;
4954
4955 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
4956 adev->pdev->bus->number, 1);
4957 if (!p)
4958 return -ENODEV;
4959
4960 expires = pm_runtime_autosuspend_expiration(&(p->dev));
4961 if (!expires)
		/*
		 * If the audio device has no pending autosuspend deadline,
		 * allow it up to 4 seconds to reach the suspended state.
		 */
4968 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
4969
4970 while (!pm_runtime_status_suspended(&(p->dev))) {
4971 if (!pm_runtime_suspend(&(p->dev)))
4972 break;
4973
4974 if (expires < ktime_get_mono_fast_ns()) {
4975 dev_warn(adev->dev, "failed to suspend display audio\n");
4976
4977 return -ETIMEDOUT;
4978 }
4979 }
4980
4981 pm_runtime_disable(&(p->dev));
4982
4983 return 0;
4984}
4985
4986static void amdgpu_device_recheck_guilty_jobs(
4987 struct amdgpu_device *adev, struct list_head *device_list_handle,
4988 struct amdgpu_reset_context *reset_context)
4989{
4990 int i, r = 0;
4991
4992 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4993 struct amdgpu_ring *ring = adev->rings[i];
4994 int ret = 0;
4995 struct drm_sched_job *s_job;
4996
4997 if (!ring || !ring->sched.thread)
4998 continue;
4999
5000 s_job = list_first_entry_or_null(&ring->sched.pending_list,
5001 struct drm_sched_job, list);
5002 if (s_job == NULL)
5003 continue;
5004
5005
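		/* clear the job's karma first; the synchronous resubmit below decides whether it is really guilty */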
5006 drm_sched_reset_karma(s_job);
5007
5008
5009 dma_fence_get(s_job->s_fence->parent);
5010 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
5011
5012 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
5013 if (ret == 0) {
5014 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
5015 ring->sched.name, s_job->id);
5016
5017
5018 drm_sched_increase_karma(s_job);
5019retry:
5020
5021 if (amdgpu_sriov_vf(adev)) {
5022 amdgpu_virt_fini_data_exchange(adev);
5023 r = amdgpu_device_reset_sriov(adev, false);
5024 if (r)
5025 adev->asic_reset_res = r;
5026 } else {
5027 clear_bit(AMDGPU_SKIP_HW_RESET,
5028 &reset_context->flags);
5029 r = amdgpu_do_asic_reset(device_list_handle,
5030 reset_context);
5031 if (r && r == -EAGAIN)
5032 goto retry;
5033 }
5034
			/*
			 * Bump the reset counter so that jobs resubmitted
			 * after this point flush their VMIDs.
			 */
5039 atomic_inc(&adev->gpu_reset_counter);
5040 continue;
5041 }
5042
5043
5044 atomic_dec(ring->sched.score);
5045 dma_fence_put(s_job->s_fence->parent);
5046 dma_fence_get(&s_job->s_fence->finished);
5047 dma_fence_signal(&s_job->s_fence->finished);
5048 dma_fence_put(&s_job->s_fence->finished);
5049
5050
5051 spin_lock(&ring->sched.job_list_lock);
5052 list_del_init(&s_job->list);
5053 spin_unlock(&ring->sched.job_list_lock);
5054 ring->sched.ops->free_job(s_job);
5055 }
5056}
5057
/**
 * amdgpu_device_gpu_recover_imp - reset the ASIC and recover the scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, if any
 *
 * Attempt to reset the GPU if it has hung (all asics): perform a soft or
 * full reset, reinitialize the ASIC and restart the schedulers.
 * Returns 0 on success, negative error code on failure.
 */
5069int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
5070 struct amdgpu_job *job)
5071{
5072 struct list_head device_list, *device_list_handle = NULL;
5073 bool job_signaled = false;
5074 struct amdgpu_hive_info *hive = NULL;
5075 struct amdgpu_device *tmp_adev = NULL;
5076 int i, r = 0;
5077 bool need_emergency_restart = false;
5078 bool audio_suspended = false;
5079 int tmp_vram_lost_counter;
5080 struct amdgpu_reset_context reset_context;
5081
5082 memset(&reset_context, 0, sizeof(reset_context));
5083
5084
5085
5086
5087 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5088
	/*
	 * Flush RAM to disk so that after the reboot the user can read the
	 * log and see why the system rebooted.
	 */
5093 if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5094 DRM_WARN("Emergency reboot.");
5095
5096 ksys_sync_helper();
5097 emergency_restart();
5098 }
5099
5100 dev_info(adev->dev, "GPU %s begin!\n",
5101 need_emergency_restart ? "jobs stop":"reset");
5102
5103 if (!amdgpu_sriov_vf(adev))
5104 hive = amdgpu_get_xgmi_hive(adev);
5105 if (hive)
5106 mutex_lock(&hive->hive_lock);
5107
5108 reset_context.method = AMD_RESET_METHOD_NONE;
5109 reset_context.reset_req_dev = adev;
5110 reset_context.job = job;
5111 reset_context.hive = hive;
5112 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5113
	/*
	 * Build the list of devices to reset.  In XGMI hive mode, resort the
	 * device list so that adev is in the first position.
	 */
5119 INIT_LIST_HEAD(&device_list);
5120 if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5121 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
5122 list_add_tail(&tmp_adev->reset_list, &device_list);
5123 if (!list_is_first(&adev->reset_list, &device_list))
5124 list_rotate_to_front(&adev->reset_list, &device_list);
5125 device_list_handle = &device_list;
5126 } else {
5127 list_add_tail(&adev->reset_list, &device_list);
5128 device_list_handle = &device_list;
5129 }
5130
	/* We need to lock the reset domain only once, both for XGMI and single device */
5132 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5133 reset_list);
5134 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5135
	/* block all schedulers and reset the given job's ring */
5137 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5138
5139 amdgpu_device_set_mp1_state(tmp_adev);
5140
		/*
		 * Try to put the audio codec into suspend state
		 * before the gpu reset starts.
		 *
		 * The power domain of the graphics device is shared
		 * with the audio (AZ) power domain. Without suspending
		 * the codec first we may change the audio hardware from
		 * behind the audio driver's back, which can trigger
		 * audio codec errors.
		 */
5151 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5152 audio_suspended = true;
5153
5154 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5155
5156 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5157
5158 if (!amdgpu_sriov_vf(tmp_adev))
5159 amdgpu_amdkfd_pre_reset(tmp_adev);
5160
		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset completes.
		 */
5165 amdgpu_unregister_gpu_instance(tmp_adev);
5166
5167 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5168
		/* suspend RAS on all IPs before a full reset */
5170 if (!need_emergency_restart &&
5171 amdgpu_device_ip_need_full_reset(tmp_adev))
5172 amdgpu_ras_suspend(tmp_adev);
5173
5174 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5175 struct amdgpu_ring *ring = tmp_adev->rings[i];
5176
5177 if (!ring || !ring->sched.thread)
5178 continue;
5179
5180 drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5181
5182 if (need_emergency_restart)
5183 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5184 }
5185 atomic_inc(&tmp_adev->gpu_reset_counter);
5186 }
5187
5188 if (need_emergency_restart)
5189 goto skip_sched_resume;
5190
	/*
	 * Must check the guilty signal here, since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to the parent fence.
	 */
5197 if (job && job->base.s_fence->parent &&
5198 dma_fence_is_signaled(job->base.s_fence->parent)) {
5199 job_signaled = true;
5200 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5201 goto skip_hw_reset;
5202 }
5203
5204retry:
5205 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5206 r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
5207
5208 if (r) {
5209 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5210 r, adev_to_drm(tmp_adev)->unique);
5211 tmp_adev->asic_reset_res = r;
5212 }
5213 }
5214
5215 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
	/* Actual ASIC resets if needed. */
	/* The host driver will handle XGMI hive reset for SRIOV */
5218 if (amdgpu_sriov_vf(adev)) {
5219 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5220 if (r)
5221 adev->asic_reset_res = r;
5222
		/* Aldebaran supports RAS in SRIOV, so resume RAS during the reset */
5224 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
5225 amdgpu_ras_resume(adev);
5226 } else {
5227 r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
5228 if (r && r == -EAGAIN)
5229 goto retry;
5230 }
5231
5232skip_hw_reset:
	/* Post ASIC reset for all devices. */
5235 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/*
		 * In advanced TDR mode (amdgpu_gpu_recovery == 2), re-test the
		 * jobs that were flagged as guilty, but only when VRAM content
		 * survived the reset: a later bad job can otherwise get an
		 * earlier, innocent job on the same ring blamed for the hang.
		 */
5244 if (amdgpu_gpu_recovery == 2 &&
5245 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
5246 amdgpu_device_recheck_guilty_jobs(
5247 tmp_adev, device_list_handle, &reset_context);
5248
5249 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5250 struct amdgpu_ring *ring = tmp_adev->rings[i];
5251
5252 if (!ring || !ring->sched.thread)
5253 continue;
5254
			/* No point in resubmitting jobs if we didn't HW reset */
5256 if (!tmp_adev->asic_reset_res && !job_signaled)
5257 drm_sched_resubmit_jobs(&ring->sched);
5258
5259 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5260 }
5261
5262 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5263 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5264 }
5265
5266 if (tmp_adev->asic_reset_res)
5267 r = tmp_adev->asic_reset_res;
5268
5269 tmp_adev->asic_reset_res = 0;
5270
5271 if (r) {
			/* bad news, how to tell it to userspace ? */
5273 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5274 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5275 } else {
5276 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5277 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5278 DRM_WARN("smart shift update failed\n");
5279 }
5280 }
5281
5282skip_sched_resume:
5283 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
5285 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5286 amdgpu_amdkfd_post_reset(tmp_adev);
5287
		/* kfd_post_reset will do nothing if the kfd device is not initialized;
		 * bring up kfd here if it was not initialized before
		 */
5291 if (!adev->kfd.init_complete)
5292 amdgpu_amdkfd_device_init(adev);
5293
5294 if (audio_suspended)
5295 amdgpu_device_resume_display_audio(tmp_adev);
5296
5297 amdgpu_device_unset_mp1_state(tmp_adev);
5298 }
5299
5300 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5301 reset_list);
5302 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5303
5304 if (hive) {
5305 mutex_unlock(&hive->hive_lock);
5306 amdgpu_put_xgmi_hive(hive);
5307 }
5308
5309 if (r)
5310 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5311 return r;
5312}
5313
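/*
 * GPU recovery is funneled through the reset domain's ordered workqueue.
 * amdgpu_recover_work_struct carries the recovery arguments and result so
 * that amdgpu_device_gpu_recover() can wait for the queued work to finish.
 */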
5314struct amdgpu_recover_work_struct {
5315 struct work_struct base;
5316 struct amdgpu_device *adev;
5317 struct amdgpu_job *job;
5318 int ret;
5319};
5320
5321static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
5322{
5323 struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
5324
5325 recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
5326}
5327
/*
 * Serialize GPU recovery by pushing it onto the reset domain's single
 * threaded workqueue and waiting for the queued work to complete.
 */
5330int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5331 struct amdgpu_job *job)
5332{
5333 struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
5334
5335 INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
5336
5337 if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
5338 return -EAGAIN;
5339
5340 flush_work(&work.base);
5341
5342 return work.ret;
5343}
5344
/**
 * amdgpu_device_get_pcie_info - fetch PCIE capabilities of the slot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches and stores in the driver the PCIE capabilities (generations and
 * lane widths) of the slot the device is in. Handles APUs and virtualized
 * environments where PCIE config space may not be available.
 */
5354static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5355{
5356 struct pci_dev *pdev;
5357 enum pci_bus_speed speed_cap, platform_speed_cap;
5358 enum pcie_link_width platform_link_width;
5359
5360 if (amdgpu_pcie_gen_cap)
5361 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5362
5363 if (amdgpu_pcie_lane_cap)
5364 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5365
	/* devices directly on the root bus (e.g. APUs) use the default masks */
5367 if (pci_is_root_bus(adev->pdev->bus)) {
5368 if (adev->pm.pcie_gen_mask == 0)
5369 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5370 if (adev->pm.pcie_mlw_mask == 0)
5371 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5372 return;
5373 }
5374
5375 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5376 return;
5377
5378 pcie_bandwidth_available(adev->pdev, NULL,
5379 &platform_speed_cap, &platform_link_width);
5380
5381 if (adev->pm.pcie_gen_mask == 0) {
		/* asic caps */
5383 pdev = adev->pdev;
5384 speed_cap = pcie_get_speed_cap(pdev);
5385 if (speed_cap == PCI_SPEED_UNKNOWN) {
5386 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5387 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5388 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5389 } else {
5390 if (speed_cap == PCIE_SPEED_32_0GT)
5391 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5392 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5393 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5394 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5395 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5396 else if (speed_cap == PCIE_SPEED_16_0GT)
5397 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5398 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5399 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5400 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5401 else if (speed_cap == PCIE_SPEED_8_0GT)
5402 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5403 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5404 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5405 else if (speed_cap == PCIE_SPEED_5_0GT)
5406 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5407 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5408 else
5409 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5410 }
5411
5412 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5413 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5414 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5415 } else {
5416 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5417 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5418 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5419 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5420 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5421 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5422 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5423 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5424 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5425 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5426 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5427 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5428 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5429 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5430 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5431 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5432 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5433 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5434 else
5435 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5436
5437 }
5438 }
5439 if (adev->pm.pcie_mlw_mask == 0) {
5440 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5441 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5442 } else {
5443 switch (platform_link_width) {
5444 case PCIE_LNK_X32:
5445 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5446 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5447 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5448 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5449 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5450 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5451 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5452 break;
5453 case PCIE_LNK_X16:
5454 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5455 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5456 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5457 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5458 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5459 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5460 break;
5461 case PCIE_LNK_X12:
5462 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5463 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5464 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5465 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5466 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5467 break;
5468 case PCIE_LNK_X8:
5469 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5470 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5471 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5472 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5473 break;
5474 case PCIE_LNK_X4:
5475 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5476 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5477 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5478 break;
5479 case PCIE_LNK_X2:
5480 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5481 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5482 break;
5483 case PCIE_LNK_X1:
5484 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5485 break;
5486 default:
5487 break;
5488 }
5489 }
5490 }
5491}
5492
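/**
 * amdgpu_device_baco_enter - enter BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Disables the RAS doorbell interrupt when RAS is enabled and asks the SMU to
 * put the chip into the BACO power state. Returns -ENOTSUPP if the device
 * does not support BACO, otherwise the result of amdgpu_dpm_baco_enter().
 */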
5493int amdgpu_device_baco_enter(struct drm_device *dev)
5494{
5495 struct amdgpu_device *adev = drm_to_adev(dev);
5496 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5497
5498 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5499 return -ENOTSUPP;
5500
5501 if (ras && adev->ras_enabled &&
5502 adev->nbio.funcs->enable_doorbell_interrupt)
5503 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5504
5505 return amdgpu_dpm_baco_enter(adev);
5506}
5507
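/**
 * amdgpu_device_baco_exit - exit BACO (Bus Active, Chip Off)
 *
 * @dev: drm_device pointer
 *
 * Brings the chip back out of BACO, re-enables the RAS doorbell interrupt if
 * applicable and, for passthrough devices, clears any pending doorbell
 * interrupt. Returns 0 on success or a negative error code.
 */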
5508int amdgpu_device_baco_exit(struct drm_device *dev)
5509{
5510 struct amdgpu_device *adev = drm_to_adev(dev);
5511 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5512 int ret = 0;
5513
5514 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5515 return -ENOTSUPP;
5516
5517 ret = amdgpu_dpm_baco_exit(adev);
5518 if (ret)
5519 return ret;
5520
5521 if (ras && adev->ras_enabled &&
5522 adev->nbio.funcs->enable_doorbell_interrupt)
5523 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5524
5525 if (amdgpu_passthrough(adev) &&
5526 adev->nbio.funcs->clear_doorbell_interrupt)
5527 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5528
5529 return 0;
5530}
5531
/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 *
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
 */
5541pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5542{
5543 struct drm_device *dev = pci_get_drvdata(pdev);
5544 struct amdgpu_device *adev = drm_to_adev(dev);
5545 int i;
5546
5547 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5548
5549 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5550 DRM_WARN("No support for XGMI hive yet...");
5551 return PCI_ERS_RESULT_DISCONNECT;
5552 }
5553
5554 adev->pci_channel_state = state;
5555
5556 switch (state) {
5557 case pci_channel_io_normal:
5558 return PCI_ERS_RESULT_CAN_RECOVER;
5559
5560 case pci_channel_io_frozen:
		/*
		 * Locking the reset domain cancels and waits for any TDR in
		 * progress and blocks any further GPU access for the duration
		 * of the PCI error recovery.
		 */
5565 amdgpu_device_lock_reset_domain(adev->reset_domain);
5566 amdgpu_device_set_mp1_state(adev);
5567
		/*
		 * Block any work scheduling as we do for a regular GPU reset
		 * for the duration of the recovery.
		 */
5572 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5573 struct amdgpu_ring *ring = adev->rings[i];
5574
5575 if (!ring || !ring->sched.thread)
5576 continue;
5577
5578 drm_sched_stop(&ring->sched, NULL);
5579 }
5580 atomic_inc(&adev->gpu_reset_counter);
5581 return PCI_ERS_RESULT_NEED_RESET;
5582 case pci_channel_io_perm_failure:
		/* Permanent failure, prepare for device removal */
5584 return PCI_ERS_RESULT_DISCONNECT;
5585 }
5586
5587 return PCI_ERS_RESULT_NEED_RESET;
5588}
5589
/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device
 */
5594pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5595{
5596
5597 DRM_INFO("PCI error: mmio enabled callback!!\n");
5598
	/*
	 * This callback is only invoked when amdgpu_pci_error_detected()
	 * returned PCI_ERS_RESULT_CAN_RECOVER, i.e. MMIO access to the
	 * device still works. Nothing needs to be reinitialized here, so
	 * simply report that we have recovered.
	 */
5606 return PCI_ERS_RESULT_RECOVERED;
5607}
5608
/**
 * amdgpu_pci_slot_reset - Called when the PCI slot has been reset.
 *
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the PCI error recovery code after
 * the PCI slot has been reset, just before we should resume normal operation.
 */
5617pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5618{
5619 struct drm_device *dev = pci_get_drvdata(pdev);
5620 struct amdgpu_device *adev = drm_to_adev(dev);
5621 int r, i;
5622 struct amdgpu_reset_context reset_context;
5623 u32 memsize;
5624 struct list_head device_list;
5625
5626 DRM_INFO("PCI error: slot reset callback!!\n");
5627
5628 memset(&reset_context, 0, sizeof(reset_context));
5629
5630 INIT_LIST_HEAD(&device_list);
5631 list_add_tail(&adev->reset_list, &device_list);
5632
	/* wait for the asic to come out of reset */
5634 msleep(500);
5635
	/* restore the PCI config space */
5637 amdgpu_device_load_pci_state(pdev);
5638
	/* confirm the ASIC came out of reset by reading the config memsize */
5640 for (i = 0; i < adev->usec_timeout; i++) {
5641 memsize = amdgpu_asic_get_config_memsize(adev);
5642
5643 if (memsize != 0xffffffff)
5644 break;
5645 udelay(1);
5646 }
5647 if (memsize == 0xffffffff) {
5648 r = -ETIME;
5649 goto out;
5650 }
5651
5652 reset_context.method = AMD_RESET_METHOD_NONE;
5653 reset_context.reset_req_dev = adev;
5654 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5655 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5656
5657 adev->no_hw_access = true;
5658 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5659 adev->no_hw_access = false;
5660 if (r)
5661 goto out;
5662
5663 r = amdgpu_do_asic_reset(&device_list, &reset_context);
5664
5665out:
5666 if (!r) {
5667 if (amdgpu_device_cache_pci_state(adev->pdev))
5668 pci_restore_state(adev->pdev);
5669
5670 DRM_INFO("PCIe error recovery succeeded\n");
5671 } else {
5672 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5673 amdgpu_device_unset_mp1_state(adev);
5674 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5675 }
5676
5677 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5678}
5679
/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 *
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it is OK to resume
 * normal operation. Restarts the schedulers stopped in amdgpu_pci_error_detected().
 */
5687void amdgpu_pci_resume(struct pci_dev *pdev)
5688{
5689 struct drm_device *dev = pci_get_drvdata(pdev);
5690 struct amdgpu_device *adev = drm_to_adev(dev);
5691 int i;
5692
5693
5694 DRM_INFO("PCI error: resume callback!!\n");
5695
	/* Only continue execution for the pci_channel_io_frozen case */
5697 if (adev->pci_channel_state != pci_channel_io_frozen)
5698 return;
5699
5700 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5701 struct amdgpu_ring *ring = adev->rings[i];
5702
5703 if (!ring || !ring->sched.thread)
5704 continue;
5705
5706
5707 drm_sched_resubmit_jobs(&ring->sched);
5708 drm_sched_start(&ring->sched, true);
5709 }
5710
5711 amdgpu_device_unset_mp1_state(adev);
5712 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5713}
5714
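/**
 * amdgpu_device_cache_pci_state - save and cache the PCI config space
 *
 * @pdev: PCI device struct
 *
 * Saves the current PCI configuration space and stores a copy in
 * adev->pci_state so it can be re-applied after a reset or PCI error.
 * Returns true on success, false otherwise.
 */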
5715bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5716{
5717 struct drm_device *dev = pci_get_drvdata(pdev);
5718 struct amdgpu_device *adev = drm_to_adev(dev);
5719 int r;
5720
5721 r = pci_save_state(pdev);
5722 if (!r) {
5723 kfree(adev->pci_state);
5724
5725 adev->pci_state = pci_store_saved_state(pdev);
5726
5727 if (!adev->pci_state) {
5728 DRM_ERROR("Failed to store PCI saved state");
5729 return false;
5730 }
5731 } else {
5732 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5733 return false;
5734 }
5735
5736 return true;
5737}
5738
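/**
 * amdgpu_device_load_pci_state - restore the cached PCI config space
 *
 * @pdev: PCI device struct
 *
 * Loads the configuration space previously cached by
 * amdgpu_device_cache_pci_state() back into the device.
 * Returns true on success, false otherwise.
 */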
5739bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5740{
5741 struct drm_device *dev = pci_get_drvdata(pdev);
5742 struct amdgpu_device *adev = drm_to_adev(dev);
5743 int r;
5744
5745 if (!adev->pci_state)
5746 return false;
5747
5748 r = pci_load_saved_state(pdev, adev->pci_state);
5749
5750 if (!r) {
5751 pci_restore_state(pdev);
5752 } else {
5753 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5754 return false;
5755 }
5756
5757 return true;
5758}
5759
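/*
 * Flush the HDP (host data path) cache so that host writes to VRAM become
 * visible to the GPU. On x86-64, bare-metal APUs skip the flush, as do
 * devices whose VRAM is directly connected to the CPU. Uses the ring's HDP
 * flush packet when available, otherwise flushes through MMIO.
 */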
5760void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5761 struct amdgpu_ring *ring)
5762{
5763#ifdef CONFIG_X86_64
5764 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5765 return;
5766#endif
5767 if (adev->gmc.xgmi.connected_to_cpu)
5768 return;
5769
5770 if (ring && ring->funcs->emit_hdp_flush)
5771 amdgpu_ring_emit_hdp_flush(ring);
5772 else
5773 amdgpu_asic_flush_hdp(adev, ring);
5774}
5775
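/*
 * Counterpart of amdgpu_device_flush_hdp(): invalidates the HDP cache so
 * that host reads observe up-to-date VRAM contents, with the same
 * exclusions for APUs and CPU-connected VRAM.
 */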
5776void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5777 struct amdgpu_ring *ring)
5778{
5779#ifdef CONFIG_X86_64
5780 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5781 return;
5782#endif
5783 if (adev->gmc.xgmi.connected_to_cpu)
5784 return;
5785
5786 amdgpu_asic_invalidate_hdp(adev, ring);
5787}
5788
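/* Returns non-zero while the reset domain this device belongs to is performing a GPU reset */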
5789int amdgpu_in_reset(struct amdgpu_device *adev)
5790{
5791 return atomic_read(&adev->reset_domain->in_gpu_reset);
}
5793
/**
 * amdgpu_device_halt() - bring hardware to some kind of halt state
 *
 * @adev: amdgpu_device pointer
 *
 * Bring hardware to some kind of halt state so that no one can touch it
 * any more. It will help to maintain error context when an error occurred.
 * Compared to a simple hang, the system will keep stable at least for SSH
 * access. Then it should be trivial to inspect the hardware state and
 * see what's going on. Implemented as follows:
 *
 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs,
 *    etc.), clears all CPU mappings to the device and disallows remappings
 *    through page faults
 * 2. amdgpu_irq_disable_all() disables all interrupts
 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
 * 4. set adev->no_hw_access to avoid potential crashes after step 5
 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
 * 6. pci_disable_device() and pci_wait_for_pending_transaction() flush any
 *    in-flight DMA operations
 */
5814void amdgpu_device_halt(struct amdgpu_device *adev)
5815{
5816 struct pci_dev *pdev = adev->pdev;
5817 struct drm_device *ddev = adev_to_drm(adev);
5818
5819 drm_dev_unplug(ddev);
5820
5821 amdgpu_irq_disable_all(adev);
5822
5823 amdgpu_fence_driver_hw_fini(adev);
5824
5825 adev->no_hw_access = true;
5826
5827 amdgpu_device_unmap_mmio(adev);
5828
5829 pci_disable_device(pdev);
5830 pci_wait_for_pending_transaction(pdev);
5831}
5832
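/**
 * amdgpu_device_pcie_port_rreg - read a PCIE port register
 *
 * @adev: amdgpu_device pointer
 * @reg: register dword offset
 *
 * Reads an indirect PCIE port register through the NBIO index/data pair,
 * serialized by pcie_idx_lock. Returns the 32 bit value of the register.
 */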
5833u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5834 u32 reg)
5835{
5836 unsigned long flags, address, data;
5837 u32 r;
5838
5839 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5840 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5841
5842 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5843 WREG32(address, reg * 4);
5844 (void)RREG32(address);
5845 r = RREG32(data);
5846 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5847 return r;
5848}
5849
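/**
 * amdgpu_device_pcie_port_wreg - write a PCIE port register
 *
 * @adev: amdgpu_device pointer
 * @reg: register dword offset
 * @v: value to write
 *
 * Writes an indirect PCIE port register through the NBIO index/data pair,
 * serialized by pcie_idx_lock. The trailing read flushes the write.
 */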
5850void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5851 u32 reg, u32 v)
5852{
5853 unsigned long flags, address, data;
5854
5855 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5856 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5857
5858 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5859 WREG32(address, reg * 4);
5860 (void)RREG32(address);
5861 WREG32(data, v);
5862 (void)RREG32(data);
5863 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5864}
5865