1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <linux/bsearch.h>
24#include <linux/pci.h>
25#include <linux/slab.h>
26#include "kfd_priv.h"
27#include "kfd_device_queue_manager.h"
28#include "kfd_pm4_headers_vi.h"
29#include "kfd_pm4_headers_aldebaran.h"
30#include "cwsr_trap_handler.h"
31#include "kfd_iommu.h"
32#include "amdgpu_amdkfd.h"
33#include "kfd_smi_events.h"
34#include "kfd_migrate.h"
35#include "amdgpu.h"
36
37#define MQD_SIZE_ALIGNED 768
38
39
40
41
42
43
44static atomic_t kfd_locked = ATOMIC_INIT(0);
45
46#ifdef CONFIG_DRM_AMDGPU_CIK
47extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
48#endif
49extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
50extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
51extern const struct kfd2kgd_calls arcturus_kfd2kgd;
52extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
53extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
54extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
55
56#ifdef KFD_SUPPORT_IOMMU_V2
57static const struct kfd_device_info kaveri_device_info = {
58 .asic_family = CHIP_KAVERI,
59 .asic_name = "kaveri",
60 .gfx_target_version = 70000,
61 .max_pasid_bits = 16,
62
63 .max_no_of_hqd = 24,
64 .doorbell_size = 4,
65 .ih_ring_entry_size = 4 * sizeof(uint32_t),
66 .event_interrupt_class = &event_interrupt_class_cik,
67 .num_of_watch_points = 4,
68 .mqd_size_aligned = MQD_SIZE_ALIGNED,
69 .supports_cwsr = false,
70 .needs_iommu_device = true,
71 .needs_pci_atomics = false,
72 .num_sdma_engines = 2,
73 .num_xgmi_sdma_engines = 0,
74 .num_sdma_queues_per_engine = 2,
75};
76
77static const struct kfd_device_info carrizo_device_info = {
78 .asic_family = CHIP_CARRIZO,
79 .asic_name = "carrizo",
80 .gfx_target_version = 80001,
81 .max_pasid_bits = 16,
82
83 .max_no_of_hqd = 24,
84 .doorbell_size = 4,
85 .ih_ring_entry_size = 4 * sizeof(uint32_t),
86 .event_interrupt_class = &event_interrupt_class_cik,
87 .num_of_watch_points = 4,
88 .mqd_size_aligned = MQD_SIZE_ALIGNED,
89 .supports_cwsr = true,
90 .needs_iommu_device = true,
91 .needs_pci_atomics = false,
92 .num_sdma_engines = 2,
93 .num_xgmi_sdma_engines = 0,
94 .num_sdma_queues_per_engine = 2,
95};
96
97static const struct kfd_device_info raven_device_info = {
98 .asic_family = CHIP_RAVEN,
99 .asic_name = "raven",
100 .gfx_target_version = 90002,
101 .max_pasid_bits = 16,
102 .max_no_of_hqd = 24,
103 .doorbell_size = 8,
104 .ih_ring_entry_size = 8 * sizeof(uint32_t),
105 .event_interrupt_class = &event_interrupt_class_v9,
106 .num_of_watch_points = 4,
107 .mqd_size_aligned = MQD_SIZE_ALIGNED,
108 .supports_cwsr = true,
109 .needs_iommu_device = true,
110 .needs_pci_atomics = true,
111 .num_sdma_engines = 1,
112 .num_xgmi_sdma_engines = 0,
113 .num_sdma_queues_per_engine = 2,
114};
115#endif
116
117#ifdef CONFIG_DRM_AMDGPU_CIK
118static const struct kfd_device_info hawaii_device_info = {
119 .asic_family = CHIP_HAWAII,
120 .asic_name = "hawaii",
121 .gfx_target_version = 70001,
122 .max_pasid_bits = 16,
123
124 .max_no_of_hqd = 24,
125 .doorbell_size = 4,
126 .ih_ring_entry_size = 4 * sizeof(uint32_t),
127 .event_interrupt_class = &event_interrupt_class_cik,
128 .num_of_watch_points = 4,
129 .mqd_size_aligned = MQD_SIZE_ALIGNED,
130 .supports_cwsr = false,
131 .needs_iommu_device = false,
132 .needs_pci_atomics = false,
133 .num_sdma_engines = 2,
134 .num_xgmi_sdma_engines = 0,
135 .num_sdma_queues_per_engine = 2,
136};
137#endif
138
139static const struct kfd_device_info tonga_device_info = {
140 .asic_family = CHIP_TONGA,
141 .asic_name = "tonga",
142 .gfx_target_version = 80002,
143 .max_pasid_bits = 16,
144 .max_no_of_hqd = 24,
145 .doorbell_size = 4,
146 .ih_ring_entry_size = 4 * sizeof(uint32_t),
147 .event_interrupt_class = &event_interrupt_class_cik,
148 .num_of_watch_points = 4,
149 .mqd_size_aligned = MQD_SIZE_ALIGNED,
150 .supports_cwsr = false,
151 .needs_iommu_device = false,
152 .needs_pci_atomics = true,
153 .num_sdma_engines = 2,
154 .num_xgmi_sdma_engines = 0,
155 .num_sdma_queues_per_engine = 2,
156};
157
158static const struct kfd_device_info fiji_device_info = {
159 .asic_family = CHIP_FIJI,
160 .asic_name = "fiji",
161 .gfx_target_version = 80003,
162 .max_pasid_bits = 16,
163 .max_no_of_hqd = 24,
164 .doorbell_size = 4,
165 .ih_ring_entry_size = 4 * sizeof(uint32_t),
166 .event_interrupt_class = &event_interrupt_class_cik,
167 .num_of_watch_points = 4,
168 .mqd_size_aligned = MQD_SIZE_ALIGNED,
169 .supports_cwsr = true,
170 .needs_iommu_device = false,
171 .needs_pci_atomics = true,
172 .num_sdma_engines = 2,
173 .num_xgmi_sdma_engines = 0,
174 .num_sdma_queues_per_engine = 2,
175};
176
177static const struct kfd_device_info fiji_vf_device_info = {
178 .asic_family = CHIP_FIJI,
179 .asic_name = "fiji",
180 .gfx_target_version = 80003,
181 .max_pasid_bits = 16,
182 .max_no_of_hqd = 24,
183 .doorbell_size = 4,
184 .ih_ring_entry_size = 4 * sizeof(uint32_t),
185 .event_interrupt_class = &event_interrupt_class_cik,
186 .num_of_watch_points = 4,
187 .mqd_size_aligned = MQD_SIZE_ALIGNED,
188 .supports_cwsr = true,
189 .needs_iommu_device = false,
190 .needs_pci_atomics = false,
191 .num_sdma_engines = 2,
192 .num_xgmi_sdma_engines = 0,
193 .num_sdma_queues_per_engine = 2,
194};
195
196
197static const struct kfd_device_info polaris10_device_info = {
198 .asic_family = CHIP_POLARIS10,
199 .asic_name = "polaris10",
200 .gfx_target_version = 80003,
201 .max_pasid_bits = 16,
202 .max_no_of_hqd = 24,
203 .doorbell_size = 4,
204 .ih_ring_entry_size = 4 * sizeof(uint32_t),
205 .event_interrupt_class = &event_interrupt_class_cik,
206 .num_of_watch_points = 4,
207 .mqd_size_aligned = MQD_SIZE_ALIGNED,
208 .supports_cwsr = true,
209 .needs_iommu_device = false,
210 .needs_pci_atomics = true,
211 .num_sdma_engines = 2,
212 .num_xgmi_sdma_engines = 0,
213 .num_sdma_queues_per_engine = 2,
214};
215
216static const struct kfd_device_info polaris10_vf_device_info = {
217 .asic_family = CHIP_POLARIS10,
218 .asic_name = "polaris10",
219 .gfx_target_version = 80003,
220 .max_pasid_bits = 16,
221 .max_no_of_hqd = 24,
222 .doorbell_size = 4,
223 .ih_ring_entry_size = 4 * sizeof(uint32_t),
224 .event_interrupt_class = &event_interrupt_class_cik,
225 .num_of_watch_points = 4,
226 .mqd_size_aligned = MQD_SIZE_ALIGNED,
227 .supports_cwsr = true,
228 .needs_iommu_device = false,
229 .needs_pci_atomics = false,
230 .num_sdma_engines = 2,
231 .num_xgmi_sdma_engines = 0,
232 .num_sdma_queues_per_engine = 2,
233};
234
235static const struct kfd_device_info polaris11_device_info = {
236 .asic_family = CHIP_POLARIS11,
237 .asic_name = "polaris11",
238 .gfx_target_version = 80003,
239 .max_pasid_bits = 16,
240 .max_no_of_hqd = 24,
241 .doorbell_size = 4,
242 .ih_ring_entry_size = 4 * sizeof(uint32_t),
243 .event_interrupt_class = &event_interrupt_class_cik,
244 .num_of_watch_points = 4,
245 .mqd_size_aligned = MQD_SIZE_ALIGNED,
246 .supports_cwsr = true,
247 .needs_iommu_device = false,
248 .needs_pci_atomics = true,
249 .num_sdma_engines = 2,
250 .num_xgmi_sdma_engines = 0,
251 .num_sdma_queues_per_engine = 2,
252};
253
254static const struct kfd_device_info polaris12_device_info = {
255 .asic_family = CHIP_POLARIS12,
256 .asic_name = "polaris12",
257 .gfx_target_version = 80003,
258 .max_pasid_bits = 16,
259 .max_no_of_hqd = 24,
260 .doorbell_size = 4,
261 .ih_ring_entry_size = 4 * sizeof(uint32_t),
262 .event_interrupt_class = &event_interrupt_class_cik,
263 .num_of_watch_points = 4,
264 .mqd_size_aligned = MQD_SIZE_ALIGNED,
265 .supports_cwsr = true,
266 .needs_iommu_device = false,
267 .needs_pci_atomics = true,
268 .num_sdma_engines = 2,
269 .num_xgmi_sdma_engines = 0,
270 .num_sdma_queues_per_engine = 2,
271};
272
273static const struct kfd_device_info vegam_device_info = {
274 .asic_family = CHIP_VEGAM,
275 .asic_name = "vegam",
276 .gfx_target_version = 80003,
277 .max_pasid_bits = 16,
278 .max_no_of_hqd = 24,
279 .doorbell_size = 4,
280 .ih_ring_entry_size = 4 * sizeof(uint32_t),
281 .event_interrupt_class = &event_interrupt_class_cik,
282 .num_of_watch_points = 4,
283 .mqd_size_aligned = MQD_SIZE_ALIGNED,
284 .supports_cwsr = true,
285 .needs_iommu_device = false,
286 .needs_pci_atomics = true,
287 .num_sdma_engines = 2,
288 .num_xgmi_sdma_engines = 0,
289 .num_sdma_queues_per_engine = 2,
290};
291
292static const struct kfd_device_info vega10_device_info = {
293 .asic_family = CHIP_VEGA10,
294 .asic_name = "vega10",
295 .gfx_target_version = 90000,
296 .max_pasid_bits = 16,
297 .max_no_of_hqd = 24,
298 .doorbell_size = 8,
299 .ih_ring_entry_size = 8 * sizeof(uint32_t),
300 .event_interrupt_class = &event_interrupt_class_v9,
301 .num_of_watch_points = 4,
302 .mqd_size_aligned = MQD_SIZE_ALIGNED,
303 .supports_cwsr = true,
304 .needs_iommu_device = false,
305 .needs_pci_atomics = false,
306 .num_sdma_engines = 2,
307 .num_xgmi_sdma_engines = 0,
308 .num_sdma_queues_per_engine = 2,
309};
310
311static const struct kfd_device_info vega10_vf_device_info = {
312 .asic_family = CHIP_VEGA10,
313 .asic_name = "vega10",
314 .gfx_target_version = 90000,
315 .max_pasid_bits = 16,
316 .max_no_of_hqd = 24,
317 .doorbell_size = 8,
318 .ih_ring_entry_size = 8 * sizeof(uint32_t),
319 .event_interrupt_class = &event_interrupt_class_v9,
320 .num_of_watch_points = 4,
321 .mqd_size_aligned = MQD_SIZE_ALIGNED,
322 .supports_cwsr = true,
323 .needs_iommu_device = false,
324 .needs_pci_atomics = false,
325 .num_sdma_engines = 2,
326 .num_xgmi_sdma_engines = 0,
327 .num_sdma_queues_per_engine = 2,
328};
329
330static const struct kfd_device_info vega12_device_info = {
331 .asic_family = CHIP_VEGA12,
332 .asic_name = "vega12",
333 .gfx_target_version = 90004,
334 .max_pasid_bits = 16,
335 .max_no_of_hqd = 24,
336 .doorbell_size = 8,
337 .ih_ring_entry_size = 8 * sizeof(uint32_t),
338 .event_interrupt_class = &event_interrupt_class_v9,
339 .num_of_watch_points = 4,
340 .mqd_size_aligned = MQD_SIZE_ALIGNED,
341 .supports_cwsr = true,
342 .needs_iommu_device = false,
343 .needs_pci_atomics = false,
344 .num_sdma_engines = 2,
345 .num_xgmi_sdma_engines = 0,
346 .num_sdma_queues_per_engine = 2,
347};
348
349static const struct kfd_device_info vega20_device_info = {
350 .asic_family = CHIP_VEGA20,
351 .asic_name = "vega20",
352 .gfx_target_version = 90006,
353 .max_pasid_bits = 16,
354 .max_no_of_hqd = 24,
355 .doorbell_size = 8,
356 .ih_ring_entry_size = 8 * sizeof(uint32_t),
357 .event_interrupt_class = &event_interrupt_class_v9,
358 .num_of_watch_points = 4,
359 .mqd_size_aligned = MQD_SIZE_ALIGNED,
360 .supports_cwsr = true,
361 .needs_iommu_device = false,
362 .needs_pci_atomics = false,
363 .num_sdma_engines = 2,
364 .num_xgmi_sdma_engines = 0,
365 .num_sdma_queues_per_engine = 8,
366};
367
/* Arcturus (MI100 class): first entry in this table with dedicated
 * XGMI SDMA engines alongside the PCIe-facing ones.
 */
static const struct kfd_device_info arcturus_device_info = {
	.asic_family = CHIP_ARCTURUS,
	.asic_name = "arcturus",
	.gfx_target_version = 90008,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,		/* 64-bit doorbells on GFX9+ */
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 6,	/* SDMA engines reserved for XGMI traffic */
	.num_sdma_queues_per_engine = 8,
};
386
/* Aldebaran (MI200 class). Its map_process packet differs from other
 * ASICs, which kgd2kfd_device_init() accounts for when sizing GTT memory.
 */
static const struct kfd_device_info aldebaran_device_info = {
	.asic_family = CHIP_ALDEBARAN,
	.asic_name = "aldebaran",
	.gfx_target_version = 90010,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,		/* 64-bit doorbells on GFX9+ */
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_engines = 2,
	.num_xgmi_sdma_engines = 3,	/* SDMA engines reserved for XGMI traffic */
	.num_sdma_queues_per_engine = 8,
};
405
406static const struct kfd_device_info renoir_device_info = {
407 .asic_family = CHIP_RENOIR,
408 .asic_name = "renoir",
409 .gfx_target_version = 90012,
410 .max_pasid_bits = 16,
411 .max_no_of_hqd = 24,
412 .doorbell_size = 8,
413 .ih_ring_entry_size = 8 * sizeof(uint32_t),
414 .event_interrupt_class = &event_interrupt_class_v9,
415 .num_of_watch_points = 4,
416 .mqd_size_aligned = MQD_SIZE_ALIGNED,
417 .supports_cwsr = true,
418 .needs_iommu_device = false,
419 .needs_pci_atomics = false,
420 .num_sdma_engines = 1,
421 .num_xgmi_sdma_engines = 0,
422 .num_sdma_queues_per_engine = 2,
423};
424
425static const struct kfd_device_info navi10_device_info = {
426 .asic_family = CHIP_NAVI10,
427 .asic_name = "navi10",
428 .gfx_target_version = 100100,
429 .max_pasid_bits = 16,
430 .max_no_of_hqd = 24,
431 .doorbell_size = 8,
432 .ih_ring_entry_size = 8 * sizeof(uint32_t),
433 .event_interrupt_class = &event_interrupt_class_v9,
434 .num_of_watch_points = 4,
435 .mqd_size_aligned = MQD_SIZE_ALIGNED,
436 .needs_iommu_device = false,
437 .supports_cwsr = true,
438 .needs_pci_atomics = true,
439 .no_atomic_fw_version = 145,
440 .num_sdma_engines = 2,
441 .num_xgmi_sdma_engines = 0,
442 .num_sdma_queues_per_engine = 8,
443};
444
445static const struct kfd_device_info navi12_device_info = {
446 .asic_family = CHIP_NAVI12,
447 .asic_name = "navi12",
448 .gfx_target_version = 100101,
449 .max_pasid_bits = 16,
450 .max_no_of_hqd = 24,
451 .doorbell_size = 8,
452 .ih_ring_entry_size = 8 * sizeof(uint32_t),
453 .event_interrupt_class = &event_interrupt_class_v9,
454 .num_of_watch_points = 4,
455 .mqd_size_aligned = MQD_SIZE_ALIGNED,
456 .needs_iommu_device = false,
457 .supports_cwsr = true,
458 .needs_pci_atomics = true,
459 .no_atomic_fw_version = 145,
460 .num_sdma_engines = 2,
461 .num_xgmi_sdma_engines = 0,
462 .num_sdma_queues_per_engine = 8,
463};
464
465static const struct kfd_device_info navi14_device_info = {
466 .asic_family = CHIP_NAVI14,
467 .asic_name = "navi14",
468 .gfx_target_version = 100102,
469 .max_pasid_bits = 16,
470 .max_no_of_hqd = 24,
471 .doorbell_size = 8,
472 .ih_ring_entry_size = 8 * sizeof(uint32_t),
473 .event_interrupt_class = &event_interrupt_class_v9,
474 .num_of_watch_points = 4,
475 .mqd_size_aligned = MQD_SIZE_ALIGNED,
476 .needs_iommu_device = false,
477 .supports_cwsr = true,
478 .needs_pci_atomics = true,
479 .no_atomic_fw_version = 145,
480 .num_sdma_engines = 2,
481 .num_xgmi_sdma_engines = 0,
482 .num_sdma_queues_per_engine = 8,
483};
484
485static const struct kfd_device_info sienna_cichlid_device_info = {
486 .asic_family = CHIP_SIENNA_CICHLID,
487 .asic_name = "sienna_cichlid",
488 .gfx_target_version = 100300,
489 .max_pasid_bits = 16,
490 .max_no_of_hqd = 24,
491 .doorbell_size = 8,
492 .ih_ring_entry_size = 8 * sizeof(uint32_t),
493 .event_interrupt_class = &event_interrupt_class_v9,
494 .num_of_watch_points = 4,
495 .mqd_size_aligned = MQD_SIZE_ALIGNED,
496 .needs_iommu_device = false,
497 .supports_cwsr = true,
498 .needs_pci_atomics = true,
499 .no_atomic_fw_version = 92,
500 .num_sdma_engines = 4,
501 .num_xgmi_sdma_engines = 0,
502 .num_sdma_queues_per_engine = 8,
503};
504
505static const struct kfd_device_info navy_flounder_device_info = {
506 .asic_family = CHIP_NAVY_FLOUNDER,
507 .asic_name = "navy_flounder",
508 .gfx_target_version = 100301,
509 .max_pasid_bits = 16,
510 .max_no_of_hqd = 24,
511 .doorbell_size = 8,
512 .ih_ring_entry_size = 8 * sizeof(uint32_t),
513 .event_interrupt_class = &event_interrupt_class_v9,
514 .num_of_watch_points = 4,
515 .mqd_size_aligned = MQD_SIZE_ALIGNED,
516 .needs_iommu_device = false,
517 .supports_cwsr = true,
518 .needs_pci_atomics = true,
519 .no_atomic_fw_version = 92,
520 .num_sdma_engines = 2,
521 .num_xgmi_sdma_engines = 0,
522 .num_sdma_queues_per_engine = 8,
523};
524
525static const struct kfd_device_info vangogh_device_info = {
526 .asic_family = CHIP_VANGOGH,
527 .asic_name = "vangogh",
528 .gfx_target_version = 100303,
529 .max_pasid_bits = 16,
530 .max_no_of_hqd = 24,
531 .doorbell_size = 8,
532 .ih_ring_entry_size = 8 * sizeof(uint32_t),
533 .event_interrupt_class = &event_interrupt_class_v9,
534 .num_of_watch_points = 4,
535 .mqd_size_aligned = MQD_SIZE_ALIGNED,
536 .needs_iommu_device = false,
537 .supports_cwsr = true,
538 .needs_pci_atomics = true,
539 .no_atomic_fw_version = 92,
540 .num_sdma_engines = 1,
541 .num_xgmi_sdma_engines = 0,
542 .num_sdma_queues_per_engine = 2,
543};
544
545static const struct kfd_device_info dimgrey_cavefish_device_info = {
546 .asic_family = CHIP_DIMGREY_CAVEFISH,
547 .asic_name = "dimgrey_cavefish",
548 .gfx_target_version = 100302,
549 .max_pasid_bits = 16,
550 .max_no_of_hqd = 24,
551 .doorbell_size = 8,
552 .ih_ring_entry_size = 8 * sizeof(uint32_t),
553 .event_interrupt_class = &event_interrupt_class_v9,
554 .num_of_watch_points = 4,
555 .mqd_size_aligned = MQD_SIZE_ALIGNED,
556 .needs_iommu_device = false,
557 .supports_cwsr = true,
558 .needs_pci_atomics = true,
559 .no_atomic_fw_version = 92,
560 .num_sdma_engines = 2,
561 .num_xgmi_sdma_engines = 0,
562 .num_sdma_queues_per_engine = 8,
563};
564
565static const struct kfd_device_info beige_goby_device_info = {
566 .asic_family = CHIP_BEIGE_GOBY,
567 .asic_name = "beige_goby",
568 .gfx_target_version = 100304,
569 .max_pasid_bits = 16,
570 .max_no_of_hqd = 24,
571 .doorbell_size = 8,
572 .ih_ring_entry_size = 8 * sizeof(uint32_t),
573 .event_interrupt_class = &event_interrupt_class_v9,
574 .num_of_watch_points = 4,
575 .mqd_size_aligned = MQD_SIZE_ALIGNED,
576 .needs_iommu_device = false,
577 .supports_cwsr = true,
578 .needs_pci_atomics = true,
579 .no_atomic_fw_version = 92,
580 .num_sdma_engines = 1,
581 .num_xgmi_sdma_engines = 0,
582 .num_sdma_queues_per_engine = 8,
583};
584
585static const struct kfd_device_info yellow_carp_device_info = {
586 .asic_family = CHIP_YELLOW_CARP,
587 .asic_name = "yellow_carp",
588 .gfx_target_version = 100305,
589 .max_pasid_bits = 16,
590 .max_no_of_hqd = 24,
591 .doorbell_size = 8,
592 .ih_ring_entry_size = 8 * sizeof(uint32_t),
593 .event_interrupt_class = &event_interrupt_class_v9,
594 .num_of_watch_points = 4,
595 .mqd_size_aligned = MQD_SIZE_ALIGNED,
596 .needs_iommu_device = false,
597 .supports_cwsr = true,
598 .needs_pci_atomics = true,
599 .no_atomic_fw_version = 92,
600 .num_sdma_engines = 1,
601 .num_xgmi_sdma_engines = 0,
602 .num_sdma_queues_per_engine = 2,
603};
604
605static const struct kfd_device_info cyan_skillfish_device_info = {
606 .asic_family = CHIP_CYAN_SKILLFISH,
607 .asic_name = "cyan_skillfish",
608 .gfx_target_version = 100103,
609 .max_pasid_bits = 16,
610 .max_no_of_hqd = 24,
611 .doorbell_size = 8,
612 .ih_ring_entry_size = 8 * sizeof(uint32_t),
613 .event_interrupt_class = &event_interrupt_class_v9,
614 .num_of_watch_points = 4,
615 .mqd_size_aligned = MQD_SIZE_ALIGNED,
616 .needs_iommu_device = false,
617 .supports_cwsr = true,
618 .needs_pci_atomics = true,
619 .num_sdma_engines = 2,
620 .num_xgmi_sdma_engines = 0,
621 .num_sdma_queues_per_engine = 8,
622};
623
624static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
625 unsigned int chunk_size);
626static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
627
628static int kfd_resume(struct kfd_dev *kfd);
629
/*
 * kgd2kfd_probe() - Allocate and minimally initialize a kfd_dev for a GPU.
 * @kgd: opaque handle to the amdgpu device (really a struct amdgpu_device *)
 * @vf:  true if probing a virtual function (SR-IOV VF)
 *
 * Selects the per-ASIC kfd_device_info table and the kfd2kgd callback set.
 * Legacy ASICs are dispatched on adev->asic_type; everything newer is
 * dispatched on the GC hardware-IP version in the default branch.
 * A NULL device_info after the switch means "this ASIC/VF combination is
 * not supported by KFD" (many ASICs deliberately set it to NULL for VFs).
 *
 * Return: a zero-allocated, partially initialized kfd_dev on success
 * (full init happens later in kgd2kfd_device_init()), or NULL if the
 * ASIC is unsupported or allocation fails. Caller owns the returned
 * memory; it is released by kgd2kfd_device_exit().
 */
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
{
	struct kfd_dev *kfd;
	const struct kfd_device_info *device_info;
	const struct kfd2kgd_calls *f2g;
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct pci_dev *pdev = adev->pdev;

	switch (adev->asic_type) {
	/* APUs below require IOMMUv2 support; only compiled in when available */
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		if (vf)
			device_info = NULL;
		else
			device_info = &kaveri_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		if (vf)
			device_info = NULL;
		else
			device_info = &carrizo_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		if (vf)
			device_info = NULL;
		else
			device_info = &hawaii_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		if (vf)
			device_info = NULL;
		else
			device_info = &tonga_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
		if (vf)
			device_info = &fiji_vf_device_info;
		else
			device_info = &fiji_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS10:
		if (vf)
			device_info = &polaris10_vf_device_info;
		else
			device_info = &polaris10_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris11_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS12:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris12_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_VEGAM:
		if (vf)
			device_info = NULL;
		else
			device_info = &vegam_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		/* GFX9 and newer: dispatch on the GC IP block version */
		switch (adev->ip_versions[GC_HWIP][0]) {
		case IP_VERSION(9, 0, 1):
			if (vf)
				device_info = &vega10_vf_device_info;
			else
				device_info = &vega10_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		case IP_VERSION(9, 1, 0):	/* Raven */
		case IP_VERSION(9, 2, 2):	/* Raven2 */
			if (vf)
				device_info = NULL;
			else
				device_info = &raven_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		case IP_VERSION(9, 2, 1):	/* Vega12 */
			if (vf)
				device_info = NULL;
			else
				device_info = &vega12_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 3, 0):	/* Renoir */
			if (vf)
				device_info = NULL;
			else
				device_info = &renoir_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 0):	/* Vega20 */
			if (vf)
				device_info = NULL;
			else
				device_info = &vega20_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 1):	/* Arcturus: same info for PF and VF */
			device_info = &arcturus_device_info;
			f2g = &arcturus_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 2):	/* Aldebaran: same info for PF and VF */
			device_info = &aldebaran_device_info;
			f2g = &aldebaran_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 10):	/* Navi10 */
			if (vf)
				device_info = NULL;
			else
				device_info = &navi10_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 2):	/* Navi12: same info for PF and VF */
			device_info = &navi12_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 1):	/* Navi14 */
			if (vf)
				device_info = NULL;
			else
				device_info = &navi14_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 3):	/* Cyan Skillfish */
			if (vf)
				device_info = NULL;
			else
				device_info = &cyan_skillfish_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 0):	/* Sienna Cichlid */
			device_info = &sienna_cichlid_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 2):	/* Navy Flounder */
			device_info = &navy_flounder_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 1):	/* Van Gogh */
			if (vf)
				device_info = NULL;
			else
				device_info = &vangogh_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 4):	/* Dimgrey Cavefish */
			device_info = &dimgrey_cavefish_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 5):	/* Beige Goby */
			device_info = &beige_goby_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 3):	/* Yellow Carp */
			if (vf)
				device_info = NULL;
			else
				device_info = &yellow_carp_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		default:
			/* Unknown GC IP version: not a KFD-capable device */
			return NULL;
		}
		break;
	}

	if (!device_info || !f2g) {
		dev_err(kfd_device, "%s %s not supported in kfd\n",
			amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	/* Only cheap state is set up here; everything that can fail in
	 * interesting ways is deferred to kgd2kfd_device_init().
	 */
	kfd->kgd = kgd;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}
844
/*
 * kfd_cwsr_init() - Select the CWSR (compute wave save/restore) trap
 * handler binary for this ASIC and mark CWSR enabled.
 *
 * No-op unless the cwsr_enable module parameter is set and the device
 * info table advertises CWSR support. The BUILD_BUG_ON checks guarantee
 * each trap handler image fits in a single page.
 *
 * NOTE: the ladder order matters. The exact-match Arcturus/Aldebaran
 * checks must come before the "< CHIP_NAVI10" range check, since the
 * range comparisons rely on the numeric ordering of the CHIP_* enum
 * (presumably generation order — confirm against the amd_asic_type enum
 * before reordering).
 */
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		if (kfd->device_info->asic_family < CHIP_VEGA10) {
			/* Pre-GFX9 parts use the GFX8 handler */
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (kfd->device_info->asic_family == CHIP_ARCTURUS) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (kfd->device_info->asic_family < CHIP_NAVI10) {
			/* Remaining GFX9 parts */
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) {
			/* Navi1x family */
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else {
			/* Everything newer falls back to the GFX10 handler */
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		}

		kfd->cwsr_enabled = true;
	}
}
877
/*
 * kfd_gws_init() - Allocate the GWS (global wave sync) pool if this
 * device/firmware combination supports it.
 *
 * GWS is only usable with the HW scheduler, so nothing is done under
 * KFD_SCHED_POLICY_NO_HWS. Otherwise GWS is allocated either when forced
 * via the hws_gws_support module parameter, or when the MEC2 firmware
 * meets the per-family minimum version encoded below (the thresholds are
 * the firmware versions these checks gate on; treat them as the first
 * versions with working GWS support — confirm against firmware release
 * notes before changing).
 *
 * Return: 0 on success or when GWS is not applicable; otherwise the
 * error from amdgpu_amdkfd_alloc_gws().
 */
static int kfd_gws_init(struct kfd_dev *kfd)
{
	int ret = 0;

	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

	if (hws_gws_support
		|| (kfd->device_info->asic_family == CHIP_VEGA10
			&& kfd->mec2_fw_version >= 0x81b3)
		|| (kfd->device_info->asic_family >= CHIP_VEGA12
			&& kfd->device_info->asic_family <= CHIP_RAVEN
			&& kfd->mec2_fw_version >= 0x1b3)
		|| (kfd->device_info->asic_family == CHIP_ARCTURUS
			&& kfd->mec2_fw_version >= 0x30)
		|| (kfd->device_info->asic_family == CHIP_ALDEBARAN
			&& kfd->mec2_fw_version >= 0x28))
		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);

	return ret;
}
900
901static void kfd_smi_init(struct kfd_dev *dev) {
902 INIT_LIST_HEAD(&dev->smi_clients);
903 spin_lock_init(&dev->smi_lock);
904}
905
/*
 * kgd2kfd_device_init() - Full initialization of a probed KFD device.
 * @kfd: device allocated by kgd2kfd_probe()
 * @ddev: the DRM device backing this GPU
 * @gpu_resources: VMID range, doorbells etc. shared by amdgpu with KFD
 *
 * Runs the heavyweight init sequence: firmware version queries, GTT
 * memory carve-out, doorbells, interrupts, queue manager, GWS, IOMMU,
 * CWSR, SVM migration, topology and SMI. Failures unwind through the
 * goto-label chain at the bottom, which releases resources in reverse
 * acquisition order.
 *
 * Return: kfd->init_complete — true only if every step succeeded.
 */
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

	/* KFD owns the contiguous VMID range described by the bitmap */
	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* Reject the device if it needs PCIe atomics, the bus doesn't
	 * provide them, and there is no firmware version known to work
	 * without them (or the firmware is older than that version).
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info->needs_pci_atomics &&
	    (!kfd->device_info->no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->pdev->vendor, kfd->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info->no_atomic_fw_version);
		return false;
	}

	/* Clamp the hws_max_conc_proc module parameter to the VMID count */
	if ((hws_max_conc_proc < 0)
		|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

	/* Size the GTT carve-out: MQDs for every possible queue... */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/* ...plus two runlists' worth of map-process/map-queues/runlist
	 * packets (Aldebaran uses a larger map_process packet)...
	 */
	map_process_packet_size =
			kfd->device_info->asic_family == CHIP_ALDEBARAN ?
			sizeof(struct pm4_mes_map_process_aldebaran) :
			sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* ...plus two kernel queues... */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* ...plus 512 KiB slack for other bookkeeping */
	size += 512 * 1024;

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Sub-allocator hands out 512-byte chunks of the carve-out */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd);

	kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd);

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	/* Must run after dqm init: kfd_gws_init() reads dqm->sched_policy */
	if (kfd_gws_init(kfd)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			amdgpu_amdkfd_get_num_gws(kfd->kgd));
		goto gws_error;
	}

	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		kfd->use_iommu_v2 = false;
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	svm_migrate_init((struct amdgpu_device *)kfd->kgd);

	if(kgd2kfd_resume_iommu(kfd))
		goto device_iommu_error;

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	kfd->dbgmgr = NULL;

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		kfd->dqm->sched_policy);

	goto out;

	/* Error unwind: each label releases everything acquired before
	 * the step that failed, in reverse order.
	 */
kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
alloc_gtt_mem_failure:
	if (kfd->gws)
		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}
1081
/*
 * kgd2kfd_device_exit() - Tear down and free a KFD device.
 *
 * Resources acquired in kgd2kfd_device_init() are only released when
 * init actually completed; a device that failed init was already
 * unwound there. The kfd_dev itself (from kgd2kfd_probe()) is always
 * freed.
 */
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
		if (kfd->gws)
			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
	}

	kfree(kfd);
}
1098
/*
 * kgd2kfd_pre_reset() - Quiesce KFD before a GPU reset.
 *
 * Emits an SMI "reset start" event, lets the queue manager prepare for
 * reset, then suspends with run_pm=false — which takes the global
 * kfd_locked reference that kgd2kfd_post_reset() later drops — and
 * finally signals reset events to user space.
 *
 * Return: always 0 (no-op for devices that never finished init).
 */
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	kfd_smi_event_update_gpu_reset(kfd, false);

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
}
1113
1114
1115
1116
1117
1118
1119
/*
 * kgd2kfd_post_reset - Bring KFD back up after a GPU reset.
 *
 * Resumes the device queue manager and drops the kfd_locked reference
 * taken by kgd2kfd_suspend() during kgd2kfd_pre_reset(). Also clears
 * the SRAM ECC flag and notifies SMI listeners that the reset finished
 * (post_reset = true).
 *
 * Return: 0 on success, or the error from kfd_resume().
 */
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	/* Balance the atomic_inc_return() done in kgd2kfd_suspend() */
	atomic_dec(&kfd_locked);

	atomic_set(&kfd->sram_ecc_flag, 0);

	kfd_smi_event_update_gpu_reset(kfd, true);

	return 0;
}
1138
1139bool kfd_is_locked(void)
1140{
1141 return (atomic_read(&kfd_locked) > 0);
1142}
1143
/*
 * kgd2kfd_suspend - Suspend a KFD device.
 * @run_pm: appears to be true for runtime-PM-initiated suspend — in that
 *          case the global kfd_locked count is left alone and processes
 *          keep running (TODO confirm against callers).
 *
 * For a non-runtime suspend, the first device to suspend (kfd_locked
 * going 0 -> 1) evicts all user processes. The queue manager is stopped
 * and the IOMMU suspended in all cases.
 */
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* Only the first suspending device triggers process eviction */
	if (!run_pm) {
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);
	kfd_iommu_suspend(kfd);
}
1159
/*
 * kgd2kfd_resume - Resume a KFD device.
 * @run_pm: appears to be true for runtime-PM-initiated resume — then the
 *          global kfd_locked count is not touched and no processes are
 *          restored (TODO confirm against callers).
 *
 * Restarts the device queue manager. For a non-runtime resume, the last
 * device to resume (kfd_locked dropping to 0) restores all user
 * processes; an underflow indicates unbalanced suspend/resume calls.
 *
 * Return: 0 on success or a negative error from kfd_resume() /
 * kfd_resume_all_processes().
 */
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;

	/* Only the last resuming device restores processes */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}
1181
1182int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
1183{
1184 int err = 0;
1185
1186 err = kfd_iommu_resume(kfd);
1187 if (err)
1188 dev_err(kfd_device,
1189 "Failed to resume IOMMU for device %x:%x\n",
1190 kfd->pdev->vendor, kfd->pdev->device);
1191 return err;
1192}
1193
1194static int kfd_resume(struct kfd_dev *kfd)
1195{
1196 int err = 0;
1197
1198 err = kfd->dqm->ops.start(kfd->dqm);
1199 if (err)
1200 dev_err(kfd_device,
1201 "Error starting queue manager for device %x:%x\n",
1202 kfd->pdev->vendor, kfd->pdev->device);
1203
1204 return err;
1205}
1206
/*
 * kfd_queue_work - Queue work on an online CPU that shares the current
 * CPU's NUMA node.
 *
 * Scans the online-CPU mask starting just after the current CPU
 * (cpumask_next() returning nr_cpu_ids wraps to 0 via the modulo) and
 * stops at the first CPU on the local NUMA node. If the scan wraps all
 * the way back without a match, the work runs on the current CPU.
 *
 * NOTE(review): uses smp_processor_id(), so callers are expected to have
 * preemption disabled (the visible caller holds a spinlock with IRQs off).
 */
static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}
1221
1222
/*
 * kgd2kfd_interrupt - Top-half interrupt entry, called by KGD per IH
 * ring entry.
 *
 * Under interrupt_lock: if interrupts are active, the entry passes the
 * interest filter (which may rewrite it into patched_ihre), and it fits
 * into KFD's internal ring, the bottom half is queued on a NUMA-local
 * CPU via kfd_queue_work().
 */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;
	unsigned long flags;

	if (!kfd->init_complete)
		return;

	/* Guard against an ASIC entry size larger than our scratch buffer */
	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock_irqsave(&kfd->interrupt_lock, flags);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}
1248
1249int kgd2kfd_quiesce_mm(struct mm_struct *mm)
1250{
1251 struct kfd_process *p;
1252 int r;
1253
1254
1255
1256
1257
1258 p = kfd_lookup_process_by_mm(mm);
1259 if (!p)
1260 return -ESRCH;
1261
1262 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
1263 r = kfd_process_evict_queues(p);
1264
1265 kfd_unref_process(p);
1266 return r;
1267}
1268
1269int kgd2kfd_resume_mm(struct mm_struct *mm)
1270{
1271 struct kfd_process *p;
1272 int r;
1273
1274
1275
1276
1277
1278 p = kfd_lookup_process_by_mm(mm);
1279 if (!p)
1280 return -ESRCH;
1281
1282 r = kfd_process_restore_queues(p);
1283
1284 kfd_unref_process(p);
1285 return r;
1286}
1287
1288
1289
1290
1291
1292
1293
1294
1295
/*
 * kgd2kfd_schedule_evict_and_restore_process - Schedule delayed eviction
 * of the process owning @mm in response to @fence.
 *
 * Duplicate triggers for the same fence seqno are ignored (seqno is
 * remembered in the process). The eviction work is delayed so the
 * process gets up to PROCESS_ACTIVE_TIME_MS of runtime since its last
 * restore; time already spent active is subtracted from the delay.
 *
 * Return: 0 on success (or if @fence is already signaled), -EINVAL for
 * a NULL fence, -ENODEV if no KFD process owns @mm.
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	/* Nothing to evict if the fence already signaled */
	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	/* Skip duplicate notifications for the same eviction */
	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/* Let the process run for PROCESS_ACTIVE_TIME_MS after its last
	 * restore before it is actually evicted.
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	/* During process initialization eviction_work is initialized
	 * to kfd_evict_bo_worker
	 */
	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
	     p->lead_thread->pid, delay_jiffies);
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}
1337
1338static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
1339 unsigned int chunk_size)
1340{
1341 unsigned int num_of_longs;
1342
1343 if (WARN_ON(buf_size < chunk_size))
1344 return -EINVAL;
1345 if (WARN_ON(buf_size == 0))
1346 return -EINVAL;
1347 if (WARN_ON(chunk_size == 0))
1348 return -EINVAL;
1349
1350 kfd->gtt_sa_chunk_size = chunk_size;
1351 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
1352
1353 num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
1354 BITS_PER_LONG;
1355
1356 kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
1357
1358 if (!kfd->gtt_sa_bitmap)
1359 return -ENOMEM;
1360
1361 pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
1362 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
1363
1364 mutex_init(&kfd->gtt_sa_lock);
1365
1366 return 0;
1367
1368}
1369
/* Tear down the GTT sub-allocator: destroy its lock and free the chunk
 * bitmap allocated by kfd_gtt_sa_init().
 */
static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}
1375
/*
 * kfd_gtt_sa_calc_gpu_addr - GPU address of chunk @bit_num in the GTT
 * sub-allocator arena starting at @start_addr.
 *
 * Widen bit_num to 64 bits before multiplying: both operands are
 * unsigned int, so the product would otherwise wrap at 32 bits before
 * being added to the 64-bit base.
 */
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + (uint64_t)bit_num * chunk_size;
}
1382
/*
 * kfd_gtt_sa_calc_cpu_addr - CPU pointer to chunk @bit_num in the GTT
 * sub-allocator arena starting at @start_addr.
 *
 * Use byte-pointer arithmetic instead of round-tripping the pointer
 * through uint64_t: the integer cast is non-portable (pointer size need
 * not be 64 bits) and the uint8_t * form also widens the offset product
 * correctly.
 */
static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *)((uint8_t *)start_addr +
			    (size_t)bit_num * chunk_size);
}
1389
/*
 * kfd_gtt_sa_allocate - Allocate @size bytes from the GTT sub-allocator.
 *
 * Searches the chunk bitmap for a contiguous run of free chunks large
 * enough for @size, marks them used, and returns via @mem_obj a
 * kfd_mem_obj describing the chunk range and its GPU/CPU addresses.
 *
 * Return: 0 on success, -EINVAL for size 0, -ENOMEM if the request
 * exceeds the arena or no contiguous run is free.
 */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	/* Reject requests larger than the whole arena up front */
	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if (!(*mem_obj))
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first free chunk at or after start_search */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Tentatively start the range at this chunk */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
			kfd->gtt_start_gpu_addr,
			found,
			kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
			kfd->gtt_start_cpu_ptr,
			found,
			kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If one chunk suffices, mark it allocated and finish */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise extend the range chunk by chunk until it covers size */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/* If the next free chunk is not contiguous with the range
		 * built so far, restart the search from that chunk — the
		 * partial range is useless.
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/* Hit the end of the arena before covering size: give up */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Account for the chunk just added to the range */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Commit: mark every chunk in the range as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}
1494
1495int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
1496{
1497 unsigned int bit;
1498
1499
1500 if (!mem_obj)
1501 return 0;
1502
1503 pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
1504 mem_obj, mem_obj->range_start, mem_obj->range_end);
1505
1506 mutex_lock(&kfd->gtt_sa_lock);
1507
1508
1509 for (bit = mem_obj->range_start;
1510 bit <= mem_obj->range_end;
1511 bit++)
1512 clear_bit(bit, kfd->gtt_sa_bitmap);
1513
1514 mutex_unlock(&kfd->gtt_sa_lock);
1515
1516 kfree(mem_obj);
1517 return 0;
1518}
1519
1520void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1521{
1522 if (kfd)
1523 atomic_inc(&kfd->sram_ecc_flag);
1524}
1525
1526void kfd_inc_compute_active(struct kfd_dev *kfd)
1527{
1528 if (atomic_inc_return(&kfd->compute_profile) == 1)
1529 amdgpu_amdkfd_set_compute_idle(kfd->kgd, false);
1530}
1531
1532void kfd_dec_compute_active(struct kfd_dev *kfd)
1533{
1534 int count = atomic_dec_return(&kfd->compute_profile);
1535
1536 if (count == 0)
1537 amdgpu_amdkfd_set_compute_idle(kfd->kgd, true);
1538 WARN_ONCE(count < 0, "Compute profile ref. count error");
1539}
1540
1541void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
1542{
1543 if (kfd && kfd->init_complete)
1544 kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
1545}
1546
1547#if defined(CONFIG_DEBUG_FS)
1548
1549
1550
1551
1552int kfd_debugfs_hang_hws(struct kfd_dev *dev)
1553{
1554 if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
1555 pr_err("HWS is not enabled");
1556 return -EINVAL;
1557 }
1558
1559 return dqm_debugfs_hang_hws(dev->dqm);
1560}
1561
1562#endif
1563